expiration

2025-07-23 17:50:11 +00:00 · 2024-08-07 13:01:04 -07:00 · 2024-08-07 13:01:04 -07:00 · d503f04b32
commit d503f04b32
parent 8ccf543c53
5 changed files with 150 additions and 58 deletions
--- a/api/types.go
+++ b/api/types.go
@ -36,6 +36,13 @@ func (e StatusError) Error() string {
 // ImageData represents the raw binary data of an image file.
 type ImageData []byte

+type WhisperRequest struct {
+	Model      string    `json:"model"`
+	Audio      string    `json:"audio,omitempty"`
+	Transcribe bool      `json:"transcribe,omitempty"`
+	KeepAlive  *Duration `json:"keep_alive,omitempty"`
+}
+
 // GenerateRequest describes a request sent by [Client.Generate]. While you
 // have to specify the Model and Prompt fields, all the other fields have
 // reasonable defaults for basic uses.
@ -81,11 +88,7 @@ type GenerateRequest struct {
 	// set through this field, if the model supports it.
 	Options map[string]interface{} `json:"options"`

-	WhisperModel string `json:"whisper_model,omitempty"`
-
-	Audio string `json:"audio,omitempty"`
-
-	Transcribe bool `json:"transcribe,omitempty"`
+	Speech *WhisperRequest `json:"speech,omitempty"`
 }

 // ChatRequest describes a request sent by [Client.Chat].
@ -112,7 +115,7 @@ type ChatRequest struct {
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`

-	WhisperModel string `json:"whisper_model,omitempty"`
+	Speech *WhisperRequest `json:"speech,omitempty"`
 }

 type Tools []Tool
--- a/docs/speech.md
+++ b/docs/speech.md
@ -0,0 +1,73 @@
+# Speech to Text Prototype
+
+### To run
+`make {/path/to/whisper.cpp/server}`
+
+### Update routes.go
+- replace `whisperServer` with path to server
+
+## api/generate
+### Request fields
+- `speech` (required):
+    - `audio` (required): path to audio file
+    - `model` (required): path to whisper model
+    - `transcribe` (optional): if true, will transcribe and return the audio file
+    - `keep_alive`: (optional): sets how long the model is stored in memory (default: `5m`)
+- `prompt` (optional): if not null, passed in with the transcribed audio
+
+#### Transcription
+```
+curl http://localhost:11434/api/generate -d '{
+    "speech": {
+        "model": "/Users/royhan-ollama/.ollama/whisper/ggml-base.en.bin",
+        "audio": "/Users/royhan-ollama/ollama/llm/whisper.cpp/samples/jfk.wav",
+        "transcribe": true,
+        "keep_alive": "1m"
+    },
+    "stream": false
+}' | jq
+```
+
+#### Response Generation
+```
+curl http://localhost:11434/api/generate -d '{
+    "model": "llama3",
+    "prompt": "What do you think about this quote?",
+    "speech": {
+        "model": "/Users/royhan-ollama/.ollama/whisper/ggml-base.en.bin",
+        "audio": "/Users/royhan-ollama/ollama/llm/whisper.cpp/samples/jfk.wav",
+        "keep_alive": "1m"
+    },
+    "stream": false
+}' | jq
+```
+
+## api/chat
+### Request fields
+- `model` (required): language model to chat with
+- `speech` (required):
+    - `model` (required): path to whisper model
+    - `keep_alive`: (optional): sets how long the model is stored in memory (default: `5m`)
+- `messages`/`message`/`audio` (required): path to audio file
+
+```
+curl http://localhost:11434/api/chat -d '{
+    "model": "llama3",
+    "speech": {
+        "model": "/Users/royhan-ollama/.ollama/whisper/ggml-base.en.bin",
+        "keep_alive": "10m"
+    },
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a Canadian Nationalist"
+        },
+        {
+            "role": "user",
+            "content": "What do you think about this quote?",
+            "audio": "/Users/royhan-ollama/ollama/llm/whisper.cpp/samples/jfk.wav"
+        }
+    ],
+    "stream": false
+}' | jq
+```
--- a/docs/whisper.md
+++ b/docs/whisper.md
@ -1,20 +0,0 @@
-# Whisper Prototype
-
-### To run
-`make {/path/to/whisper.cpp/server}`
-
-### Update routes.go
- replace `whisperServer` with path to server
-
-## api/generate
-### Request fields
-    - "audio" (required): path to audio file
-    - "whisper_model" (required): path to whisper model
-    - "transcribe" (optional): if true, will transcribe and return the audio file
-    - "prompt" (optional): if not null, passed in with the transcribed audio
-
-## api/chat
-### Request fields
-    - "whisper_model" (required): path to whisper model
-    - "message" object
-        - "audio" (required): contains path to audio file
--- a/server/routes.go
+++ b/server/routes.go
@ -109,11 +109,23 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capabil
 	return runner.llama, model, &opts, nil
 }

-func (s *Server) runWhisperServer(c *gin.Context, portCh chan int, errCh chan error, modelPath string) {
+func (s *Server) runWhisperServer(c *gin.Context, portCh chan int, errCh chan error, speech *api.WhisperRequest) {
+	modelPath := speech.Model
+
+	// default to 5 minutes
+	var sessionDuration time.Duration
+	if speech.KeepAlive != nil {
+		sessionDuration = speech.KeepAlive.Duration
+	} else {
+		sessionDuration = 5 * time.Minute
+	}
+
 	s.sched.whisperMu.Lock()
 	if s.sched.whisperLoaded[modelPath] != nil {
 		slog.Info(fmt.Sprintf("whisper server already running %s on port %d", modelPath, *s.sched.whisperLoaded[modelPath]))
 		portCh <- *s.sched.whisperLoaded[modelPath]
+		// Renew the expiration time
+		s.sched.whisperExpiresAt[modelPath] = time.Now().Add(sessionDuration)
 		s.sched.whisperMu.Unlock()
 		return
 	}
@ -149,36 +161,52 @@ func (s *Server) runWhisperServer(c *gin.Context, portCh chan int, errCh chan er

 	// Wait for server connection
 	retries := 10
+	var connErr error
 	for range retries {
 		time.Sleep(50 * time.Millisecond)
 		conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", port), time.Second)
 		if err == nil {
 			conn.Close()
+			connErr = nil
 			break
 		}
+		connErr = err
 	}

-	if err != nil {
-		slog.Error("failed to connect to whisper server", "error", err)
-		errCh <- err
+	if connErr != nil {
+		slog.Error("failed to connect to whisper server", "error", connErr)
+		errCh <- connErr
 		return
 	}

 	portCh <- port
 	s.sched.whisperLoaded[modelPath] = &port
+	s.sched.whisperExpiresAt[modelPath] = time.Now().Add(sessionDuration)

 	s.sched.whisperMu.Unlock()

 	// Wait for the whisper server to exit
 	defer func() {
-		err = cmd.Wait()
-		if err != nil {
+		ticker := time.NewTicker(5 * time.Second)
+		defer ticker.Stop()
+		for range ticker.C {
+			s.sched.whisperMu.Lock()
+			if time.Now().After(s.sched.whisperExpiresAt[modelPath]) {
+				slog.Info("exiting whisper server")
+				delete(s.sched.whisperLoaded, modelPath)
+				delete(s.sched.whisperExpiresAt, modelPath)
+				s.sched.whisperMu.Unlock()
+
+				if err := cmd.Process.Kill(); err != nil {
 					c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err})
 					return
 				}
-		s.sched.whisperMu.Lock()
-		delete(s.sched.whisperLoaded, modelPath)
+
+				slog.Debug("whisper server stopped")
+				return
+			}
 			s.sched.whisperMu.Unlock()
+		}
 	}()
 }

@ -280,10 +308,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 		caps = append(caps, CapabilityInsert)
 	}

-	if req.Audio != "" {
+	if req.Speech != nil {
 		portCh := make(chan int, 1)
 		errCh := make(chan error, 1)
-		go s.runWhisperServer(c, portCh, errCh, req.WhisperModel)
+		go s.runWhisperServer(c, portCh, errCh, req.Speech)

 		var port int

@ -294,19 +322,19 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			return
 		}

-		w, err := whisperInference(c, req.Audio, port)
+		w, err := whisperInference(c, req.Speech.Audio, port)
 		if err != nil {
 			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "failed to generate completion"})
 			return
 		}

-		if req.Transcribe {
+		if req.Speech.Transcribe {
 			c.JSON(http.StatusOK, api.GenerateResponse{
 				Model:      req.Model,
 				CreatedAt:  time.Now().UTC(),
 				Response:   w.Text,
 				Done:       true,
-				DoneReason: "stop",
+				DoneReason: "transcribe",
 			})
 			return
 		}
@ -1481,13 +1509,13 @@ func (s *Server) ProcessHandler(c *gin.Context) {
 	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
 }

-func processAudio(c *gin.Context, s *Server, msgs []api.Message, model string) {
-	if model == "" {
+func processAudio(c *gin.Context, s *Server, msgs []api.Message, req *api.WhisperRequest) {
+	if req.Model == "" {
 		return
 	}
 	portCh := make(chan int, 1)
 	errCh := make(chan error, 1)
-	go s.runWhisperServer(c, portCh, errCh, model)
+	go s.runWhisperServer(c, portCh, errCh, req)

 	var port int
 	select {
@ -1554,7 +1582,9 @@ func (s *Server) ChatHandler(c *gin.Context) {
 		msgs = append([]api.Message{{Role: "system", Content: m.System}}, msgs...)
 	}

-	processAudio(c, s, msgs, req.WhisperModel)
+	if req.Speech != nil {
+		processAudio(c, s, msgs, req.Speech)
+	}

 	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, msgs, req.Tools)
 	if err != nil {
--- a/server/sched.go
+++ b/server/sched.go
@ -48,6 +48,7 @@ type Scheduler struct {
 	reschedDelay time.Duration

 	whisperLoaded    map[string]*int
+	whisperExpiresAt map[string]time.Time
 	whisperMu        sync.Mutex
 }

@ -76,6 +77,7 @@ func InitScheduler(ctx context.Context) *Scheduler {
 		getCpuFn:         gpu.GetCPUInfo,
 		reschedDelay:     250 * time.Millisecond,
 		whisperLoaded:    make(map[string]*int),
+		whisperExpiresAt: make(map[string]time.Time),
 	}
 	sched.loadFn = sched.load
 	return sched
@ -114,6 +116,10 @@ func (s *Scheduler) Run(ctx context.Context) {
 	go func() {
 		s.processCompleted(ctx)
 	}()
+
+	// go func() {
+	// 	could clean up whisper servers in init thread
+	// }
 }

 func (s *Scheduler) processPending(ctx context.Context) {