diff --git a/server/routes.go b/server/routes.go index 8970cbe8..308970bc 100644 --- a/server/routes.go +++ b/server/routes.go @@ -109,7 +109,15 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capabil return runner.llama, model, &opts, nil } -func runWhisperServer(c *gin.Context, portCh chan int) { +func (s *Server) runWhisperServer(c *gin.Context, portCh chan int) { + s.sched.whisperMu.Lock() + if s.sched.whisperPort != nil { + slog.Info("whisper server already running", "port", *s.sched.whisperPort) + portCh <- *s.sched.whisperPort + s.sched.whisperMu.Unlock() + return + } + whisperServer := "/Users/royhan-ollama/ollama/llm/whisper.cpp/server" // Find an available port for whisper @@ -126,7 +134,7 @@ func runWhisperServer(c *gin.Context, portCh chan int) { slog.Debug("ResolveTCPAddr failed") port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range } - finalParams := append(params, "--port", strconv.Itoa(port), "--model", "/Users/royhan-ollama/ollama/llm/whisper.cpp/models/ggml-base.en.bin") + finalParams := append(params, "--port", strconv.Itoa(port), "--model", "/Users/royhan-ollama/.ollama/whisper/ggml-base.en.bin") cmd := exec.Command(whisperServer, finalParams...) slog.Info("starting whisper server", "cmd", cmd.String()) @@ -138,18 +146,32 @@ func runWhisperServer(c *gin.Context, portCh chan int) { c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "failed to start whisper server"}) } - // wait for server to start - time.Sleep(250 * time.Millisecond) - - portCh <- port - - // Wait for the whisper server to exit - err = cmd.Wait() - if err != nil { - c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "whisper server exited"}) + retries := 10 + for range retries { + time.Sleep(25 * time.Millisecond) + conn, err := net.DialTimeout("tcp", fmt.Sprintf("localhost:%d", port), time.Second) + if err == nil { + conn.Close() + break + } } + if err != nil { + slog.Error("failed to connect to whisper server", "error", err) + c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "failed to connect to whisper server"}) + } + + portCh <- port + s.sched.whisperPort = &port + + s.sched.whisperMu.Unlock() + + // Wait for the whisper server to exit defer func() { + err = cmd.Wait() + if err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "whisper server exited"}) + } err := cmd.Process.Kill() if err != nil { slog.Error("failed to kill whisper server", "error", err) @@ -232,7 +254,6 @@ func whisperInference(c *gin.Context, filePath string, port int) (*api.WhisperCo } func (s *Server) GenerateHandler(c *gin.Context) { - slog.Info("generate request", "method", c.Request.Method, "url", c.Request.URL.String()) checkpointStart := time.Now() var req api.GenerateRequest if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) { @@ -258,7 +279,7 @@ func (s *Server) GenerateHandler(c *gin.Context) { if req.Audio != "" { port := make(chan int, 1) - go runWhisperServer(c, port) + go s.runWhisperServer(c, port) w, err := whisperInference(c, req.Audio, <-port) if err != nil { diff --git a/server/sched.go b/server/sched.go index c378865b..9adacdba 100644 --- a/server/sched.go +++ b/server/sched.go @@ -46,6 +46,9 @@ type Scheduler struct { getGpuFn func() gpu.GpuInfoList getCpuFn func() gpu.GpuInfoList reschedDelay time.Duration + + whisperPort *int + whisperMu sync.Mutex } // Default automatic value for number of models we allow per GPU