From 52e88ab7b32c39c5821d308f5d75f0f32343440f Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 27 Aug 2024 13:17:04 -0700 Subject: [PATCH] runner.go: Health endpoint comments The health endpoint needs a little more work to show progress as Ollama expects, but we can at least return the right status and have comments for the future. --- llama/runner/runner.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama/runner/runner.go b/llama/runner/runner.go index 65e6a991..ecf9c9dc 100644 --- a/llama/runner/runner.go +++ b/llama/runner/runner.go @@ -582,7 +582,7 @@ type HealthResponse struct { Progress float32 `json:"progress"` } -// TODO (jmorganca): is it safe to do this concurrently with decoding? +// TODO (jmorganca): is it safe to do this concurrently with updating status? func (s *Server) health(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") if err := json.NewEncoder(w).Encode(&HealthResponse{ @@ -659,9 +659,11 @@ func main() { batchSize: *batchSize, parallel: *parallel, seqs: make([]*Sequence, *parallel), - status: "loading", + status: "loading model", } + // TODO (jessegross): This should be in a separate goroutine so we can report progress, + // otherwise Ollama can timeout for large model loads // load the model llama.BackendInit() params := llama.NewModelParams(*nGpuLayers, *mainGpu, func(progress float32) {