runner.go: Health endpoint comments
The health endpoint needs a little more work to show progress as Ollama expects, but we can at least return the right status and leave comments for the future.
This commit is contained in:
parent 4ca8579428
commit 52e88ab7b3
runner.go
@@ -582,7 +582,7 @@ type HealthResponse struct {
 	Progress float32 `json:"progress"`
 }
 
-// TODO (jmorganca): is it safe to do this concurrently with decoding?
+// TODO (jmorganca): is it safe to do this concurrently with updating status?
 func (s *Server) health(w http.ResponseWriter, r *http.Request) {
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(&HealthResponse{
@@ -659,9 +659,11 @@ func main() {
 		batchSize: *batchSize,
 		parallel:  *parallel,
 		seqs:      make([]*Sequence, *parallel),
-		status:    "loading",
+		status:    "loading model",
 	}
 
+	// TODO (jessegross): This should be in a separate goroutine so we can report progress,
+	// otherwise Ollama can timeout for large model loads
 	// load the model
 	llama.BackendInit()
 	params := llama.NewModelParams(*nGpuLayers, *mainGpu, func(progress float32) {
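
The first TODO asks whether encoding the response is safe to do concurrently with status updates. A minimal sketch of one answer, assuming simplified hypothetical Server fields (mu, status, progress; the real struct in runner.go carries much more): snapshot the shared fields under a mutex, then encode the copy, so the encoder never races with a writer.

package main

import (
	"encoding/json"
	"log"
	"net/http"
	"sync"
)

type HealthResponse struct {
	Status   string  `json:"status"`
	Progress float32 `json:"progress"`
}

type Server struct {
	mu       sync.Mutex // hypothetical: guards status and progress
	status   string
	progress float32
}

func (s *Server) health(w http.ResponseWriter, r *http.Request) {
	// Copy the shared fields under the lock so Encode never reads
	// them while the loader goroutine is writing.
	s.mu.Lock()
	resp := HealthResponse{Status: s.status, Progress: s.progress}
	s.mu.Unlock()

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(&resp); err != nil {
		http.Error(w, "failed to encode health response", http.StatusInternalServerError)
	}
}

func main() {
	s := &Server{status: "loading model"}
	http.HandleFunc("/health", s.health)
	log.Fatal(http.ListenAndServe("127.0.0.1:8080", nil))
}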
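The second TODO wants the load moved off the main path so progress can be reported. A self-contained sketch of that shape, with a hypothetical loadModel standing in for the real llama.BackendInit and model-loading calls: the load runs in its own goroutine, the progress callback updates shared state, and the health path stays responsive so Ollama does not time out on large models.

package main

import (
	"fmt"
	"sync"
	"time"
)

type server struct {
	mu       sync.Mutex
	status   string
	progress float32
}

func (s *server) setProgress(p float32) {
	s.mu.Lock()
	s.progress = p
	s.mu.Unlock()
}

// loadModel is a hypothetical stand-in for llama.BackendInit and the
// model-loading calls; it reports progress through a callback the way
// llama.NewModelParams does.
func loadModel(onProgress func(float32)) {
	for i := 1; i <= 10; i++ {
		time.Sleep(50 * time.Millisecond) // simulate load work
		onProgress(float32(i) / 10)
	}
}

func main() {
	s := &server{status: "loading model"}

	// Run the load in a separate goroutine so health checks keep
	// getting answered while the model loads.
	go func() {
		loadModel(s.setProgress)
		s.mu.Lock()
		s.status = "ok"
		s.mu.Unlock()
	}()

	// Stand-in for Ollama polling the health endpoint.
	for {
		s.mu.Lock()
		status, progress := s.status, s.progress
		s.mu.Unlock()
		fmt.Printf("status=%s progress=%.0f%%\n", status, progress*100)
		if status == "ok" {
			return
		}
		time.Sleep(100 * time.Millisecond)
	}
}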