diff --git a/cmd/cmd.go b/cmd/cmd.go index b75c0b5e..b868ad71 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -124,6 +124,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error { } bars := make(map[string]*progress.Bar) + var convertSpin *progress.Spinner fn := func(resp api.ProgressResponse) error { if resp.Digest != "" { spinner.Stop() @@ -136,6 +137,16 @@ func CreateHandler(cmd *cobra.Command, args []string) error { } bar.Set(resp.Completed) + } else if strings.Contains(resp.Status, "converting") { + spinner.Stop() + + if convertSpin != nil { + convertSpin.SetMessage(resp.Status) + } else { + status = resp.Status + convertSpin = progress.NewSpinner(resp.Status) + p.Add("convert", convertSpin) + } } else if status != resp.Status { spinner.Stop() diff --git a/convert/convert.go b/convert/convert.go index 8c7b0943..225569a3 100644 --- a/convert/convert.go +++ b/convert/convert.go @@ -9,6 +9,7 @@ import ( "log/slog" "strings" + "github.com/ollama/ollama/api" "github.com/ollama/ollama/llm" ) @@ -79,12 +80,12 @@ func (ModelParameters) specialTokenTypes() []string { } } -func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { - return llm.WriteGGUF(ws, kv, ts) +func (ModelParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error { + return llm.WriteGGUF(ws, kv, ts, fn) } -func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor) error { - return llm.WriteGGUF(ws, kv, ts) +func (AdapterParameters) writeFile(ws io.WriteSeeker, kv llm.KV, ts []llm.Tensor, fn func(api.ProgressResponse)) error { + return llm.WriteGGUF(ws, kv, ts, fn) } type ModelConverter interface { @@ -99,7 +100,7 @@ type ModelConverter interface { // specialTokenTypes returns any special token types the model uses specialTokenTypes() []string // writeFile writes the model to the provided io.WriteSeeker - writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error + writeFile(io.WriteSeeker, llm.KV, 
[]llm.Tensor, func(api.ProgressResponse)) error } type moreParser interface { parseMore(fs.FS) error } @@ -115,10 +116,10 @@ type AdapterConverter interface { // See [strings.Replacer](https://pkg.go.dev/strings#Replacer) for details Replacements() []string - writeFile(io.WriteSeeker, llm.KV, []llm.Tensor) error + writeFile(io.WriteSeeker, llm.KV, []llm.Tensor, func(api.ProgressResponse)) error } -func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error { +func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV, fn func(api.ProgressResponse)) error { bts, err := fs.ReadFile(fsys, "adapter_config.json") if err != nil { return err @@ -153,14 +154,17 @@ func ConvertAdapter(fsys fs.FS, ws io.WriteSeeker, baseKV llm.KV) error { return err } - return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts)) + fn(api.ProgressResponse{ + Status: "converting adapter 0%", + }) + return conv.writeFile(ws, conv.KV(baseKV), conv.Tensors(ts), fn) } // Convert writes an Ollama compatible model to the provided io.WriteSeeker based on configurations // and files it finds in the input path. // Supported input model formats include safetensors. // Supported input tokenizers files include tokenizer.json (preferred) and tokenizer.model. 
-func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { +func ConvertModel(fsys fs.FS, ws io.WriteSeeker, fn func(api.ProgressResponse)) error { bts, err := fs.ReadFile(fsys, "config.json") if err != nil { return err @@ -224,5 +228,8 @@ func ConvertModel(fsys fs.FS, ws io.WriteSeeker) error { return err } - return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts)) + fn(api.ProgressResponse{ + Status: "converting model 0%", + }) + return conv.writeFile(ws, conv.KV(t), conv.Tensors(ts), fn) } diff --git a/convert/convert_test.go b/convert/convert_test.go index f71ff8cd..6dc69561 100644 --- a/convert/convert_test.go +++ b/convert/convert_test.go @@ -19,6 +19,7 @@ import ( "golang.org/x/exp/maps" + "github.com/ollama/ollama/api" "github.com/ollama/ollama/llm" ) @@ -31,7 +32,7 @@ func convertFull(t *testing.T, fsys fs.FS) (*os.File, llm.KV, llm.Tensors) { } defer f.Close() - if err := ConvertModel(fsys, f); err != nil { + if err := ConvertModel(fsys, f, func(api.ProgressResponse) {}); err != nil { t.Fatal(err) } @@ -150,7 +151,7 @@ func TestConvertInvalidDatatype(t *testing.T) { tempDir := t.TempDir() generateSafetensorTestData(t, tempDir) - err = ConvertModel(os.DirFS(tempDir), f) + err = ConvertModel(os.DirFS(tempDir), f, func(api.ProgressResponse) {}) if err == nil || err.Error() != "unsupported safetensors model" { t.Errorf("expected error but didn't get one") } @@ -287,7 +288,7 @@ func TestConvertAdapter(t *testing.T) { tempDir := t.TempDir() generateLoraTestData(t, tempDir) - if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV); err != nil { + if err = ConvertAdapter(os.DirFS(tempDir), f, c.BaseKV, func(api.ProgressResponse) {}); err != nil { t.Fatal(err) } diff --git a/llm/gguf.go b/llm/gguf.go index 2e6bc542..75287dcd 100644 --- a/llm/gguf.go +++ b/llm/gguf.go @@ -11,6 +11,7 @@ import ( "slices" "strings" + "github.com/ollama/ollama/api" "golang.org/x/exp/maps" ) @@ -506,7 +507,7 @@ func writeGGUFArray[S ~[]E, E any](w io.Writer, t uint32, s S) 
error { return binary.Write(w, binary.LittleEndian, s) } -func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { +func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor, fn func(api.ProgressResponse)) error { if err := binary.Write(ws, binary.LittleEndian, []byte("GGUF")); err != nil { return err } @@ -552,7 +553,10 @@ func WriteGGUF(ws io.WriteSeeker, kv KV, ts []Tensor) error { } var alignment int64 = 32 - for _, t := range ts { + for i, t := range ts { + fn(api.ProgressResponse{ + Status: fmt.Sprintf("converting %d%%", 100*(i+1)/len(ts)), + }) if err := ggufWriteTensor(ws, t, alignment); err != nil { return err } diff --git a/llm/memory_test.go b/llm/memory_test.go index ffb14286..61e5aef2 100644 --- a/llm/memory_test.go +++ b/llm/memory_test.go @@ -41,7 +41,7 @@ func TestEstimateGPULayers(t *testing.T) { "tokenizer.ggml.tokens": []string{" "}, "tokenizer.ggml.scores": []float32{0}, "tokenizer.ggml.token_type": []int32{0}, - }, tensors) + }, tensors, func(api.ProgressResponse) {}) require.NoError(t, err) ggml, err := LoadModel(f.Name(), 0) diff --git a/server/model.go b/server/model.go index 55fb2d8d..37c76f4e 100644 --- a/server/model.go +++ b/server/model.go @@ -98,7 +98,6 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML } defer os.RemoveAll(p) - fn(api.ProgressResponse{Status: "converting model"}) // TODO(mxyng): this should write directly into a layer // e.g. 
NewLayer(arch.Reader(), "application/vnd.ollama.image.model") t, err := os.CreateTemp(p, "fp16") @@ -123,13 +122,18 @@ func parseFromZipFile(_ context.Context, command string, baseLayers []*layerGGML if baseModel == nil { return nil, fmt.Errorf("no base model specified for the adapter") } - - if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV()); err != nil { + fn(api.ProgressResponse{ + Status: "converting adapter", + }) + if err := convert.ConvertAdapter(convert.NewZipReader(r, p, 32<<20), t, baseModel.KV(), fn); err != nil { return nil, err } layerType = "application/vnd.ollama.image.adapter" case "model": - if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t); err != nil { + fn(api.ProgressResponse{ + Status: "converting model", + }) + if err := convert.ConvertModel(convert.NewZipReader(r, p, 32<<20), t, fn); err != nil { return nil, err } layerType = "application/vnd.ollama.image.model" diff --git a/server/model_test.go b/server/model_test.go index 7753c549..a7036e1b 100644 --- a/server/model_test.go +++ b/server/model_test.go @@ -145,7 +145,7 @@ func TestParseFromFileFromLayer(t *testing.T) { t.Fatalf("failed to open file: %v", err) } defer file.Close() - if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil { + if err := llm.WriteGGUF(file, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse){}); err != nil { t.Fatalf("failed to write gguf: %v", err) } @@ -197,7 +197,7 @@ func TestParseLayerFromCopy(t *testing.T) { defer file2.Close() for range 5 { - if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}); err != nil { + if err := llm.WriteGGUF(file2, llm.KV{"general.architecture": "gemma"}, []llm.Tensor{}, func(api.ProgressResponse){}); err != nil { t.Fatalf("failed to write gguf: %v", err) } } diff --git a/server/routes_create_test.go b/server/routes_create_test.go index d436f26c..976647ec 100644 --- 
a/server/routes_create_test.go +++ b/server/routes_create_test.go @@ -30,7 +30,7 @@ func createBinFile(t *testing.T, kv map[string]any, ti []llm.Tensor) string { } defer f.Close() - if err := llm.WriteGGUF(f, kv, ti); err != nil { + if err := llm.WriteGGUF(f, kv, ti, func(api.ProgressResponse){}); err != nil { t.Fatal(err) } diff --git a/server/sched_test.go b/server/sched_test.go index fb049574..eea1224a 100644 --- a/server/sched_test.go +++ b/server/sched_test.go @@ -128,7 +128,8 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est }, []llm.Tensor{ {Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, {Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, - })) + }, + func(api.ProgressResponse){})) require.NoError(t, err) fname := f.Name()