From ce15ed6d69c92cb7e94eb989defb3c61c2aed708 Mon Sep 17 00:00:00 2001 From: jmorganca Date: Sun, 26 May 2024 23:23:09 -0700 Subject: [PATCH] remove dependency on `llm` --- llama/llama.go | 6 ++---- llama/runner/runner.go | 1 - llama/sampling_ext.cpp | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/llama/llama.go b/llama/llama.go index ae051b5b..48fd3a5c 100644 --- a/llama/llama.go +++ b/llama/llama.go @@ -35,8 +35,6 @@ import ( "runtime" "strings" "unsafe" - - "github.com/ollama/ollama/llm" ) func BackendInit() { @@ -227,7 +225,7 @@ func (m *Model) Tokenize(text string, maxTokens int, addSpecial bool, parseSpeci return tokens, nil } -func Quantize(infile, outfile string, ftype llm.FileType) error { +func Quantize(infile, outfile string, ftype uint32) error { cinfile := C.CString(infile) defer C.free(unsafe.Pointer(cinfile)) @@ -236,7 +234,7 @@ func Quantize(infile, outfile string, ftype llm.FileType) error { params := C.llama_model_quantize_default_params() params.nthread = -1 - params.ftype = ftype.Value() + params.ftype = ftype if rc := C.llama_model_quantize(cinfile, coutfile, &params); rc != 0 { return fmt.Errorf("llama_model_quantize: %d", rc) diff --git a/llama/runner/runner.go b/llama/runner/runner.go index a732dae7..8cc7e0fc 100644 --- a/llama/runner/runner.go +++ b/llama/runner/runner.go @@ -209,7 +209,6 @@ func (s *Server) handler(w http.ResponseWriter, r *http.Request) { for i, sq := range s.seqs { if sq == nil { s.seqs[i] = seq - fmt.Println("signal") s.cond.Signal() break } diff --git a/llama/sampling_ext.cpp b/llama/sampling_ext.cpp index db6d9efc..0ad7f941 100644 --- a/llama/sampling_ext.cpp +++ b/llama/sampling_ext.cpp @@ -17,7 +17,7 @@ struct llama_sampling_context* llama_sampling_cinit(struct llama_sampling_cparam sparams.mirostat_eta = params->mirostat_eta; sparams.penalize_nl = params->penalize_nl; sparams.seed = params->seed; - sparams.grammar = std::string(params->grammar); + sparams.grammar = params->grammar; return 
llama_sampling_init(sparams); }