Mirror of https://github.com/tcsenpai/ollama.git, synced 2025-06-09 12:37:07 +00:00

remove tmp directories created by previous servers (#559)

* remove tmp directories created by previous servers
* clean up on server stop
* Update routes.go
* Update server/routes.go

  Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>

* create top-level temp ollama dir
* check file exists before creating

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
Co-authored-by: Michael Yang <mxyng@pm.me>

parent 8c83701e9f
commit 4cba75efc5
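The shape of the change, before the diffs: every model load used to create its own `llama-*` temp dir that nothing ever deleted. Now the server creates a single top-level work directory at startup, publishes it to request handlers through a small gin middleware, and removes it on shutdown. A self-contained sketch of that lifecycle, assuming only gin (which the diff itself uses); the `serve` function and the `/` route are illustrative, not the actual ollama wiring:

```go
// Sketch of the server-scoped work-dir lifecycle introduced by this commit:
// one temp dir per server process, shared via gin middleware, removed both
// on normal return and on SIGINT/SIGTERM.
package main

import (
	"net/http"
	"os"
	"os/signal"
	"syscall"

	"github.com/gin-gonic/gin"
)

func serve() error {
	// create a single top-level work dir for the whole server process
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		return err
	}
	defer os.RemoveAll(workDir) // clean up when serve returns

	// also clean up if the process is interrupted or terminated
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-signals
		os.RemoveAll(workDir)
		os.Exit(0)
	}()

	r := gin.Default()
	// middleware makes the work dir visible to every request handler
	r.Use(func(c *gin.Context) {
		c.Set("workDir", workDir)
		c.Next()
	})
	r.GET("/", func(c *gin.Context) {
		// handlers retrieve it the same way the real handlers do below
		c.String(http.StatusOK, "workDir: %s", c.GetString("workDir"))
	})
	return r.Run()
}

func main() {
	if err := serve(); err != nil {
		os.Exit(1)
	}
}
```

The design gives cleanup a single owner: the process that created the directory removes it, on a clean return and on SIGINT/SIGTERM alike.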
llm/ggml.go (13 changed lines)

@@ -4,7 +4,6 @@ import (
 	"encoding/binary"
 	"errors"
 	"io"
-	"sync"
 )
 
 type GGML struct {
@@ -165,18 +164,6 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	return nil, nil
 }
 
-var (
-	ggmlInit    sync.Once
-	ggmlRunners []ModelRunner // a slice of ModelRunners ordered by priority
-)
-
-func ggmlRunner() []ModelRunner {
-	ggmlInit.Do(func() {
-		ggmlRunners = chooseRunners("ggml")
-	})
-	return ggmlRunners
-}
-
 const (
 	// Magic constant for `ggml` files (unversioned).
 	FILE_MAGIC_GGML = 0x67676d6c
llm/gguf.go (14 changed lines)

@@ -6,7 +6,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"sync"
 )
 
 type containerGGUF struct {
@@ -368,16 +367,3 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
 
 	return
 }
-
-var (
-	ggufInit    sync.Once
-	ggufRunners []ModelRunner // a slice of ModelRunners ordered by priority
-)
-
-func ggufRunner() []ModelRunner {
-	ggufInit.Do(func() {
-		ggufRunners = chooseRunners("gguf")
-	})
-
-	return ggufRunners
-}
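Net effect of the two files above: the package-level `sync.Once` runner caches (`ggmlRunner`, `ggufRunner`) are deleted, which is why `sync` drops out of both import lists; callers now pass the server's work directory straight into `chooseRunners(workDir, ...)` instead (see llm/llama.go and the `llm.New` hunks below).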
llm/llama.go (34 changed lines)

@@ -32,7 +32,7 @@ type ModelRunner struct {
 	Path string // path to the model runner executable
 }
 
-func chooseRunners(runnerType string) []ModelRunner {
+func chooseRunners(workDir, runnerType string) []ModelRunner {
 	buildPath := path.Join("llama.cpp", runnerType, "build")
 	var runners []string
 
@@ -61,11 +61,6 @@ func chooseRunners(runnerType string) []ModelRunner {
 		}
 	}
 
-	// copy the files locally to run the llama.cpp server
-	tmpDir, err := os.MkdirTemp("", "llama-*")
-	if err != nil {
-		log.Fatalf("load llama runner: failed to create temp dir: %v", err)
-	}
 	runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
 	for _, r := range runners {
 		// find all the files in the runner's bin directory
@@ -85,18 +80,27 @@ func chooseRunners(runnerType string) []ModelRunner {
 			defer srcFile.Close()
 
 			// create the directory in case it does not exist
-			destPath := filepath.Join(tmpDir, filepath.Dir(f))
+			destPath := filepath.Join(workDir, filepath.Dir(f))
 			if err := os.MkdirAll(destPath, 0o755); err != nil {
 				log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
 			}
-			destFile, err := os.OpenFile(filepath.Join(destPath, filepath.Base(f)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-			if err != nil {
-				log.Fatalf("write llama runner %s: %v", f, err)
-			}
-			defer destFile.Close()
-
-			if _, err := io.Copy(destFile, srcFile); err != nil {
-				log.Fatalf("copy llama runner %s: %v", f, err)
+
+			destFile := filepath.Join(destPath, filepath.Base(f))
+
+			_, err = os.Stat(destFile)
+			switch {
+			case errors.Is(err, os.ErrNotExist):
+				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+				if err != nil {
+					log.Fatalf("write llama runner %s: %v", f, err)
+				}
+				defer destFile.Close()
+
+				if _, err := io.Copy(destFile, srcFile); err != nil {
+					log.Fatalf("copy llama runner %s: %v", f, err)
+				}
+			case err != nil:
+				log.Fatalf("stat llama runner %s: %v", f, err)
 			}
 		}
 	}
@@ -107,7 +111,7 @@ func chooseRunners(runnerType string) []ModelRunner {
 	// return the runners to try in priority order
 	localRunnersByPriority := []ModelRunner{}
 	for _, r := range runners {
-		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(tmpDir, r)})
+		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(workDir, r)})
 	}
 
 	return localRunnersByPriority
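The `os.Stat` switch above is the "check file exists before creating" bullet from the commit message: since every extraction now targets the same server-owned work dir, writing a runner must be idempotent. A standalone sketch of the pattern, with a hypothetical `copyIfMissing` helper (the real code inlines this logic in `chooseRunners`):

```go
package main

import (
	"errors"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"
)

// copyIfMissing writes src to dest only when dest does not already
// exist, so repeated extractions into the same work dir are no-ops.
func copyIfMissing(src io.Reader, dest string) {
	_, err := os.Stat(dest)
	switch {
	case errors.Is(err, os.ErrNotExist):
		out, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
		if err != nil {
			log.Fatalf("write %s: %v", dest, err)
		}
		defer out.Close()
		if _, err := io.Copy(out, src); err != nil {
			log.Fatalf("copy %s: %v", dest, err)
		}
	case err != nil:
		log.Fatalf("stat %s: %v", dest, err)
	}
	// err == nil: dest already exists, leave it alone
}

func main() {
	dir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(dir)

	dest := filepath.Join(dir, "runner")
	copyIfMissing(strings.NewReader("payload"), dest) // first call writes the file
	copyIfMissing(strings.NewReader("payload"), dest) // second call sees it and skips
}
```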
llm/llm.go

@@ -21,7 +21,7 @@ type LLM interface {
 	Ping(context.Context) error
 }
 
-func New(model string, adapters []string, opts api.Options) (LLM, error) {
+func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -91,9 +91,9 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	switch ggml.Name() {
 	case "gguf":
 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
-		return newLlama(model, adapters, ggufRunner(), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
-		return newLlama(model, adapters, ggmlRunner(), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
server/images.go

@@ -267,7 +267,7 @@ func filenameWithPath(path, f string) (string, error) {
 	return f, nil
 }
 
-func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error {
+func CreateModel(ctx context.Context, workDir, name string, path string, fn func(resp api.ProgressResponse)) error {
 	mp := ParseModelPath(name)
 
 	var manifest *ManifestV2
@@ -524,7 +524,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api.ProgressResponse)) error {
 	}
 
 	// generate the embedding layers
-	embeddingLayers, err := embeddingLayers(embed)
+	embeddingLayers, err := embeddingLayers(workDir, embed)
 	if err != nil {
 		return err
 	}
@@ -581,7 +581,7 @@ type EmbeddingParams struct {
 }
 
 // embeddingLayers loads the associated LLM and generates the embeddings to be stored from an input file
-func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
+func embeddingLayers(workDir string, e EmbeddingParams) ([]*LayerReader, error) {
 	layers := []*LayerReader{}
 	if len(e.files) > 0 {
 		// check if the model is a file path or a model name
@@ -594,7 +594,7 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
 		model = &Model{ModelPath: e.model}
 	}
 
-	if err := load(context.Background(), model, e.opts, defaultSessionDuration); err != nil {
+	if err := load(context.Background(), workDir, model, e.opts, defaultSessionDuration); err != nil {
 		return nil, fmt.Errorf("load model to generate embeddings: %v", err)
 	}
 
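With these signature changes, the work directory flows in a straight line: `Serve` creates it, the middleware attaches it to each `gin.Context`, handlers read it with `c.GetString("workDir")`, and it is threaded through `CreateModel` → `embeddingLayers` → `load` → `llm.New` → `chooseRunners`, so every runner extraction lands inside the one server-owned directory.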
server/routes.go

@@ -58,7 +58,7 @@ var loaded struct {
 var defaultSessionDuration = 5 * time.Minute
 
 // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
-func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
+func load(ctx context.Context, workDir string, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
 	opts := api.DefaultOptions()
 	if err := opts.FromMap(model.Options); err != nil {
 		log.Printf("could not load model options: %v", err)
@@ -94,7 +94,7 @@ func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
 		loaded.Embeddings = model.Embeddings
 	}
 
-	llmModel, err := llm.New(model.ModelPath, model.AdapterPaths, opts)
+	llmModel, err := llm.New(workDir, model.ModelPath, model.AdapterPaths, opts)
 	if err != nil {
 		return err
 	}
@@ -130,6 +130,7 @@ func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
 			llmModel.SetOptions(opts)
 		}
 	}
+
 	loaded.expireAt = time.Now().Add(sessionDuration)
 
 	if loaded.expireTimer == nil {
@@ -150,6 +151,7 @@ func load(ctx context.Context, model *Model, reqOpts map[string]interface{}, sessionDuration time.Duration) error {
 			loaded.digest = ""
 		})
 	}
+
 	loaded.expireTimer.Reset(sessionDuration)
 	return nil
 }
@@ -172,8 +174,11 @@ func GenerateHandler(c *gin.Context) {
 		return
 	}
 
-	sessionDuration := defaultSessionDuration // TODO: set this duration from the request if specified
-	if err := load(c.Request.Context(), model, req.Options, sessionDuration); err != nil {
+	workDir := c.GetString("workDir")
+
+	// TODO: set this duration from the request if specified
+	sessionDuration := defaultSessionDuration
+	if err := load(c.Request.Context(), workDir, model, req.Options, sessionDuration); err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
 	}
@@ -245,7 +250,9 @@ func EmbeddingHandler(c *gin.Context) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
-	if err := load(c.Request.Context(), model, req.Options, 5*time.Minute); err != nil {
+
+	workDir := c.GetString("workDir")
+	if err := load(c.Request.Context(), workDir, model, req.Options, 5*time.Minute); err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 		return
 	}
@@ -335,6 +342,8 @@ func CreateModelHandler(c *gin.Context) {
 		return
 	}
 
+	workDir := c.GetString("workDir")
+
 	ch := make(chan any)
 	go func() {
 		defer close(ch)
@@ -345,7 +354,7 @@ func CreateModelHandler(c *gin.Context) {
 		ctx, cancel := context.WithCancel(c.Request.Context())
 		defer cancel()
 
-		if err := CreateModel(ctx, req.Name, req.Path, fn); err != nil {
+		if err := CreateModel(ctx, workDir, req.Name, req.Path, fn); err != nil {
 			ch <- gin.H{"error": err.Error()}
 		}
 	}()
@@ -519,8 +528,20 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 		)
 	}
 
+	workDir, err := os.MkdirTemp("", "ollama")
+	if err != nil {
+		return err
+	}
+	defer os.RemoveAll(workDir)
+
 	r := gin.Default()
-	r.Use(cors.New(config))
+	r.Use(
+		cors.New(config),
+		func(c *gin.Context) {
+			c.Set("workDir", workDir)
+			c.Next()
+		},
+	)
 
 	r.GET("/", func(c *gin.Context) {
 		c.String(http.StatusOK, "Ollama is running")
@@ -546,12 +567,10 @@ func Serve(ln net.Listener, allowOrigins []string) error {
 
 	// listen for a ctrl+c and stop any loaded llm
 	signals := make(chan os.Signal, 1)
-	signal.Notify(signals, syscall.SIGINT)
+	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
 	go func() {
 		<-signals
-		if loaded.llm != nil {
-			loaded.llm.Close()
-		}
+		os.RemoveAll(workDir)
 		os.Exit(0)
 	}()
 
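Two cleanup paths now cover the work directory: `defer os.RemoveAll(workDir)` in `Serve` handles a normal return, and the signal handler, which now listens for SIGTERM as well as SIGINT, removes it before `os.Exit(0)`; the previous `loaded.llm.Close()` call in that handler is dropped in favor of deleting the directory outright.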
|
Loading…
x
Reference in New Issue
Block a user