.........

This commit is contained in:
Blake Mizerany 2024-08-14 01:22:23 -07:00
parent 0a8d6ea86d
commit e7254617e3
4 changed files with 50 additions and 11 deletions

View File

@ -6,8 +6,15 @@ import (
"github.com/spf13/cobra"
"github.com/ollama/ollama/cmd"
"net/http"
_ "net/http/pprof"
)
func main() {
go func() {
http.ListenAndServe("localhost:6060", nil)
}()
cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
}

View File

@ -21,6 +21,7 @@ import (
"slices"
"strconv"
"strings"
"sync"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
@ -209,13 +210,25 @@ type RootFS struct {
DiffIDs []string `json:"diff_ids"`
}
var manifestCache struct {
sync.Mutex
cache map[string]*Manifest
}
func GetManifest(mp ModelPath) (*Manifest, string, error) {
fp, err := mp.GetManifestPath()
if err != nil {
return nil, "", err
manifestCache.Lock()
defer manifestCache.Unlock()
if manifestCache.cache == nil {
manifestCache.cache = make(map[string]*Manifest)
}
if _, err = os.Stat(fp); err != nil {
if manifest, ok := manifestCache.cache[mp.GetFullTagname()]; ok {
return manifest, "", nil
}
fp, err := mp.GetManifestPath()
if err != nil {
return nil, "", err
}
@ -233,6 +246,8 @@ func GetManifest(mp ModelPath) (*Manifest, string, error) {
return nil, "", err
}
manifestCache.cache[mp.GetFullTagname()] = manifest
return manifest, shaStr, nil
}

View File

@ -18,6 +18,7 @@ import (
"path/filepath"
"slices"
"strings"
"sync"
"syscall"
"time"
@ -42,6 +43,9 @@ var mode string = gin.DebugMode
type Server struct {
addr net.Addr
sched *Scheduler
mu sync.Mutex
contextLengthLookup map[string]int
}
func init() {
@ -343,11 +347,24 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return
}
kvData, err := getKVData(m.ModelPath, false)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
contextLength, err := func() (int, error) {
s.mu.Lock()
defer s.mu.Unlock()
if s.contextLengthLookup == nil {
s.contextLengthLookup = make(map[string]int)
}
contextLength, ok := s.contextLengthLookup[m.ModelPath]
if !ok {
kvData, err := getKVData(m.ModelPath, false)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return 0, err
}
contextLength = int(kvData.ContextLength())
s.contextLengthLookup[m.ModelPath] = int(kvData.ContextLength())
}
return contextLength, nil
}()
var count int
for i, s := range input {
@ -357,7 +374,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
return
}
ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
ctxLen := min(opts.NumCtx, int(contextLength))
if len(tokens) > ctxLen {
if !truncate {
c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})

View File

@ -66,7 +66,7 @@ func InitScheduler(ctx context.Context) *Scheduler {
pendingReqCh: make(chan *LlmRequest, maxQueue),
finishedReqCh: make(chan *LlmRequest, maxQueue),
expiredCh: make(chan *runnerRef, maxQueue),
unloadedCh: make(chan interface{}, maxQueue),
unloadedCh: make(chan any, maxQueue),
loaded: make(map[string]*runnerRef),
newServerFn: llm.NewLlamaServer,
getGpuFn: gpu.GetGPUInfo,