mirror of https://github.com/tcsenpai/ollama.git
synced 2025-06-07 03:35:21 +00:00
This commit is contained in:
parent 0a8d6ea86d
commit e7254617e3
main.go | 7
@@ -6,8 +6,15 @@ import (
 	"github.com/spf13/cobra"
 
 	"github.com/ollama/ollama/cmd"
+
+	"net/http"
+	_ "net/http/pprof"
 )
 
 func main() {
+	go func() {
+		http.ListenAndServe("localhost:6060", nil)
+	}()
+
 	cobra.CheckErr(cmd.NewCLI().ExecuteContext(context.Background()))
 }
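For context on the hunk above: the blank import of net/http/pprof registers the /debug/pprof/ handlers on http.DefaultServeMux, and passing a nil handler to http.ListenAndServe serves exactly that mux, so the background goroutine exposes the profiler on localhost:6060 for the lifetime of the CLI process (the listen error is silently dropped). A minimal smoke test, not part of this commit, assuming a patched ollama process is already running on the same machine:

// pprofcheck.go: confirm the pprof endpoint exposed by the change above is reachable.
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// debug=1 asks the pprof handler for a human-readable text profile.
	resp, err := http.Get("http://localhost:6060/debug/pprof/heap?debug=1")
	if err != nil {
		fmt.Println("pprof endpoint not reachable:", err)
		return
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("%s, %d bytes of heap profile\n", resp.Status, len(body))
}

The same endpoint also feeds go tool pprof directly, e.g. go tool pprof http://localhost:6060/debug/pprof/heap.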
@@ -21,6 +21,7 @@ import (
 	"slices"
 	"strconv"
 	"strings"
+	"sync"
 
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/auth"
@@ -209,13 +210,25 @@ type RootFS struct {
 	DiffIDs []string `json:"diff_ids"`
 }
 
+var manifestCache struct {
+	sync.Mutex
+	cache map[string]*Manifest
+}
+
 func GetManifest(mp ModelPath) (*Manifest, string, error) {
-	fp, err := mp.GetManifestPath()
-	if err != nil {
-		return nil, "", err
+	manifestCache.Lock()
+	defer manifestCache.Unlock()
+
+	if manifestCache.cache == nil {
+		manifestCache.cache = make(map[string]*Manifest)
 	}
 
-	if _, err = os.Stat(fp); err != nil {
+	if manifest, ok := manifestCache.cache[mp.GetFullTagname()]; ok {
+		return manifest, "", nil
+	}
+
+	fp, err := mp.GetManifestPath()
+	if err != nil {
 		return nil, "", err
 	}
 
@@ -233,6 +246,8 @@ func GetManifest(mp ModelPath) (*Manifest, string, error) {
 		return nil, "", err
 	}
 
+	manifestCache.cache[mp.GetFullTagname()] = manifest
+
 	return manifest, shaStr, nil
 }
 
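The two hunks above (in server/images.go, going by where RootFS and GetManifest live upstream) add a process-wide manifest cache: a mutex-guarded map keyed by the full tag name, lazily initialized on first use, consulted before touching disk, and filled after a successful parse. Note that a cache hit returns an empty digest string. Below is a standalone sketch of the same lazy-init memoization pattern, with hypothetical names and a plain string payload standing in for *Manifest:

package main

import (
	"fmt"
	"sync"
)

// memo is a process-wide cache guarded by a mutex, mirroring the
// manifestCache pattern in the diff above (names here are illustrative).
var memo struct {
	sync.Mutex
	cache map[string]string
}

// lookup returns a cached value, computing and storing it on a miss.
func lookup(key string, compute func(string) string) string {
	memo.Lock()
	defer memo.Unlock()

	if memo.cache == nil { // lazy initialization, as in the patch
		memo.cache = make(map[string]string)
	}
	if v, ok := memo.cache[key]; ok {
		return v
	}

	v := compute(key) // the expensive disk read in the real GetManifest
	memo.cache[key] = v
	return v
}

func main() {
	slow := func(k string) string { fmt.Println("computing", k); return k + "-manifest" }
	fmt.Println(lookup("llama3:latest", slow)) // computes
	fmt.Println(lookup("llama3:latest", slow)) // served from cache
}

One difference from the sketch is only in scope, not structure: the patched GetManifest holds the lock for the whole function, so concurrent callers also serialize on the file read and JSON decode, not just on the map.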
@@ -18,6 +18,7 @@ import (
 	"path/filepath"
 	"slices"
 	"strings"
+	"sync"
 	"syscall"
 	"time"
 
@@ -42,6 +43,9 @@ var mode string = gin.DebugMode
 type Server struct {
 	addr  net.Addr
 	sched *Scheduler
+
+	mu                  sync.Mutex
+	contextLengthLookup map[string]int
 }
 
 func init() {
@@ -343,11 +347,24 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 		return
 	}
 
-	kvData, err := getKVData(m.ModelPath, false)
-	if err != nil {
-		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
-		return
-	}
-
+	contextLength, err := func() (int, error) {
+		s.mu.Lock()
+		defer s.mu.Unlock()
+		if s.contextLengthLookup == nil {
+			s.contextLengthLookup = make(map[string]int)
+		}
+		contextLength, ok := s.contextLengthLookup[m.ModelPath]
+		if !ok {
+			kvData, err := getKVData(m.ModelPath, false)
+			if err != nil {
+				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+				return 0, err
+			}
+			contextLength = int(kvData.ContextLength())
+			s.contextLengthLookup[m.ModelPath] = int(kvData.ContextLength())
+		}
+		return contextLength, nil
+	}()
+
 	var count int
 	for i, s := range input {
@@ -357,7 +374,7 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 			return
 		}
 
-		ctxLen := min(opts.NumCtx, int(kvData.ContextLength()))
+		ctxLen := min(opts.NumCtx, int(contextLength))
 		if len(tokens) > ctxLen {
 			if !truncate {
 				c.JSON(http.StatusBadRequest, gin.H{"error": "input length exceeds maximum context length"})
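The Server and EmbedHandler hunks (presumably server/routes.go) replace the unconditional getKVData call with a per-model memo: the new mu and contextLengthLookup fields cache kvData.ContextLength() keyed by model path, and the later min(opts.NumCtx, ...) clamp reads the cached value instead of kvData. A hedged, self-contained sketch of the same idea, written as a helper method on a hypothetical server type so the locking and lazy map init are easy to see in isolation:

package main

import (
	"fmt"
	"sync"
)

// server carries the same two fields the patch adds to ollama's Server
// struct; everything else here is illustrative.
type server struct {
	mu                  sync.Mutex
	contextLengthLookup map[string]int
}

// contextLength memoizes an expensive per-model lookup (getKVData in the
// real handler) behind the mutex, keyed by model path.
func (s *server) contextLength(modelPath string, load func(string) (int, error)) (int, error) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if s.contextLengthLookup == nil {
		s.contextLengthLookup = make(map[string]int)
	}
	if n, ok := s.contextLengthLookup[modelPath]; ok {
		return n, nil
	}

	n, err := load(modelPath)
	if err != nil {
		return 0, err
	}
	s.contextLengthLookup[modelPath] = n
	return n, nil
}

func main() {
	s := &server{}
	load := func(p string) (int, error) { fmt.Println("reading gguf metadata for", p); return 8192, nil }

	n, _ := s.contextLength("models/llama3", load) // loads
	m, _ := s.contextLength("models/llama3", load) // cached
	fmt.Println(n, m)
}

In the patch itself the memoization is done inline, as an immediately invoked closure inside the handler, rather than as a helper method; the effect is the same.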
@@ -66,7 +66,7 @@ func InitScheduler(ctx context.Context) *Scheduler {
 		pendingReqCh:  make(chan *LlmRequest, maxQueue),
 		finishedReqCh: make(chan *LlmRequest, maxQueue),
 		expiredCh:     make(chan *runnerRef, maxQueue),
-		unloadedCh:    make(chan interface{}, maxQueue),
+		unloadedCh:    make(chan any, maxQueue),
 		loaded:        make(map[string]*runnerRef),
 		newServerFn:   llm.NewLlamaServer,
 		getGpuFn:      gpu.GetGPUInfo,
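The scheduler hunk (presumably server/sched.go) is purely cosmetic: since Go 1.18, any is a predeclared alias for interface{}, so unloadedCh keeps exactly the same type. A tiny illustration:

package main

import "fmt"

func main() {
	// any and interface{} are the same type, so these channels are interchangeable.
	var a chan any = make(chan interface{}, 1)
	a <- "unload"
	fmt.Println(<-a)
}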