mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-08 20:25:22 +00:00
runner.go: Update TODOs
This commit is contained in:
parent
46a7c682f2
commit
c8a1741d9b
@ -176,6 +176,8 @@ func (s *Server) shiftContext(seqIndex int) {
|
|||||||
slog.Debug("context limit hit - shifting", "limit", s.numCtx, "nPast", seq.nPast,
|
slog.Debug("context limit hit - shifting", "limit", s.numCtx, "nPast", seq.nPast,
|
||||||
"numKeep", seq.numKeep, "numLeft", numLeft, "numDiscard", numDiscard)
|
"numKeep", seq.numKeep, "numLeft", numLeft, "numDiscard", numDiscard)
|
||||||
|
|
||||||
|
// TODO (jessegross): KV cache removal can fail for certain types of models
|
||||||
|
// server.cpp doesn't handle this, though we can be more graceful
|
||||||
s.lc.KvCacheSeqRm(seqIndex, seq.numKeep, seq.numKeep+numDiscard)
|
s.lc.KvCacheSeqRm(seqIndex, seq.numKeep, seq.numKeep+numDiscard)
|
||||||
s.lc.KvCacheSeqAdd(seqIndex, seq.numKeep+numDiscard, seq.nPast, -numDiscard)
|
s.lc.KvCacheSeqAdd(seqIndex, seq.numKeep+numDiscard, seq.nPast, -numDiscard)
|
||||||
|
|
||||||
@ -327,13 +329,11 @@ func (s *Server) processBatch() {
|
|||||||
slog.Debug("sampled", "piece", piece)
|
slog.Debug("sampled", "piece", piece)
|
||||||
|
|
||||||
// if it's an end of sequence token, break
|
// if it's an end of sequence token, break
|
||||||
// TODO: just end this sequence
|
|
||||||
if s.model.TokenIsEog(token) {
|
if s.model.TokenIsEog(token) {
|
||||||
// TODO (jmorganca): we should send this back
|
// TODO (jmorganca): we should send this back
|
||||||
// as it's important for the /api/generate context
|
// as it's important for the /api/generate context
|
||||||
// seq.responses <- piece
|
// seq.responses <- piece
|
||||||
|
|
||||||
// TODO: end the sequence instead of quitting the pool
|
|
||||||
s.removeSequence(i, "stop")
|
s.removeSequence(i, "stop")
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user