From c8a1741d9ba8cd1e8e0a74a0d7ad77fe0ec04184 Mon Sep 17 00:00:00 2001
From: Jesse Gross
Date: Tue, 27 Aug 2024 17:05:23 -0700
Subject: [PATCH] runner.go: Update TODOs

---
 llama/runner/runner.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama/runner/runner.go b/llama/runner/runner.go
index 0b25e42a..77f28ce8 100644
--- a/llama/runner/runner.go
+++ b/llama/runner/runner.go
@@ -176,6 +176,8 @@ func (s *Server) shiftContext(seqIndex int) {
 	slog.Debug("context limit hit - shifting", "limit", s.numCtx, "nPast", seq.nPast,
 		"numKeep", seq.numKeep, "numLeft", numLeft, "numDiscard", numDiscard)
 
+	// TODO (jessegross): KV cache removal can fail for certain types of models
+	// server.cpp doesn't handle this, though we can be more graceful
 	s.lc.KvCacheSeqRm(seqIndex, seq.numKeep, seq.numKeep+numDiscard)
 	s.lc.KvCacheSeqAdd(seqIndex, seq.numKeep+numDiscard, seq.nPast, -numDiscard)
 
@@ -327,13 +329,11 @@ func (s *Server) processBatch() {
 		slog.Debug("sampled", "piece", piece)
 
 		// if it's an end of sequence token, break
-		// TODO: just end this sequence
 		if s.model.TokenIsEog(token) {
 			// TODO (jmorganca): we should send this back
 			// as it's important for the /api/generate context
 			// seq.responses <- piece
 
-			// TODO: end the sequence instead of quitting the pool
 			s.removeSequence(i, "stop")
 			continue
 		}