mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-07 11:45:21 +00:00
runner.go: Hold mutex for entire time when processing batch
It is not safe to hold a mutex only while we are waiting for the condition variable to signal that a new sequence has been added. It's possible that a sequence could be added in the middle of batch processing. For example, if a new sequence is added while Decode() is running, it will get picked up for sampling, despite not having been added to the original batch. This change holds a mutex for the majority of the time when active processing is happening, releasing it only for a brief period each time around the loop. Depending on the workload and the scheduler, it may result in unfairness between different requests. However, this was not actually observed in testing. This addresses the correctness issue - better performance and fairness can be achieved with additional improvements in the future.
This commit is contained in:
parent
8e1554c91d
commit
53b600921e
@ -198,9 +198,6 @@ func incompleteUnicode(token string) bool {
|
||||
}
|
||||
|
||||
func (s *Server) run(ctx context.Context) {
|
||||
batch := llama.NewBatch(s.batchSize*len(s.seqs), 0, len(s.seqs))
|
||||
defer batch.Free()
|
||||
|
||||
// build up stop sequences as we recognize them
|
||||
// TODO (jmorganca): simplify this
|
||||
pieces := make([][]string, s.parallel)
|
||||
@ -210,12 +207,22 @@ func (s *Server) run(ctx context.Context) {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
slog.Debug("Processing batch", "seqs", len(s.seqs))
|
||||
pieces = s.processBatch(pieces)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Server) processBatch(pieces [][]string) [][]string {
|
||||
batch := llama.NewBatch(s.batchSize*len(s.seqs), 0, len(s.seqs))
|
||||
defer batch.Free()
|
||||
|
||||
s.mu.Lock()
|
||||
for s.allNil() {
|
||||
s.cond.Wait() // Wait until an item is added
|
||||
}
|
||||
s.mu.Unlock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
slog.Debug("Processing batch", "seqs", len(s.seqs))
|
||||
|
||||
for i, seq := range s.seqs {
|
||||
if seq == nil {
|
||||
@ -254,7 +261,7 @@ func (s *Server) run(ctx context.Context) {
|
||||
}
|
||||
|
||||
if batch.NumTokens() == 0 {
|
||||
continue
|
||||
return pieces
|
||||
}
|
||||
|
||||
err := s.lc.Decode(batch)
|
||||
@ -359,9 +366,7 @@ func (s *Server) run(ctx context.Context) {
|
||||
pieces[i] = []string{}
|
||||
}
|
||||
|
||||
batch.Clear()
|
||||
}
|
||||
}
|
||||
return pieces
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
|
Loading…
x
Reference in New Issue
Block a user