mirror of
https://github.com/tcsenpai/ollama.git
synced 2025-06-07 11:45:21 +00:00
Apply 01-cache.diff
This commit is contained in:
parent
58d95cc9bd
commit
0a0e9f3e0f
4
llm/ext_server/server.cpp
vendored
4
llm/ext_server/server.cpp
vendored
@ -1007,13 +1007,15 @@ struct llama_server_context
|
|||||||
slot.n_sent_text += result.text_to_send.size();
|
slot.n_sent_text += result.text_to_send.size();
|
||||||
// add the token to slot queue and cache
|
// add the token to slot queue and cache
|
||||||
}
|
}
|
||||||
slot.add_token_string(result);
|
|
||||||
if (slot.params.stream)
|
if (slot.params.stream)
|
||||||
{
|
{
|
||||||
send_partial_response(slot, result);
|
send_partial_response(slot, result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slot.add_token_string(result);
|
||||||
|
|
||||||
if (incomplete)
|
if (incomplete)
|
||||||
{
|
{
|
||||||
slot.has_next_token = true;
|
slot.has_next_token = true;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user