Apply 01-cache.diff

2025-06-07 11:45:21 +00:00 · 2024-03-19 09:49:24 +01:00 · 2024-03-19 09:49:24 +01:00 · 0a0e9f3e0f
commit 0a0e9f3e0f
parent 58d95cc9bd
1 changed file with 3 additions and 1 deletion
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1007,13 +1007,15 @@ struct llama_server_context
                slot.n_sent_text += result.text_to_send.size();
                // add the token to slot queue and cache
            }
-            slot.add_token_string(result);
+
            if (slot.params.stream)
            {
                send_partial_response(slot, result);
            }
        }
+        slot.add_token_string(result);
+
        if (incomplete)
        {
            slot.has_next_token = true;