mirror of https://github.com/tcsenpai/ollama.git
synced 2025-06-06 19:25:21 +00:00
server.cpp: cleanup cross attention state
parent 7d5e0ff80e
commit 71e76f8c90

llm/ext_server/server.cpp (vendored): 12 changed lines
@@ -729,6 +729,10 @@ struct llama_server_context
             slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
         }
 
+        // Check for mllama architecture, which processes images differently than llava
+        char arch_str[256];
+        llama_model_meta_val_str(model, "general.architecture", arch_str, 256);
+        bool is_mllama = strcmp(arch_str, "mllama") == 0;
         if (multimodal)
         {
             const auto &images_data = data.find("image_data");
@@ -738,11 +742,6 @@ struct llama_server_context
                 {
                     const std::vector<uint8_t> image_buffer = base64_decode(img["data"].get<std::string>());
 
-                    // Check for mllama architecture, which processes images differently than llava
-                    char arch_str[256];
-                    llama_model_meta_val_str(model, "general.architecture", arch_str, 256);
-                    bool is_mllama = strcmp(arch_str, "mllama") == 0;
-
                     if (is_mllama) {
                         LOG_INFO("MLLAMA architecture detected, processing first image", {{"slot_id", slot->id}});
 
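These two hunks move the mllama detection out of the per-image loop so it runs once, before the if (multimodal) branch, and is therefore available even for requests that carry no image data. A minimal sketch of that probe in isolation, under the assumption of a loaded llama.cpp model handle; the model_is_mllama helper name and the return-value check are mine, while the metadata key, the "mllama" comparison, and llama_model_meta_val_str come from the patch:

#include <cstring>
#include "llama.h"

// Hypothetical helper: reads the GGUF metadata value for "general.architecture"
// into a local buffer and reports whether the loaded model uses the mllama
// architecture, which the server handles differently from llava.
static bool model_is_mllama(const struct llama_model * model) {
    char arch_str[256] = {0};
    // llama_model_meta_val_str returns the length of the value on success,
    // or a negative value if the key is not present in the model metadata.
    if (llama_model_meta_val_str(model, "general.architecture", arch_str, sizeof(arch_str)) < 0) {
        return false;
    }
    return strcmp(arch_str, "mllama") == 0;
}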
@@ -820,6 +819,8 @@ struct llama_server_context
                     slot->params.input_suffix = prompt.substr(begin_prefix);
                     slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
                 }
+            } else {
+                llama_set_cross_attn_state(ctx, nullptr);
             }
         }
 
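The new else branch is the cleanup the commit title refers to: when a request reaches this point without multimodal input, any cross-attention state installed by an earlier mllama image request is cleared instead of staying attached to the context. A rough sketch of the resulting control flow; the surrounding branches are paraphrased from the diff context, and only the llama_set_cross_attn_state(ctx, nullptr) call is taken verbatim from the patch:

if (multimodal) {
    // decode any "image_data" entries; for an mllama model the first image
    // is used to set up cross-attention state for this request
    // ...
} else {
    // text-only request: drop whatever cross-attention state a previous
    // image request may have left behind in the context
    llama_set_cross_attn_state(ctx, nullptr);
}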
@@ -1496,6 +1497,7 @@ struct llama_server_context
             {
                 if (slot.task_id == task.target_id)
                 {
+                    slot.reset();
                     slot.release();
                     break;
                 }
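The last hunk extends the same cleanup to task cancellation: the slot is reset before it is released, so per-request state (presumably including the image and cross-attention bookkeeping this commit is concerned with) does not carry over into the next request that reuses the slot. An annotated view of the changed branch; the comments are mine, not from the source:

if (slot.task_id == task.target_id)
{
    slot.reset();    // clear the slot's per-request state first
    slot.release();  // then hand the slot back as free
    break;           // the cancel target was found; stop searching
}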