Set GPU memory percentage for vLLM server

Author: martin legrand
Date:   2025-04-08 20:58:32 +02:00
Parent: 9a1b2b93f6
Commit: ed2a9cc204


@@ -51,6 +51,7 @@ class Vllm(GeneratorLLM):
         sampling_params = SamplingParams(
             temperature=0.7,
             max_tokens=512,
+            gpu_memory_utilization=0.5,
             stream=True  # Enable streaming
         )
         outputs = self.llm.generate(prompt, sampling_params, use_tqdm=False)
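Note: in vLLM's offline inference API, gpu_memory_utilization is an engine-level argument accepted by the LLM constructor, not a field of SamplingParams. A minimal sketch of that placement follows; the model name is illustrative, and 0.5 simply caps vLLM's GPU memory pool at 50%:

    from vllm import LLM, SamplingParams

    # Engine arguments: gpu_memory_utilization belongs here, on the LLM
    # constructor, where it limits the fraction of GPU memory vLLM reserves.
    llm = LLM(
        model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model choice
        gpu_memory_utilization=0.5,
    )

    # Per-request sampling settings stay in SamplingParams.
    sampling_params = SamplingParams(
        temperature=0.7,
        max_tokens=512,
    )

    outputs = llm.generate(["Hello, world"], sampling_params, use_tqdm=False)
    print(outputs[0].outputs[0].text)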