From 557f7aa33300d5db02d9d5553912e2b18dbb9c7b Mon Sep 17 00:00:00 2001
From: martin legrand
Date: Sat, 29 Mar 2025 18:20:47 +0100
Subject: [PATCH] perf: trying to improve perf

---
 server/app.py           | 11 +----------
 server/requirements.txt |  1 +
 sources/llm_provider.py |  2 +-
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/server/app.py b/server/app.py
index dcd0775..2245c66 100644
--- a/server/app.py
+++ b/server/app.py
@@ -1,6 +1,7 @@
 #!/usr/bin python3
 
 import argparse
+import time
 
 from flask import Flask, jsonify, request
 from sources.llamacpp_handler import LlamacppLLM
@@ -36,16 +37,6 @@ def setup():
     generator.set_model(model)
     return jsonify({"message": "Model set"}), 200
 
-@app.route('/get_complete_sentence', methods=['GET'])
-def get_complete_sentence():
-    if not generator:
-        return jsonify({"error": "Generator not initialized"}), 404
-    while True:
-        status = generator.get_status()
-        if status["is_complete"]:
-            return jsonify(status)
-    return None
-
 @app.route('/get_updated_sentence')
 def get_updated_sentence():
     if not generator:
diff --git a/server/requirements.txt b/server/requirements.txt
index 8272fe5..c97777d 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -1,3 +1,4 @@
 flask>=2.3.0
 ollama>=0.4.7
+gunicorn==19.10.0
 llama-cpp-python
\ No newline at end of file
diff --git a/sources/llm_provider.py b/sources/llm_provider.py
index 749ab13..79034fd 100644
--- a/sources/llm_provider.py
+++ b/sources/llm_provider.py
@@ -120,7 +120,7 @@ class Provider:
         requests.post(route_gen, json={"messages": history})
         is_complete = False
         while not is_complete:
-            response = requests.get(f"http://{self.server_ip}/get_complete_sentence")
+            response = requests.get(f"http://{self.server_ip}/get_updated_sentence")
             if "error" in response.json():
                 pretty_print(response.json()["error"], color="failure")
                 break
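
The removed /get_complete_sentence handler held each request open in a tight `while True` loop with no sleep, pinning a server worker for the entire generation; after this patch the client polls the non-blocking /get_updated_sentence route instead. A minimal sketch of the resulting client-side loop follows, assuming the status payload carries the same "is_complete" flag seen in the removed handler; the poll interval, timeout, and function name are illustrative, not part of the patch:

import time
import requests

def wait_for_completion(server_ip, poll_interval=0.5, timeout=300.0):
    """Poll /get_updated_sentence until the server reports completion.

    Sketch only: "is_complete" mirrors the status dict from the removed
    handler; the interval and timeout values are assumptions.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        status = requests.get(f"http://{server_ip}/get_updated_sentence").json()
        if "error" in status:
            raise RuntimeError(status["error"])
        if status.get("is_complete"):
            return status
        time.sleep(poll_interval)  # yield between polls instead of busy-waiting
    raise TimeoutError("generation did not complete before the deadline")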
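
The new gunicorn pin in server/requirements.txt suggests serving the Flask app through a multi-worker WSGI server rather than the single-threaded dev server, so status polls can be answered while a generation request is in flight. A plausible invocation, assuming the Flask instance is the `app` object in server/app.py and the command is run from the server/ directory, would be `gunicorn --workers 1 --threads 4 --bind 0.0.0.0:5000 app:app`. Note that 19.10.0 is an old 19.x release; current gunicorn is in the 2x series, so the pin may be worth revisiting.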