perf: remove blocking /get_complete_sentence endpoint; client now polls /get_updated_sentence

This commit is contained in:
martin legrand 2025-03-29 18:20:47 +01:00
parent e0eee90202
commit 557f7aa333
3 changed files with 3 additions and 11 deletions

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import argparse
import time
from flask import Flask, jsonify, request
from sources.llamacpp_handler import LlamacppLLM
@ -36,16 +37,6 @@ def setup():
generator.set_model(model)
return jsonify({"message": "Model set"}), 200
@app.route('/get_complete_sentence', methods=['GET'])
def get_complete_sentence():
    """Wait until the generator reports completion, then return its status.

    Returns:
        A JSON error payload with HTTP 404 when ``generator`` is falsy;
        otherwise the JSON-encoded dict returned by
        ``generator.get_status()`` once its ``"is_complete"`` entry is truthy.
    """
    if not generator:
        return jsonify({"error": "Generator not initialized"}), 404
    while True:
        status = generator.get_status()
        if status["is_complete"]:
            return jsonify(status)
        # Pause between polls so waiting does not spin a CPU core at 100%.
        # The loop only exits through the return above, so no fall-through
        # return value is needed (Flask rejects a None response anyway).
        time.sleep(0.05)
@app.route('/get_updated_sentence')
def get_updated_sentence():
if not generator:

View File

@ -1,3 +1,4 @@
flask>=2.3.0
ollama>=0.4.7
gunicorn==19.10.0
llama-cpp-python

View File

@ -120,7 +120,7 @@ class Provider:
requests.post(route_gen, json={"messages": history})
is_complete = False
while not is_complete:
response = requests.get(f"http://{self.server_ip}/get_complete_sentence")
response = requests.get(f"http://{self.server_ip}/get_updated_sentence")
if "error" in response.json():
pretty_print(response.json()["error"], color="failure")
break