Mirror of https://github.com/tcsenpai/agenticSeek.git (synced 2025-06-06 11:05:26 +00:00)
Feat: llm server config
parent d011696ab0
commit c0172ba986
@@ -2,25 +2,43 @@ from flask import Flask, jsonify, request
 import threading
 import ollama
 import logging
+import json
 
 log = logging.getLogger('werkzeug')
 log.setLevel(logging.ERROR)
 
 app = Flask(__name__)
 
-model = 'deepseek-r1:14b'
-
 # Shared state with thread-safe locks
+
+class Config:
+    def __init__(self):
+        self.model = None
+        self.known_models = []
+        self.allowed_models = []
+        self.model_name = None
+
+    def load(self):
+        with open('config.json', 'r') as f:
+            data = json.load(f)
+        self.known_models = data['known_models']
+        self.model_name = data['model_name']
+
+    def validate_model(self, model):
+        if model not in self.known_models:
+            raise ValueError(f"Model {model} is not known")
+
 class GenerationState:
     def __init__(self):
         self.lock = threading.Lock()
         self.last_complete_sentence = ""
         self.current_buffer = ""
         self.is_generating = False
+        self.model = None
 
 state = GenerationState()
 
-def generate_response(history, model):
+def generate_response(history):
     global state
     try:
         with state.lock:
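The new Config.load reads a config.json sitting next to the server and expects at least the two keys the code uses, known_models and model_name (allowed_models is initialized but never loaded here). A minimal sketch of such a file; the second model name is a placeholder, not taken from the commit:

{
    "model_name": "deepseek-r1:14b",
    "known_models": [
        "deepseek-r1:14b",
        "deepseek-r1:7b"
    ]
}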
@@ -29,21 +47,18 @@ def generate_response(history, model):
             state.current_buffer = ""
 
         stream = ollama.chat(
-            model=model,
+            model=state.model,
             messages=history,
             stream=True,
         )
 
         for chunk in stream:
             content = chunk['message']['content']
             print(content, end='', flush=True)
 
             with state.lock:
                 state.current_buffer += content
 
     except ollama.ResponseError as e:
         if e.status_code == 404:
-            ollama.pull(model)
+            ollama.pull(state.model)
         with state.lock:
             state.is_generating = False
         print(f"Error: {e}")
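One thing worth flagging in the hunk above: on a 404 the handler pulls the missing model but never retries the chat, so the request that triggered the pull still ends with is_generating reset and an error printed. A possible refinement, sketched here and not part of the commit, would rerun the generation once the pull completes:

    except ollama.ResponseError as e:
        if e.status_code == 404:
            # Hypothetical refinement: fetch the missing model, then
            # rerun the generation instead of dropping the request.
            # A production version would cap the retries.
            ollama.pull(state.model)
            return generate_response(history)
        with state.lock:
            state.is_generating = False
        print(f"Error: {e}")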
@@ -62,7 +77,7 @@ def start_generation():
 
     history = data.get('messages', [])
     # Start generation in background thread
-    threading.Thread(target=generate_response, args=(history, model)).start()
+    threading.Thread(target=generate_response, args=(history,)).start()
     return jsonify({"message": "Generation started"}), 202
 
 @app.route('/get_updated_sentence')
@@ -75,4 +90,8 @@ def get_updated_sentence():
     })
 
 if __name__ == '__main__':
+    config = Config()
+    config.load()
+    config.validate_model(config.model_name)
+    state.model = config.model_name
     app.run(host='0.0.0.0', port=5000, debug=False, threaded=True)
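With the server running, a client drives it in two steps: POST the chat history to kick off a background generation, then poll for the buffered text. A sketch of such a client, assuming the /start_generation route path (only the handler name is visible in this diff) and treating the JSON shape returned by /get_updated_sentence as an assumption:

import time
import requests

BASE = 'http://localhost:5000'

# Start generation in the server's background thread (202 Accepted).
requests.post(f'{BASE}/start_generation', json={
    'messages': [{'role': 'user', 'content': 'Hello!'}],
})

# Poll the partial buffer while the server streams from ollama.
for _ in range(30):
    print(requests.get(f'{BASE}/get_updated_sentence').json())
    time.sleep(1)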