From 8dde9f19a4e820eedb866c6803d8c4efd3e30fb4 Mon Sep 17 00:00:00 2001
From: martin legrand
Date: Mon, 24 Mar 2025 10:41:43 +0100
Subject: [PATCH] fix: server script

---
 README.md                              |  4 +++-
 server/config.json                     | 30 --------------------------
 server/{server.py => server_ollama.py} |  7 +++++-
 3 files changed, 9 insertions(+), 32 deletions(-)
 delete mode 100644 server/config.json
 rename server/{server.py => server_ollama.py} (90%)

diff --git a/README.md b/README.md
index ab8826f..6dd99b6 100644
--- a/README.md
+++ b/README.md
@@ -212,6 +212,8 @@ If you have a powerful computer or a server that you can use, but you want to us
 
 ### 1️⃣ **Set up and start the server scripts**
 
+You need to have Ollama installed on the server (we will integrate vLLM and llama.cpp soon).
+
 On your "server" that will run the AI model, get the ip address
 
 ```sh
@@ -223,7 +225,7 @@ Note: For Windows or macOS, use ipconfig or ifconfig respectively to find the IP
 Clone the repository and then, run the script `stream_llm.py` in `server/`
 
 ```sh
-python3 server_ollama.py
+python3 server_ollama.py --model "deepseek-r1:32b"
 ```
 
 ### 2️⃣ **Run it**
diff --git a/server/config.json b/server/config.json
deleted file mode 100644
index b976680..0000000
--- a/server/config.json
+++ /dev/null
@@ -1,30 +0,0 @@
-{
-    "model_name": "deepseek-r1:14b",
-    "known_models": [
-        "qwq:32b",
-        "deepseek-r1:1.5b",
-        "deepseek-r1:7b",
-        "deepseek-r1:14b",
-        "deepseek-r1:32b",
-        "deepseek-r1:70b",
-        "deepseek-r1:671b",
-        "deepseek-coder:1.3b",
-        "deepseek-coder:6.7b",
-        "deepseek-coder:33b",
-        "llama2-uncensored:7b",
-        "llama2-uncensored:70b",
-        "llama3.1:8b",
-        "llama3.1:70b",
-        "llama3.3:70b",
-        "llama3:8b",
-        "llama3:70b",
-        "i4:14b",
-        "mistral:7b",
-        "mistral:70b",
-        "mistral:33b",
-        "qwen1:7b",
-        "qwen1:14b",
-        "qwen1:32b",
-        "qwen1:70b"
-    ]
-}
\ No newline at end of file
diff --git a/server/server.py b/server/server_ollama.py
similarity index 90%
rename from server/server.py
rename to server/server_ollama.py
index 645cba0..91c8dca 100644
--- a/server/server.py
+++ b/server/server_ollama.py
@@ -6,13 +6,18 @@ from flask import Flask, jsonify, request
 import threading
 import ollama
 import logging
+import argparse
 
 log = logging.getLogger('werkzeug')
 log.setLevel(logging.ERROR)
 
+parser = argparse.ArgumentParser(description='AgenticSeek server script')
+parser.add_argument('--model', type=str, help='Model to use. e.g. deepseek-r1:14b', required=True)
+args = parser.parse_args()
+
 app = Flask(__name__)
 
-model = 'deepseek-r1:14b'
+model = args.model
 
 # Shared state with thread-safe locks
 class GenerationState:
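
For reference, below is a sketch of how the top of `server/server_ollama.py` reads once this patch is applied, assembled from the hunk above; the Flask routes and the body of `GenerationState` are untouched by this commit and are elided here. The net effect is that the model name now comes from a required `--model` flag instead of the deleted `server/config.json`, so argparse exits with a usage error if the flag is omitted.

```python
from flask import Flask, jsonify, request
import threading
import ollama
import logging
import argparse

# Silence werkzeug's per-request logging from Flask's development server.
log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

# Model selection moved from server/config.json to a required CLI flag.
parser = argparse.ArgumentParser(description='AgenticSeek server script')
parser.add_argument('--model', type=str, help='Model to use. e.g. deepseek-r1:14b', required=True)
args = parser.parse_args()

app = Flask(__name__)

model = args.model  # e.g. "deepseek-r1:32b", passed on to ollama
```

The server is then started as shown in the updated README:

```sh
python3 server_ollama.py --model "deepseek-r1:32b"
```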