allow connecting to remote Ollama server

rense 2025-05-06 18:03:14 +02:00
parent 3d1b3d02d9
commit 94fb15359b

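In practice, the change routes Ollama calls through an explicit ollama.Client: when is_local is false, ollama_fn now builds the client from server_address instead of assuming localhost. A minimal usage sketch of the updated Provider, assuming a reachable remote host (the address below is a placeholder, not part of this commit):

# Hypothetical example: point the updated Provider at a remote Ollama server.
# "192.168.1.50:11434" is a placeholder; substitute your own host and port.
provider = Provider("ollama", "deepseek-r1:32b", server_address="192.168.1.50:11434", is_local=False)
res = provider.respond(["user", "Hello, how are you?"])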

@@ -1,20 +1,19 @@
 import os
-import time
-import ollama
-from ollama import chat
-import requests
-import subprocess
-import ipaddress
-import httpx
-import socket
 import platform
+import socket
+import subprocess
+import time
 from urllib.parse import urlparse
-from dotenv import load_dotenv, set_key
+import httpx
+import requests
+from dotenv import load_dotenv
+from ollama import Client as OllamaClient
 from openai import OpenAI
 from typing import List, Tuple, Type, Dict
-from sources.utility import pretty_print, animate_thinking
 from sources.logger import Logger
+from sources.utility import pretty_print, animate_thinking
 
 class Provider:
     def __init__(self, provider_name, model, server_address="127.0.0.1:5000", is_local=False):
@@ -22,6 +21,7 @@ class Provider:
         self.model = model
         self.is_local = is_local
         self.server_ip = server_address
+        self.server_address = server_address
         self.available_providers = {
             "ollama": self.ollama_fn,
             "server": self.server_fn,
@@ -73,7 +73,8 @@ class Provider:
         except AttributeError as e:
             raise NotImplementedError(f"{str(e)}\nIs {self.provider_name} implemented ?")
         except ModuleNotFoundError as e:
-            raise ModuleNotFoundError(f"{str(e)}\nA import related to provider {self.provider_name} was not found. Is it installed ?")
+            raise ModuleNotFoundError(
+                f"{str(e)}\nAn import related to provider {self.provider_name} was not found. Is it installed?")
         except Exception as e:
             if "try again later" in str(e).lower():
                 return f"{self.provider_name} server is overloaded. Please try again later."
@@ -106,7 +107,6 @@ class Provider:
         except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
             return False
-
     def server_fn(self, history, verbose=False):
         """
         Use a remote server with LLM to generate text.
@@ -141,36 +141,45 @@ class Provider:
                 pretty_print(f"An error occurred: {str(e)}", color="failure")
                 break
         except KeyError as e:
-            raise Exception(f"{str(e)}\nError occured with server route. Are you using the correct address for the config.ini provider?") from e
+            raise Exception(
+                f"{str(e)}\nError occurred with server route. Are you using the correct address for the config.ini provider?") from e
         except Exception as e:
             raise e
         return thought
 
     def ollama_fn(self, history, verbose=False):
         """
-        Use local ollama server to generate text.
+        Use local or remote Ollama server to generate text.
         """
         thought = ""
+        host = "http://localhost:11434" if self.is_local else f"http://{self.server_address}"
+        client = OllamaClient(host=host)
         try:
-            stream = chat(
+            stream = client.chat(
                 model=self.model,
                 messages=history,
                 stream=True,
             )
             for chunk in stream:
                 if verbose:
-                    print(chunk['message']['content'], end='', flush=True)
-                thought += chunk['message']['content']
+                    print(chunk["message"]["content"], end="", flush=True)
+                thought += chunk["message"]["content"]
         except httpx.ConnectError as e:
-            raise Exception("\nOllama connection failed. provider should not be set to ollama if server address is not localhost") from e
-        except ollama.ResponseError as e:
-            if e.status_code == 404:
+            raise Exception(
+                f"\nOllama connection failed at {host}. Check if the server is running."
+            ) from e
+        except Exception as e:
+            if hasattr(e, 'status_code') and e.status_code == 404:
                 animate_thinking(f"Downloading {self.model}...")
-                ollama.pull(self.model)
+                client.pull(self.model)
                 self.ollama_fn(history, verbose)
             if "refused" in str(e).lower():
-                raise Exception("Ollama connection failed. is the server running ?") from e
+                raise Exception(
+                    f"Ollama connection refused at {host}. Is the server running?"
+                ) from e
             raise e
         return thought
 
     def huggingface_fn(self, history, verbose=False):
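Stripped of the Provider plumbing, the new code path in ollama_fn amounts to the sketch below; the host and model are placeholders, and the 404 model-download and error handling shown above are omitted:

# Minimal sketch of the streaming chat call ollama_fn now makes via an explicit client.
from ollama import Client as OllamaClient

client = OllamaClient(host="http://192.168.1.50:11434")  # placeholder remote host
history = [{"role": "user", "content": "Hello, how are you?"}]

thought = ""
for chunk in client.chat(model="deepseek-r1:32b", messages=history, stream=True):
    thought += chunk["message"]["content"]  # each chunk carries a piece of the reply
print(thought)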
@@ -349,6 +358,7 @@ class Provider:
         """
         return thought
 
 if __name__ == "__main__":
     provider = Provider("server", "deepseek-r1:32b", " x.x.x.x:8080")
     res = provider.respond(["user", "Hello, how are you?"])