unified more elegant approach

2025-06-06 19:15:23 +00:00 · 2024-09-17 12:19:25 +02:00 · 2024-09-17 12:19:25 +02:00 · 06cba4f3ca
commit 06cba4f3ca
parent d838d55334
2 changed files with 108 additions and 105 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,5 @@
 .env
 venv
-.venv
+.venv
 __pycache__/
 .venv/
--- a/api_handlers.py
+++ b/api_handlers.py
@ -2,123 +2,124 @@ import json
 import requests
 import groq
 import time
 from abc import ABC, abstractmethod
-class OllamaHandler:
+class BaseHandler(ABC):
-    def __init__(self, url, model):
+    def __init__(self):
-        self.url = url
+        self.max_attempts = 3
-        self.model = model
+        self.retry_delay = 1
    @abstractmethod
    def _make_request(self, messages, max_tokens):
        """Send ``messages`` to the backend and return its raw reply.

        Abstract hook: each concrete handler supplies the real request. The
        base implementation does nothing and returns ``None``.
        """
    def make_api_call(self, messages, max_tokens, is_final_answer=False):
-        for attempt in range(3):
+        for attempt in range(self.max_attempts):
            try:
-                response = requests.post(
+                response = self._make_request(messages, max_tokens)
-                    f"{self.url}/api/chat",
+                return self._process_response(response, is_final_answer)
                    json={
                        "model": self.model,
                        "messages": messages,
                        "stream": False,
                        "format": "json",
                        "options": {
                            "num_predict": max_tokens,
                            "temperature": 0.2
                        }
                    }
                )
                response.raise_for_status()
                return json.loads(response.json()["message"]["content"])
            except Exception as e:
-                if attempt == 2:
+                if attempt == self.max_attempts - 1:
                    return self._error_response(str(e), is_final_answer)
-                time.sleep(1)
+                time.sleep(self.retry_delay)
-    def _error_response(self, error_msg, is_final_answer):
+    def _process_response(self, response, is_final_answer):
-        if is_final_answer:
+        return json.loads(response)
            return {"title": "Error", "content": f"Failed to generate final answer after 3 attempts. Error: {error_msg}"}
        else:
            return {"title": "Error", "content": f"Failed to generate step after 3 attempts. Error: {error_msg}", "next_action": "final_answer"}
 class PerplexityHandler:
    def __init__(self, api_key, model):
        """Remember the API key and model name used for later requests."""
        self.api_key, self.model = api_key, model
    def make_api_call(self, messages, max_tokens, is_final_answer=False):
        """Call the Perplexity chat-completions endpoint and parse the reply.

        Args:
            messages: Chat messages (dicts with "role" and "content"). When
                ``is_final_answer`` is false, every assistant message is
                removed from this list in place before the request is sent.
            max_tokens: Accepted for interface parity with the other
                handlers; this method does not send it to the API.
            is_final_answer: Whether this call is producing the final answer.

        Returns:
            The decoded JSON object produced by the model; a "Raw Response"
            dict when the model output is not valid JSON; or the result of
            ``self._error_response`` on an HTTP 400 or after three failed
            attempts.
        """
        # Quick dirty fix for API calls in perplexity that removes the assistant message
        if not is_final_answer:
            # Slice assignment still updates the caller's list in place, but
            # avoids popping while iterating by index, which skipped
            # neighbouring assistant messages and could raise IndexError.
            messages[:] = [m for m in messages if m["role"] != "assistant"]

        url = "https://api.perplexity.ai/chat/completions"
        payload = {"model": self.model, "messages": messages}
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        for attempt in range(3):
            try:
                response = requests.post(url, json=payload, headers=headers)
                # Add specific handling for 400 error
                if response.status_code == 400:
                    try:
                        error_content = response.json()
                    except ValueError:
                        # The error body is not JSON; report it verbatim.
                        error_content = response.text
                    print(f"HTTP 400 Error: {error_content}")
                    return self._error_response(f"HTTP 400 Error: {error_content}", is_final_answer)
                response.raise_for_status()
                content = response.json()["choices"][0]["message"]["content"]
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers a non-JSON HTTP body; previously that path
                # reached the raw-response handler with ``content`` unbound.
                print(f"Request failed: {e}")
                if attempt == 2:
                    return self._error_response(str(e), is_final_answer)
                time.sleep(1)
                continue

            print("Content: ", content)
            try:
                return json.loads(content)
            except json.JSONDecodeError:
                print("Warning: content is not a valid JSON, returning raw response")
                # Better detection of final answer in the raw response for Perplexity
                forced_final_answer = '"next_action": "final_answer"' in content.lower()
                print("Forced final answer: ", forced_final_answer)
                return {
                    "title": "Raw Response",
                    "content": content,
                    "next_action": "final_answer" if (is_final_answer or forced_final_answer) else "continue"
                }
    def _error_response(self, error_msg, is_final_answer):
        return {
            "title": "Error",
-            "content": f"API request failed after 3 attempts. Error: {error_msg}",
+            "content": f"Failed to generate {'final answer' if is_final_answer else 'step'} after {self.max_attempts} attempts. Error: {error_msg}",
-            "next_action": "final_answer",
+            "next_action": "final_answer" if is_final_answer else "continue"
        }
-class GroqHandler:
+class OllamaHandler(BaseHandler):
    def __init__(self, url, model):
        """Run the base initialiser, then record the server URL and model name."""
        super().__init__()
        self.url, self.model = url, model
    def _make_request(self, messages, max_tokens):
        """POST a non-streaming chat request and return the reply's content.

        The request goes to ``{self.url}/api/chat`` with JSON output requested,
        ``max_tokens`` passed as the ``num_predict`` option and a fixed
        temperature of 0.2. An HTTP error status raises via
        ``raise_for_status``.

        Returns:
            The ``["message"]["content"]`` field of the JSON response body.
        """
        generation_options = {
            "num_predict": max_tokens,
            "temperature": 0.2
        }
        request_body = {
            "model": self.model,
            "messages": messages,
            "stream": False,
            "format": "json",
            "options": generation_options
        }
        reply = requests.post(f"{self.url}/api/chat", json=request_body)
        reply.raise_for_status()
        return reply.json()["message"]["content"]
 class PerplexityHandler(BaseHandler):
    def __init__(self, api_key, model):
        """Run the base initialiser, then record the API key and model name."""
        super().__init__()
        self.api_key, self.model = api_key, model
    def _clean_messages(self, messages):
        """Return a normalised copy of ``messages`` for the request payload.

        System messages are always kept. Among the other messages, a run of
        consecutive same-role messages is collapsed: repeated user messages
        are joined with a newline into the first of the run, and a repeated
        assistant message is dropped. If the result ends with an assistant
        message, that message is removed.

        The input list and its message dicts are never modified. This method
        runs once per request attempt, so merging into the caller's dicts
        would append the same text again on every retry.

        Args:
            messages: Iterable of dicts with "role" and "content" keys.

        Returns:
            A new list of shallow-copied message dicts.
        """
        cleaned_messages = []
        # Most recent non-system message copied into the result; merges go
        # here so user text is never appended to a system message.
        last_kept = None
        for message in messages:
            role = message["role"]
            if role == "system":
                cleaned_messages.append(dict(message))
            elif last_kept is None or role != last_kept["role"]:
                last_kept = dict(message)
                cleaned_messages.append(last_kept)
            elif role == "user":
                last_kept["content"] += "\n" + message["content"]
        # If the last message is an assistant message, delete it
        if cleaned_messages and cleaned_messages[-1]["role"] == "assistant":
            cleaned_messages.pop()
        return cleaned_messages
    def _make_request(self, messages, max_tokens):
        """POST the cleaned conversation to Perplexity and return the reply text.

        Args:
            messages: Chat messages; they are passed through
                ``self._clean_messages`` before being sent.
            max_tokens: Accepted for interface parity; it is not included in
                the request payload.

        Returns:
            The ``["choices"][0]["message"]["content"]`` field of the JSON
            response body.

        Raises:
            ValueError: On HTTP 400, carrying the API's error message when one
                can be extracted, chained from the original ``HTTPError``.
            requests.exceptions.HTTPError: On any other HTTP error status.
        """
        cleaned_messages = self._clean_messages(messages)
        url = "https://api.perplexity.ai/chat/completions"
        payload = {"model": self.model, "messages": cleaned_messages}
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        response = requests.post(url, json=payload, headers=headers)
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            if response.status_code != 400:
                raise  # Re-raise the exception if it's not a 400 error
            try:
                error_message = response.json().get("error", {}).get("message", "Unknown error")
            except (ValueError, AttributeError):
                # Body is not JSON, or not shaped like {"error": {"message": ...}};
                # fall back to the raw text so the 400 itself is still reported.
                error_message = response.text or "Unknown error"
            raise ValueError(f"Bad request (400): {error_message}") from http_err
        return response.json()["choices"][0]["message"]["content"]
    def _process_response(self, response, is_final_answer):
        """Decode the model reply, falling back to a raw-text result on bad JSON.

        The parent implementation is tried first. If it raises
        ``json.JSONDecodeError``, the text is wrapped in a "Raw Response" dict.
        Its ``next_action`` is "final_answer" when ``is_final_answer`` is set
        or the lower-cased text contains ``"next_action": "final_answer"``,
        and "continue" otherwise.
        """
        try:
            parsed = super()._process_response(response, is_final_answer)
        except json.JSONDecodeError:
            print("Warning: content is not a valid JSON, returning raw response")
            final_marker = '"next_action": "final_answer"'
            text_requests_final = final_marker in response.lower().strip()
            if is_final_answer or text_requests_final:
                next_action = "final_answer"
            else:
                next_action = "continue"
            return {
                "title": "Raw Response",
                "content": response,
                "next_action": next_action
            }
        return parsed
 class GroqHandler(BaseHandler):
    def __init__(self):
        """Run the base initialiser, then create the ``groq.Groq()`` client."""
        super().__init__()
        groq_client = groq.Groq()
        self.client = groq_client
-    def make_api_call(self, messages, max_tokens, is_final_answer=False):
+    def _make_request(self, messages, max_tokens):
-        for attempt in range(3):
+        response = self.client.chat.completions.create(
-            try:
+            model="llama-3.1-70b-versatile",
-                response = self.client.chat.completions.create(
+            messages=messages,
-                    model="llama-3.1-70b-versatile",
+            max_tokens=max_tokens,
-                    messages=messages,
+            temperature=0.2,
-                    max_tokens=max_tokens,
+            response_format={"type": "json_object"}
-                    temperature=0.2,
+        )
-                    response_format={"type": "json_object"}
+        return response.choices[0].message.content
                )
                return json.loads(response.choices[0].message.content)
            except Exception as e:
                if attempt == 2:
                    return self._error_response(str(e), is_final_answer)
                time.sleep(1)
    def _error_response(self, error_msg, is_final_answer):
        """Build the "Error" result dict returned after the retries fail.

        Args:
            error_msg: Text of the last error, embedded in the content.
            is_final_answer: Selects the wording. The step variant also
                carries ``"next_action": "final_answer"``; the final-answer
                variant has no ``next_action`` key.
        """
        if not is_final_answer:
            return {
                "title": "Error",
                "content": f"Failed to generate step after 3 attempts. Error: {error_msg}",
                "next_action": "final_answer",
            }
        return {
            "title": "Error",
            "content": f"Failed to generate final answer after 3 attempts. Error: {error_msg}",
        }