mirror of https://github.com/tcsenpai/multi1.git, synced 2025-07-29 14:01:45 +00:00

litellm handler that supports most llms

This commit is contained in:
parent f4de5beac9
commit 0c032babe3
@@ -1,5 +1,10 @@
 import json
+from socket import timeout
+from unittest.mock import Base
 import os
 import requests
+import litellm
+import re
 import groq
+import time
 from abc import ABC, abstractmethod
@@ -13,10 +18,10 @@ class BaseHandler(ABC):
     def _make_request(self, messages, max_tokens):
         pass
 
-    def make_api_call(self, messages, max_tokens, is_final_answer=False):
+    def make_api_call(self, messages, max_tokens, is_final_answer=False, **kwargs):
         for attempt in range(self.max_attempts):
             try:
-                response = self._make_request(messages, max_tokens)
+                response = self._make_request(messages, max_tokens, **kwargs)
                 return self._process_response(response, is_final_answer)
             except Exception as e:
                 if attempt == self.max_attempts - 1:
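The widened signatures above are what let callers thread provider options (temperature, timeout, and so on) through the retry loop down to each concrete handler. A standalone toy sketch of that flow, not from the commit; the EchoHandler class and the retry count of 3 are illustrative assumptions:

import json

class SketchBaseHandler:
    max_attempts = 3  # assumed retry count; the real value lives in BaseHandler

    def make_api_call(self, messages, max_tokens, is_final_answer=False, **kwargs):
        for attempt in range(self.max_attempts):
            try:
                # **kwargs pass through untouched, exactly as in the diff above
                return json.loads(self._make_request(messages, max_tokens, **kwargs))
            except Exception:
                if attempt == self.max_attempts - 1:
                    raise

class EchoHandler(SketchBaseHandler):
    def _make_request(self, messages, max_tokens, **kwargs):
        # a concrete handler sees whatever options the caller supplied
        return json.dumps({"echo": messages[-1]["content"],
                           "temperature": kwargs.get("temperature", 0.2)})

print(EchoHandler().make_api_call([{"role": "user", "content": "hi"}],
                                  max_tokens=64, temperature=0.7))
# -> {'echo': 'hi', 'temperature': 0.7}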
@@ -33,72 +38,28 @@ class BaseHandler(ABC):
             "next_action": "final_answer" if is_final_answer else "continue"
         }
 
-class OllamaHandler(BaseHandler):
-    def __init__(self, url, model):
-        super().__init__()
-        self.url = url
-        self.model = model
-
-    def _make_request(self, messages, max_tokens):
-        response = requests.post(
-            f"{self.url}/api/chat",
-            json={
-                "model": self.model,
-                "messages": messages,
-                "stream": False,
-                "format": "json",
-                "options": {
-                    "num_predict": max_tokens,
-                    "temperature": 0.2
-                }
-            }
-        )
-        response.raise_for_status()
-        return response.json()["message"]["content"]
 
-class PerplexityHandler(BaseHandler):
+class LitellmHandler(BaseHandler):
     def __init__(self, api_key, model):
         super().__init__()
         self.api_key = api_key
         self.model = model
 
-    def _clean_messages(self, messages):
-        cleaned_messages = []
-        last_role = None
-        for message in messages:
-            if message["role"] == "system":
-                cleaned_messages.append(message)
-            elif message["role"] != last_role:
-                cleaned_messages.append(message)
-                last_role = message["role"]
-            elif message["role"] == "user":
-                cleaned_messages[-1]["content"] += "\n" + message["content"]
-        # If the last message is an assistant message, delete it
-        if cleaned_messages and cleaned_messages[-1]["role"] == "assistant":
-            cleaned_messages.pop()
-        return cleaned_messages
-
-    def _make_request(self, messages, max_tokens):
-        cleaned_messages = self._clean_messages(messages)
-
-        url = "https://api.perplexity.ai/chat/completions"
-        payload = {"model": self.model, "messages": cleaned_messages}
-        headers = {
-            "Authorization": f"Bearer {self.api_key}",
-            "Content-Type": "application/json",
-        }
+    def _make_request(self, messages, max_tokens, **kwargs):
         try:
-            response = requests.post(url, json=payload, headers=headers)
-            response.raise_for_status()
-            return response.json()["choices"][0]["message"]["content"]
-        except requests.exceptions.HTTPError as http_err:
-            if response.status_code == 400:
-                error_message = response.json().get("error", {}).get("message", "Unknown error")
-                raise ValueError(f"Bad request (400): {error_message}")
+            temperature = kwargs.get("temperature", 0.2)
+            max_tokens = kwargs.get("max_tokens", 512)
+            timeout = kwargs.get('timeout', 30.0)
+            response = litellm.completion(messages=messages, model=self.model, max_tokens=max_tokens, temperature=temperature, api_key=self.api_key, timeout=timeout)
+            return response.choices[0].message.content
         except Exception as e:
             raise  # Re-raise the exception if it's not a 400 error
 
     def _process_response(self, response, is_final_answer):
         try:
             response = str(response).strip()
             if response.startswith('```'):
                 response = self._remove_code_blocks(response)
             return super()._process_response(response, is_final_answer)
         except json.JSONDecodeError:
             print("Warning: content is not a valid JSON, returning raw response")
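Because litellm.completion exposes one OpenAI-style call for many backends, the handler above should accept any model string litellm can route. A hedged usage sketch; the module name handlers, the model string, and the environment variable are illustrative assumptions, not part of the commit:

import os
from handlers import LitellmHandler  # assumed import path for this file

handler = LitellmHandler(api_key=os.environ["LITELLM_API_KEY"],  # illustrative env var
                         model="gpt-4o-mini")                    # any litellm-routable model
messages = [
    {"role": "system", "content": "Reply in JSON."},
    {"role": "user", "content": "What is 2 + 2?"},
]
# temperature/timeout ride through make_api_call's **kwargs into the
# kwargs.get(...) defaults inside _make_request
step = handler.make_api_call(messages, max_tokens=512, temperature=0.0, timeout=10.0)
print(step["content"], step["next_action"])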
@@ -108,18 +69,7 @@ class PerplexityHandler(BaseHandler):
             "content": response,
             "next_action": "final_answer" if (is_final_answer or forced_final_answer) else "continue"
         }
-
-class GroqHandler(BaseHandler):
-    def __init__(self):
-        super().__init__()
-        self.client = groq.Groq()
-
-    def _make_request(self, messages, max_tokens):
-        response = self.client.chat.completions.create(
-            model="llama-3.1-70b-versatile",
-            messages=messages,
-            max_tokens=max_tokens,
-            temperature=0.2,
-            response_format={"type": "json_object"}
-        )
-        return response.choices[0].message.content
+    def _remove_code_blocks(self, text):
+        cleaned_text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
+        cleaned_text = cleaned_text.strip()
+        return cleaned_text
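For reference, the regex in _remove_code_blocks deletes each ```-fenced span, fences and contents included, before the reply is parsed as JSON. A standalone re-creation (not part of the commit) showing the effect:

import re

def remove_code_blocks(text):
    # DOTALL lets .*? span newlines; the non-greedy match keeps
    # separate fenced blocks from being merged into one deletion
    return re.sub(r'```.*?```', '', text, flags=re.DOTALL).strip()

raw = "Answer below.\n```python\nprint('hi')\n```\nDone."
print(remove_code_blocks(raw))
# -> Answer below.
#
#    Done.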