diff --git a/.gitignore b/.gitignore
index 98a4702..3dab6f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,23 @@
-__pycache__
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+.Python
+env/
+venv/
+ENV/
+
+# Streamlit
+.streamlit/
+
+# Environment variables
 .env
-venv
-.venv
\ No newline at end of file
+
+# IDEs
+.vscode/
+.idea/
+
+# OS generated files
+.DS_Store
+Thumbs.db
\ No newline at end of file
diff --git a/README.md b/README.md
index 8ce90bb..502c276 100644
--- a/README.md
+++ b/README.md
@@ -14,9 +14,9 @@
 - [ ] Add more providers
 - [ ] Use something like LiteLLM to unify models code and avoid repeating code for each provider
 
-## Original G1 Example video
+## Example
 
-[Video Demo](https://github.com/user-attachments/assets/db2a221f-f8eb-48c3-b5a7-8399c6300243)
+![Simple Math](examples/maths.png)
 
 ## Description
 
@@ -36,32 +36,12 @@ At each step, the LLM can choose to continue to another reasoning step, or provi
 
 The reasoning ability of the LLM is therefore improved through combining Chain-of-Thought with the requirement to try multiple methods, explore alternative answers, question previous draft solutions, and consider the LLM’s limitations. This alone, without any training, is sufficient to achieve ~70% accuracy on the Strawberry problem (n=10, "How many Rs are in strawberry?"). Without prompting, Llama-3.1-70b had 0% accuracy and ChatGPT-4o had 30% accuracy.
 
-### Examples
+### Disclaimer
 
 > [!IMPORTANT]
 > multi1 is not perfect, but it can perform significantly better than LLMs out-of-the-box. From initial testing, multi1 accurately solves simple logic problems 60-80% of the time that usually stump LLMs. However, accuracy has yet to be formally evaluated. See examples below.
 
-##### How many Rs are in strawberry?
-Prompt: How many Rs are in strawberry?
-
-Result:
-
-![Strawberry example](examples/strawberry.png)
-
----
-
-Prompt: Which is larger, .9 or .11?
-
-Result:
-
-![0.9 or 0.11 example](examples/math.png)
-
-
-Prompt: In the context of Lie Group and Lie Algebra, let $R \in E$ be an irreducible root system. Show that then $E$ is an irreducible representation of the Weyl group $W$.
-
-![](examples/lie.1.png)
-
 ### Quickstart
diff --git a/__pycache__/api_handlers.cpython-310.pyc b/__pycache__/api_handlers.cpython-310.pyc
index 97b3881..275a2fc 100644
Binary files a/__pycache__/api_handlers.cpython-310.pyc and b/__pycache__/api_handlers.cpython-310.pyc differ
diff --git a/__pycache__/utils.cpython-310.pyc b/__pycache__/utils.cpython-310.pyc
index eed806d..b607340 100644
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
diff --git a/api_handlers.py b/app/api_handlers.py
similarity index 100%
rename from api_handlers.py
rename to app/api_handlers.py
diff --git a/app/config_menu.py b/app/config_menu.py
new file mode 100644
index 0000000..16d859b
--- /dev/null
+++ b/app/config_menu.py
@@ -0,0 +1,49 @@
+import streamlit as st
+import os
+from dotenv import load_dotenv, set_key
+
+def load_env_vars():
+    load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
+    return {
+        'OLLAMA_URL': os.getenv('OLLAMA_URL', 'http://localhost:11434'),
+        'OLLAMA_MODEL': os.getenv('OLLAMA_MODEL', 'mistral'),
+        'PERPLEXITY_API_KEY': os.getenv('PERPLEXITY_API_KEY', ''),
+        'PERPLEXITY_MODEL': os.getenv('PERPLEXITY_MODEL', 'mistral-7b-instruct'),
+        'GROQ_API_KEY': os.getenv('GROQ_API_KEY', ''),
+        'GROQ_MODEL': os.getenv('GROQ_MODEL', 'mixtral-8x7b-32768')
+    }
+
+def save_env_vars(config):
+    env_path = os.path.join(os.path.dirname(__file__), "..", ".env")
+    for key, value in config.items():
+        set_key(env_path, key, value)
+
+def config_menu():
+    st.sidebar.markdown("## 🛠️ Configuration")
+
+    config = load_env_vars()
+
+    with st.sidebar.expander("Edit Configuration"):
+        new_config = {}
+        new_config['OLLAMA_URL'] = st.text_input("Ollama URL", value=config['OLLAMA_URL'])
+        new_config['OLLAMA_MODEL'] = st.text_input("Ollama Model", value=config['OLLAMA_MODEL'])
+        new_config['PERPLEXITY_API_KEY'] = st.text_input("Perplexity API Key", value=config['PERPLEXITY_API_KEY'], type="password")
+        new_config['PERPLEXITY_MODEL'] = st.text_input("Perplexity Model", value=config['PERPLEXITY_MODEL'])
+        new_config['GROQ_API_KEY'] = st.text_input("Groq API Key", value=config['GROQ_API_KEY'], type="password")
+        new_config['GROQ_MODEL'] = st.text_input("Groq Model", value=config['GROQ_MODEL'])
+
+        if st.button("Save Configuration"):
+            save_env_vars(new_config)
+            st.success("Configuration saved successfully!")
+
+    return config
+
+def display_config(backend, config):
+    st.sidebar.markdown("## 🛠️ Current Configuration")
+    if backend == "Ollama":
+        st.sidebar.markdown(f"- 🖥️ Ollama URL: `{config['OLLAMA_URL']}`")
+        st.sidebar.markdown(f"- 🤖 Ollama Model: `{config['OLLAMA_MODEL']}`")
+    elif backend == "Perplexity AI":
+        st.sidebar.markdown(f"- 🧠 Perplexity AI Model: `{config['PERPLEXITY_MODEL']}`")
+    else: # Groq
+        st.sidebar.markdown(f"- ⚡ Groq Model: `{config['GROQ_MODEL']}`")
diff --git a/app/main.py b/app/main.py
new file mode 100644
index 0000000..696268a
--- /dev/null
+++ b/app/main.py
@@ -0,0 +1,69 @@
+import streamlit as st
+from dotenv import load_dotenv
+from api_handlers import OllamaHandler, PerplexityHandler, GroqHandler
+from utils import generate_response
+from config_menu import config_menu, display_config
+import os
+
+# Load environment variables
+load_dotenv()
+
+def load_css():
+    with open(os.path.join(os.path.dirname(__file__), "..", "static", "styles.css")) as f:
+        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
+
+def setup_page():
+    st.set_page_config(page_title="multi1 - Unified AI Reasoning Chains", page_icon="🧠", layout="wide")
+    load_css()
st.markdown(""" +

+ 🧠 multi1 - Unified AI Reasoning Chains +

+ """, unsafe_allow_html=True) + st.markdown(""" +

+ This app demonstrates AI reasoning chains using different backends: Ollama, Perplexity AI, and Groq. + Choose a backend and enter your query to see the step-by-step reasoning process. +

+ """, unsafe_allow_html=True) + +def get_api_handler(backend, config): + if backend == "Ollama": + return OllamaHandler(config['OLLAMA_URL'], config['OLLAMA_MODEL']) + elif backend == "Perplexity AI": + return PerplexityHandler(config['PERPLEXITY_API_KEY'], config['PERPLEXITY_MODEL']) + else: # Groq + return GroqHandler(config['GROQ_API_KEY'], config['GROQ_MODEL']) + +def main(): + setup_page() + + st.sidebar.markdown('', unsafe_allow_html=True) + config = config_menu() + + backend = st.sidebar.selectbox("Choose AI Backend", ["Ollama", "Perplexity AI", "Groq"]) + display_config(backend, config) + api_handler = get_api_handler(backend, config) + + user_query = st.text_input("💬 Enter your query:", placeholder="e.g., How many 'R's are in the word strawberry?") + + if user_query: + st.write("🔍 Generating response...") + response_container = st.empty() + time_container = st.empty() + + for steps, total_thinking_time in generate_response(user_query, api_handler): + with response_container.container(): + for title, content, _ in steps: + if title.startswith("Final Answer"): + st.markdown(f'

🎯 {title}

', unsafe_allow_html=True) + st.markdown(f'
{content}
', unsafe_allow_html=True) + else: + with st.expander(f"📝 {title}", expanded=True): + st.markdown(f'
{content}
', unsafe_allow_html=True) + + if total_thinking_time is not None: + time_container.markdown(f'

⏱️ Total thinking time: {total_thinking_time:.2f} seconds

', unsafe_allow_html=True) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/utils.py b/app/utils.py similarity index 100% rename from utils.py rename to app/utils.py diff --git a/examples/lie.1.png b/examples/lie.1.png deleted file mode 100644 index f990d3d..0000000 Binary files a/examples/lie.1.png and /dev/null differ diff --git a/examples/math.png b/examples/math.png deleted file mode 100644 index a20f324..0000000 Binary files a/examples/math.png and /dev/null differ diff --git a/examples/maths.png b/examples/maths.png new file mode 100644 index 0000000..cd8f6e9 Binary files /dev/null and b/examples/maths.png differ diff --git a/examples/strawberry.png b/examples/strawberry.png deleted file mode 100644 index a8f677b..0000000 Binary files a/examples/strawberry.png and /dev/null differ diff --git a/main.py b/main.py deleted file mode 100644 index fb21acc..0000000 --- a/main.py +++ /dev/null @@ -1,71 +0,0 @@ -import streamlit as st -from dotenv import load_dotenv -from api_handlers import OllamaHandler, PerplexityHandler, GroqHandler -from utils import generate_response, load_env_vars - -# Load environment variables and configuration -load_dotenv() -config = load_env_vars() - -def setup_page(): - st.set_page_config(page_title="multi1 - Unified AI Reasoning Chains", page_icon="🧠", layout="wide") - st.markdown(""" -

-        🧠 multi1 - Unified AI Reasoning Chains
-    </h1>
-    """, unsafe_allow_html=True)
-    st.markdown("""
-    <p style='text-align: center; font-size: 1.1em;'>
-        This app demonstrates AI reasoning chains using different backends: Ollama, Perplexity AI, and Groq.
-        Choose a backend and enter your query to see the step-by-step reasoning process.
-    </p>
-    """, unsafe_allow_html=True)
-
-def get_api_handler(backend):
-    if backend == "Ollama":
-        return OllamaHandler(config['OLLAMA_URL'], config['OLLAMA_MODEL'])
-    elif backend == "Perplexity AI":
-        return PerplexityHandler(config['PERPLEXITY_API_KEY'], config['PERPLEXITY_MODEL'])
-    else: # Groq
-        return GroqHandler()
-
-def display_config(backend):
-    st.sidebar.markdown("## 🛠️ Current Configuration")
-    if backend == "Ollama":
-        st.sidebar.markdown(f"- 🖥️ Ollama URL: `{config['OLLAMA_URL']}`")
-        st.sidebar.markdown(f"- 🤖 Ollama Model: `{config['OLLAMA_MODEL']}`")
-    elif backend == "Perplexity AI":
-        st.sidebar.markdown(f"- 🧠 Perplexity AI Model: `{config['PERPLEXITY_MODEL']}`")
-    else: # Groq
-        st.sidebar.markdown("- ⚡ Using Groq API")
-
-def main():
-    setup_page()
-
-    st.sidebar.markdown("<h3>⚙️ Settings</h3>", unsafe_allow_html=True)
-    backend = st.sidebar.selectbox("Choose AI Backend", ["Ollama", "Perplexity AI", "Groq"])
-    display_config(backend)
-    api_handler = get_api_handler(backend)
-
-    user_query = st.text_input("💬 Enter your query:", placeholder="e.g., How many 'R's are in the word strawberry?")
-
-    if user_query:
-        st.write("🔍 Generating response...")
-        response_container = st.empty()
-        time_container = st.empty()
-
-        for steps, total_thinking_time in generate_response(user_query, api_handler):
-            with response_container.container():
-                for title, content, _ in steps:
-                    if title.startswith("Final Answer"):
-                        st.markdown(f"<h3>🎯 {title}</h3>", unsafe_allow_html=True)
-                        st.markdown(f"<p>{content}</p>", unsafe_allow_html=True)
-                    else:
-                        with st.expander(f"📝 {title}", expanded=True):
-                            st.markdown(f"<p>{content}</p>", unsafe_allow_html=True)
-
-        if total_thinking_time is not None:
-            time_container.markdown(f"<strong>⏱️ Total thinking time: {total_thinking_time:.2f} seconds</strong>", unsafe_allow_html=True)
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/static/styles.css b/static/styles.css
new file mode 100644
index 0000000..d2ee572
--- /dev/null
+++ b/static/styles.css
@@ -0,0 +1,29 @@
+body {
+    font-family: -apple-system, BlinkMacSystemFont, sans-serif;
+}
+
+h1, h2, h3, h4, h5, h6 {
+    font-family: -apple-system, BlinkMacSystemFont, sans-serif;
+}
+
+.main-title {
+    text-align: center;
+}
+
+.main-description {
+    text-align: center;
+    font-size: 1.1em;
+}
+
+.sidebar-title {
+    font-family: -apple-system, BlinkMacSystemFont, sans-serif;
+}
+
+.expander-title {
+    font-family: -apple-system, BlinkMacSystemFont, sans-serif;
+}
+
+.thinking-time {
+    font-family: -apple-system, BlinkMacSystemFont, sans-serif;
+    font-weight: bold;
+}