import json
import time
import os

def _timed_api_call(api_handler, messages, max_tokens, **kwargs):
    """Call ``api_handler.make_api_call`` and return ``(result, elapsed_seconds)``."""
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # (or go negative) if the wall clock is adjusted during the request.
    started = time.perf_counter()
    result = api_handler.make_api_call(messages, max_tokens, **kwargs)
    return result, time.perf_counter() - started


def generate_response(
    prompt,
    api_handler,
    *,
    system_prompt_path="system_prompt.txt",
    step_limit=10,
):
    """Generate a step-by-step reasoning chain followed by a final answer.

    This is a generator. It repeatedly asks ``api_handler`` for the next
    reasoning step, feeding every previous step back into the conversation,
    and then asks once more for the final answer.

    Args:
        prompt: The user's question, sent as the ``user`` message.
        api_handler: Object exposing
            ``make_api_call(messages, max_tokens, is_final_answer=...)``.
            Each reasoning call must return a JSON-serializable dict with
            ``"title"`` and ``"content"`` keys and, normally, a
            ``"next_action"`` key; the final call must return a dict with a
            ``"content"`` key.
        system_prompt_path: Path of the UTF-8 text file holding the system
            prompt. It is read each time the generator is started.
        step_limit: The reasoning loop stops after the first step whose
            number exceeds this value, so at most ``step_limit + 1``
            reasoning steps are requested (with the default of 10 this is
            the same cap the function has always applied).

    Yields:
        ``(steps, None)`` after every reasoning step that does not end the
        loop, then ``(steps, total_thinking_time)`` once the final answer
        has been appended. ``steps`` is a list of
        ``(title, content, seconds)`` tuples; the *same* list object is
        yielded every time and keeps growing, so copy it if a snapshot is
        needed.

    Raises:
        OSError: If the system prompt file cannot be read.
        KeyError: If a step lacks ``"title"``/``"content"`` or the final
            reply lacks ``"content"``.
    """
    # Explicit encoding: the locale default differs between platforms and
    # would garble a UTF-8 prompt on systems that default to e.g. cp1252.
    with open(system_prompt_path, "r", encoding="utf-8") as file:
        system_prompt = file.read()

    # Seed the conversation with the system prompt, the user input, and an
    # initial assistant acknowledgement that primes the reasoning format.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "Understood. I will now create a detailed reasoning chain following the given instructions, starting with a thorough problem decomposition."},
    ]

    steps = []
    step_count = 1
    total_thinking_time = 0

    # Main loop for generating reasoning steps
    while True:
        step_data, thinking_time = _timed_api_call(api_handler, messages, 300)
        total_thinking_time += thinking_time

        # Store each step's information
        steps.append((f"Step {step_count}: {step_data['title']}", step_data["content"], thinking_time))

        # Feed the step back so the next call can build on it
        messages.append({"role": "assistant", "content": json.dumps(step_data)})

        # A missing or non-string next_action is treated as "keep going"
        # instead of crashing mid-stream; the step cap still bounds the loop.
        next_action = step_data.get("next_action")
        print("Next reasoning step: ", next_action)
        wants_final_answer = (
            isinstance(next_action, str)
            and next_action.lower().strip() == "final_answer"
        )

        if wants_final_answer or step_count > step_limit:
            break

        step_count += 1

        # Yield intermediate results
        yield steps, None

    # Request final answer
    messages.append({
        "role": "user",
        "content": "Please provide the final answer based on your reasoning above.",
    })

    final_data, thinking_time = _timed_api_call(
        api_handler, messages, 200, is_final_answer=True
    )
    total_thinking_time += thinking_time

    # Add final answer to steps
    steps.append(("Final Answer", final_data["content"], thinking_time))

    # Yield final results
    yield steps, total_thinking_time


def load_env_vars():
    """Return the backend settings read from the process environment.

    Each entry falls back to the default listed below when the variable is
    unset; ``PERPLEXITY_API_KEY`` has no default and comes back as ``None``.
    """
    # (variable name, fallback value) in the order the result dict exposes them
    fallbacks = (
        ("OLLAMA_URL", "http://localhost:11434"),
        ("OLLAMA_MODEL", "llama3.1:70b"),
        ("PERPLEXITY_API_KEY", None),
        ("PERPLEXITY_MODEL", "llama-3.1-sonar-small-128k-online"),
    )
    return {name: os.environ.get(name, fallback) for name, fallback in fallbacks}