# llm_rng_project/tests/test_entropy.py

import unittest
import numpy as np
import logging
from tqdm import tqdm
from llm_rng.rng_generator import LLMRNG
from llm_rng.prompt_generator import PromptGenerator

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class TestEntropy(unittest.TestCase):
    def setUp(self):
        logger.info("Initializing test environment...")
        self.rng = LLMRNG()
        self.prompt_gen = PromptGenerator()

    def test_entropy_of_generated_numbers(self):
        samples = []
        n_samples = 50  # Reduced from 1000 for faster testing
        logger.info(f"Generating {n_samples} random numbers for entropy test...")
        for i in tqdm(range(n_samples), desc="Generating numbers"):
            prompt = self.prompt_gen.get_random_prompt()
            try:
                number = self.rng.generate_random_number(prompt, 1, 100)
                samples.append(number)
                if i % 10 == 0:  # Log every 10th number
                    logger.info(f"Generated number {i}: {number}")
            except Exception as e:
                # Skip failed generations; the guard below catches the
                # case where nothing was generated at all.
                logger.error(f"Error generating number: {str(e)}")

        self.assertTrue(samples, "No samples were generated")

        # Calculate and display distribution statistics
        logger.info("\nCalculating distribution statistics...")
        values, counts = np.unique(samples, return_counts=True)
        for value, count in zip(values, counts):
            logger.info(f"Value {value} appeared {count} times")

        # Calculate Shannon entropy in bits: H = -sum(p_i * log2(p_i))
        probabilities = counts / len(samples)
        entropy = -np.sum(probabilities * np.log2(probabilities))
        logger.info(f"\nFinal entropy value: {entropy}")
        # Reduced entropy threshold due to the smaller sample size: with at
        # most 50 samples the maximum achievable entropy is log2(50) ≈ 5.64
        # bits, so 3 bits is a deliberately lenient floor.
        min_entropy = 3
        logger.info(f"Checking if entropy ({entropy}) > {min_entropy}")
        self.assertGreater(entropy, min_entropy)
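

if __name__ == "__main__":
    # Standard unittest entry point so the file can be run directly
    # (python tests/test_entropy.py) as well as via test discovery.
    unittest.main()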