Mirror of https://github.com/tcsenpai/src.git (synced 2025-06-05 18:55:33 +00:00)

First commit
This commit is contained in: commit d2cea458d4
.gitignore (vendored, Normal file, 2 lines)
@@ -0,0 +1,2 @@
__pycache__
*.db
pyflaredb/__init__.py (Normal file, 5 lines)
@@ -0,0 +1,5 @@
from .core import PyFlareDB
from .table import Table, Column
from .versioning import Version, VersionStore

__all__ = ['PyFlareDB', 'Table', 'Column', 'Version', 'VersionStore']
pyflaredb/benchmark/suite.py (Normal file, 125 lines)
@@ -0,0 +1,125 @@
import time
from typing import List, Dict, Any
import random
import string
from ..core import PyFlareDB


class BenchmarkSuite:
    def __init__(self, db: PyFlareDB):
        self.db = db

    def run_benchmark(self, num_records: int = 10000):
        """Run comprehensive benchmark"""
        results = {
            "insert": self._benchmark_insert(num_records),
            "select": self._benchmark_select(num_records),
            "index": self._benchmark_index_performance(num_records),
            "complex_query": self._benchmark_complex_queries(num_records),
        }
        return results

    def _benchmark_insert(self, num_records: int) -> Dict[str, float]:
        start_time = time.time()
        batch_times = []

        for i in range(0, num_records, 1000):
            batch_start = time.time()
            self._insert_batch(min(1000, num_records - i))
            batch_times.append(time.time() - batch_start)

        total_time = time.time() - start_time
        return {
            "total_time": total_time,
            "records_per_second": num_records / total_time,
            "avg_batch_time": sum(batch_times) / len(batch_times),
        }

    def _insert_batch(self, size: int):
        """Insert a batch of random records"""
        tx_id = None
        try:
            tx_id = self.db.transaction_manager.begin_transaction()

            for _ in range(size):
                query = (
                    "INSERT INTO users (id, username, email, age) "
                    f"VALUES ('{self._random_string(10)}', "
                    f"'{self._random_string(8)}', "
                    f"'{self._random_string(8)}@example.com', "
                    f"{random.randint(18, 80)})"
                )
                try:
                    self.db.execute(query)
                except Exception as e:
                    print(f"Failed to insert record: {e}")
                    print(f"Query was: {query}")
                    raise

            self.db.transaction_manager.commit(tx_id)

        except Exception:
            if tx_id is not None:
                try:
                    self.db.transaction_manager.rollback(tx_id)
                except ValueError:
                    pass
            raise

    def _benchmark_select(self, num_records: int) -> Dict[str, float]:
        """Benchmark SELECT queries"""
        queries = [
            "SELECT * FROM users WHERE age > 30",
            "SELECT username, email FROM users WHERE age < 25",
            "SELECT COUNT(*) FROM users",
        ]

        results = {}
        for query in queries:
            start_time = time.time()
            try:
                self.db.execute(query)
                query_time = time.time() - start_time
                results[query] = query_time
            except Exception as e:
                results[query] = f"Error: {e}"

        return results

    def _benchmark_index_performance(self, num_records: int) -> Dict[str, float]:
        """Benchmark index performance"""
        # TODO: Implement index benchmarking
        return {"index_creation_time": 0.0}

    def _benchmark_complex_queries(self, num_records: int) -> Dict[str, float]:
        """Benchmark complex queries"""
        complex_queries = [
            """
            SELECT username, COUNT(*) as count
            FROM users
            GROUP BY username
            """,
            """
            SELECT * FROM users
            WHERE age > 30
            ORDER BY username DESC
            LIMIT 10
            """,
        ]

        results = {}
        for query in complex_queries:
            start_time = time.time()
            try:
                self.db.execute(query)
                query_time = time.time() - start_time
                results[query.strip()] = query_time
            except Exception as e:
                results[query.strip()] = f"Error: {e}"

        return results

    @staticmethod
    def _random_string(length: int) -> str:
        """Generate a random string of specified length"""
        return "".join(random.choices(string.ascii_letters + string.digits, k=length))
pyflaredb/cache/query_cache.py (vendored, Normal file, 39 lines)
@@ -0,0 +1,39 @@
from typing import Dict, Any, Optional
import time
import hashlib
from collections import OrderedDict


class QueryCache:
    def __init__(self, capacity: int = 1000, ttl: int = 300):
        self.capacity = capacity
        self.ttl = ttl
        self.cache = OrderedDict()

    def get(self, query: str) -> Optional[Any]:
        """Get cached query result"""
        query_hash = self._hash_query(query)
        if query_hash in self.cache:
            entry = self.cache[query_hash]
            if time.time() - entry['timestamp'] < self.ttl:
                self.cache.move_to_end(query_hash)
                return entry['result']
            else:
                del self.cache[query_hash]
        return None

    def set(self, query: str, result: Any):
        """Cache query result"""
        if len(self.cache) >= self.capacity:
            self.cache.popitem(last=False)

        query_hash = self._hash_query(query)
        self.cache[query_hash] = {
            'result': result,
            'timestamp': time.time()
        }

    def _hash_query(self, query: str) -> str:
        return hashlib.sha256(query.encode()).hexdigest()

    def clear(self):
        self.cache.clear()
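A minimal usage sketch for the LRU/TTL cache above. The class and constructor arguments come from this file; the cached query strings and result payloads are illustrative only.

from pyflaredb.cache.query_cache import QueryCache

cache = QueryCache(capacity=2, ttl=60)                   # at most 2 entries, 60 s TTL
cache.set("SELECT * FROM users", [{"id": "u1"}])         # cache a result set
cache.set("SELECT COUNT(*) FROM users", [{"count": 1}])

print(cache.get("SELECT * FROM users"))                  # hit: [{'id': 'u1'}]
cache.set("SELECT * FROM orders", [])                    # at capacity: least-recently-used entry is evicted
print(cache.get("SELECT COUNT(*) FROM users"))           # None if that was the evicted entry
cache.clear()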
pyflaredb/core.py (Normal file, 90 lines)
@@ -0,0 +1,90 @@
from typing import Dict, List, Any, Optional
from .table import Table
from .sql.parser import SQLParser, SelectStatement, InsertStatement
from .sql.executor import QueryExecutor
from .sql.optimizer import QueryOptimizer
from .sql.statistics import TableStatistics
from .transaction import TransactionManager


class PyFlareDB:
    def __init__(self, db_path: str):
        """Initialize the database"""
        self.db_path = db_path
        self.tables: Dict[str, Table] = {}
        self.parser = SQLParser()
        self.statistics = TableStatistics()
        self.optimizer = QueryOptimizer(self.tables, self.statistics)
        self.executor = QueryExecutor(self.tables)
        self.transaction_manager = TransactionManager()
        self._query_cache = {}

    def begin_transaction(self) -> str:
        """Begin a new transaction"""
        return self.transaction_manager.begin_transaction()

    def commit_transaction(self, tx_id: str) -> bool:
        """Commit a transaction"""
        return self.transaction_manager.commit(tx_id)

    def rollback_transaction(self, tx_id: str) -> bool:
        """Rollback a transaction"""
        return self.transaction_manager.rollback(tx_id)

    def create_table(self, table: Table) -> None:
        """Create a new table"""
        if table.name in self.tables:
            raise ValueError(f"Table {table.name} already exists")
        self.tables[table.name] = table
        self.statistics.collect_statistics(table)

    def drop_table(self, table_name: str) -> None:
        """Drop a table"""
        if table_name not in self.tables:
            raise ValueError(f"Table {table_name} does not exist")
        del self.tables[table_name]

    def execute(
        self, sql: str, tx_id: Optional[str] = None
    ) -> Optional[List[Dict[str, Any]]]:
        """Execute a SQL query"""
        try:
            # Check query cache for non-transactional SELECT queries
            if tx_id is None and sql in self._query_cache:
                return self._query_cache[sql]

            # Parse SQL
            if sql.strip().upper().startswith("SELECT"):
                statement = self.parser.parse_select(sql)
            elif sql.strip().upper().startswith("INSERT"):
                statement = self.parser.parse_insert(sql)
            else:
                raise ValueError("Unsupported SQL statement type")

            # Get transaction if provided
            tx = None
            if tx_id:
                tx = self.transaction_manager.get_transaction(tx_id)
                if not tx:
                    raise ValueError(f"Transaction {tx_id} does not exist")

            # Optimize query plan
            optimized_plan = self.optimizer.optimize(statement)

            # Execute query
            result = self.executor.execute(optimized_plan, transaction=tx)

            # Cache SELECT results for non-transactional queries
            if tx_id is None and isinstance(statement, SelectStatement):
                self._query_cache[sql] = result

            return result

        except Exception:
            # Clear cache on error
            self._query_cache.clear()
            raise

    def clear_cache(self) -> None:
        """Clear the query cache"""
        self._query_cache.clear()
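A short end-to-end sketch of the PyFlareDB API defined above (create_table plus execute). The table name, column definitions, and SQL strings are illustrative; note that importing pyflaredb.core also pulls in the statistics module, which needs numpy installed.

from pyflaredb.core import PyFlareDB
from pyflaredb.table import Table, Column

db = PyFlareDB("example.db")
db.create_table(Table("users", [
    Column("id", "string", nullable=False, primary_key=True),
    Column("username", "string", nullable=False, unique=True),
    Column("age", "integer"),
]))

db.execute("INSERT INTO users (id, username, age) VALUES ('u1', 'alice', 30)")
print(db.execute("SELECT username FROM users WHERE age >= 30"))
# [{'username': "'alice'"}]  (the parser keeps the quotes around string literals)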
pyflaredb/indexing/btree.py (Normal file, 137 lines)
@@ -0,0 +1,137 @@
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass
from collections import deque


@dataclass
class Node:
    keys: List[Any]
    values: List[List[int]]  # List of row IDs for each key (handling duplicates)
    children: List['Node']
    is_leaf: bool = True


class BTreeIndex:
    def __init__(self, order: int = 100):
        self.root = Node([], [], [])
        self.order = order  # Maximum number of children per node

    def insert(self, key: Any, row_id: int) -> None:
        """Insert a key-value pair into the B-tree"""
        if len(self.root.keys) == (2 * self.order) - 1:
            # Split root if full
            new_root = Node([], [], [], False)
            new_root.children.append(self.root)
            self._split_child(new_root, 0)
            self.root = new_root
        self._insert_non_full(self.root, key, row_id)

    def search(self, key: Any) -> List[int]:
        """Search for a key and return all matching row IDs"""
        return self._search_node(self.root, key)

    def range_search(self, start_key: Any, end_key: Any) -> List[int]:
        """Perform a range search and return all matching row IDs"""
        result = []
        self._range_search_node(self.root, start_key, end_key, result)
        return result

    def _split_child(self, parent: Node, child_index: int) -> None:
        """Split a full child node"""
        order = self.order
        child = parent.children[child_index]
        new_node = Node([], [], [], child.is_leaf)

        # Move the median key to the parent
        median = order - 1
        parent.keys.insert(child_index, child.keys[median])
        parent.values.insert(child_index, child.values[median])
        parent.children.insert(child_index + 1, new_node)

        # Move half of the keys to the new node
        new_node.keys = child.keys[median + 1:]
        new_node.values = child.values[median + 1:]
        child.keys = child.keys[:median]
        child.values = child.values[:median]

        # Move children if not a leaf
        if not child.is_leaf:
            new_node.children = child.children[median + 1:]
            child.children = child.children[:median + 1]

    def _insert_non_full(self, node: Node, key: Any, row_id: int) -> None:
        """Insert into a non-full node"""
        i = len(node.keys) - 1

        if node.is_leaf:
            # Insert into leaf node
            while i >= 0 and self._compare_keys(key, node.keys[i]) < 0:
                i -= 1
            i += 1

            # Handle duplicate keys
            if i > 0 and self._compare_keys(key, node.keys[i - 1]) == 0:
                node.values[i - 1].append(row_id)
            else:
                node.keys.insert(i, key)
                node.values.insert(i, [row_id])
        else:
            # Find the child to insert into
            while i >= 0 and self._compare_keys(key, node.keys[i]) < 0:
                i -= 1
            i += 1

            if len(node.children[i].keys) == (2 * self.order) - 1:
                self._split_child(node, i)
                if self._compare_keys(key, node.keys[i]) > 0:
                    i += 1

            self._insert_non_full(node.children[i], key, row_id)

    def _search_node(self, node: Node, key: Any) -> List[int]:
        """Search for a key in a node"""
        i = 0
        while i < len(node.keys) and self._compare_keys(key, node.keys[i]) > 0:
            i += 1

        if i < len(node.keys) and self._compare_keys(key, node.keys[i]) == 0:
            return node.values[i]
        elif node.is_leaf:
            return []
        else:
            return self._search_node(node.children[i], key)

    def _range_search_node(self, node: Node, start_key: Any, end_key: Any, result: List[int]) -> None:
        """Perform range search on a node"""
        i = 0
        while i < len(node.keys) and self._compare_keys(start_key, node.keys[i]) > 0:
            i += 1

        if node.is_leaf:
            while i < len(node.keys) and self._compare_keys(node.keys[i], end_key) <= 0:
                result.extend(node.values[i])
                i += 1
        else:
            if i < len(node.keys):
                self._range_search_node(node.children[i], start_key, end_key, result)
            while i < len(node.keys) and self._compare_keys(node.keys[i], end_key) <= 0:
                result.extend(node.values[i])
                i += 1
            if i < len(node.children):
                self._range_search_node(node.children[i], start_key, end_key, result)

    @staticmethod
    def _compare_keys(key1: Any, key2: Any) -> int:
        """Compare two keys, handling different types"""
        if key1 is None or key2 is None:
            if key1 is None and key2 is None:
                return 0
            return -1 if key1 is None else 1

        try:
            if key1 < key2:
                return -1
            elif key1 > key2:
                return 1
            return 0
        except TypeError:
            # Handle incomparable types
            return 0
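A small sketch of the index API above: duplicate keys accumulate row ids, and range_search is inclusive of both bounds. The keys and row ids here are made up for illustration.

from pyflaredb.indexing.btree import BTreeIndex

idx = BTreeIndex(order=4)
for row_id, age in enumerate([31, 27, 31, 45, 19]):
    idx.insert(age, row_id)

print(idx.search(31))            # [0, 2]  (both rows with key 31)
print(idx.range_search(20, 40))  # row ids whose key lies in [20, 40]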
pyflaredb/monitoring/metrics.py (Normal file, 33 lines)
@@ -0,0 +1,33 @@
from typing import Dict, List
import time
from collections import deque
import threading


class PerformanceMetrics:
    def __init__(self, window_size: int = 1000):
        self.window_size = window_size
        self.query_times: Dict[str, deque] = {}
        self.lock = threading.Lock()

    def record_query(self, query_type: str, execution_time: float):
        """Record query execution time"""
        with self.lock:
            if query_type not in self.query_times:
                self.query_times[query_type] = deque(maxlen=self.window_size)
            self.query_times[query_type].append(execution_time)

    def get_metrics(self) -> Dict[str, Dict[str, float]]:
        """Get performance metrics"""
        metrics = {}
        with self.lock:
            for query_type, times in self.query_times.items():
                if not times:
                    continue
                metrics[query_type] = {
                    "avg_time": sum(times) / len(times),
                    "max_time": max(times),
                    "min_time": min(times),
                    "count": len(times),
                }
        return metrics
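A sketch of how the sliding-window metrics above might be used around a query call; the "select" label and the timed block are placeholders.

import time
from pyflaredb.monitoring.metrics import PerformanceMetrics

metrics = PerformanceMetrics(window_size=100)

start = time.time()
# ... run a query here ...
metrics.record_query("select", time.time() - start)

print(metrics.get_metrics())
# {'select': {'avg_time': ..., 'max_time': ..., 'min_time': ..., 'count': 1}}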
pyflaredb/sql/executor.py (Normal file, 245 lines)
@@ -0,0 +1,245 @@
from typing import List, Dict, Any, Callable, Tuple, Optional
import operator
from ..table import Table
from .parser import SelectStatement, InsertStatement
from ..transaction import Transaction


class QueryExecutor:
    def __init__(self, tables: Dict[str, Table]):
        self.tables = tables
        self._compiled_conditions = {}
        self._comparison_ops = {
            '>': operator.gt,
            '<': operator.lt,
            '>=': operator.ge,
            '<=': operator.le,
            '=': operator.eq,
            '!=': operator.ne
        }

    def _parse_where_clause(self, where_clause: str) -> List[Tuple[str, str, str]]:
        """Parse WHERE clause into list of (field, operator, value) tuples"""
        conditions = []
        # Split on AND if present
        subclauses = [c.strip() for c in where_clause.split(' AND ')]

        for subclause in subclauses:
            # Find the operator (check '!=' before '=' so an inequality
            # condition is not mis-split on the '=' character)
            operator_found = None
            for op in ['>=', '<=', '!=', '>', '<', '=']:
                if op in subclause:
                    operator_found = op
                    field, value = subclause.split(op)
                    conditions.append((field.strip(), op, value.strip()))
                    break

            if not operator_found:
                raise ValueError(f"Invalid condition: {subclause}")

        return conditions

    def execute(self, statement, transaction: Optional[Transaction] = None):
        """Execute a parsed SQL statement"""
        if isinstance(statement, SelectStatement):
            return self._execute_select(statement, transaction)
        elif isinstance(statement, InsertStatement):
            return self._execute_insert(statement, transaction)
        elif statement is None:
            raise ValueError("No statement to execute")
        else:
            raise ValueError(f"Unsupported statement type: {type(statement)}")

    def _execute_select(self, stmt: SelectStatement, transaction: Optional[Transaction] = None) -> List[Dict[str, Any]]:
        if stmt.table_name not in self.tables:
            raise ValueError(f"Table {stmt.table_name} does not exist")

        table = self.tables[stmt.table_name]

        # If in transaction, check for locks
        if transaction and table.name in transaction.locks:
            # Handle transaction isolation level logic here
            pass

        # Handle COUNT(*) separately
        if len(stmt.columns) == 1 and stmt.columns[0].lower() == "count(*)":
            return [{"count": len(table.data)}]

        # Try to use index for WHERE clause
        if stmt.where_clause:
            try:
                conditions = self._parse_where_clause(stmt.where_clause)

                # Check if we can use an index for any condition
                for field, op, value in conditions:
                    if field in table._indexes:
                        # Convert value to proper type
                        column = next((col for col in table.columns if col.name == field), None)
                        if column:
                            try:
                                if column.data_type == "integer":
                                    value = int(value)
                                elif column.data_type == "float":
                                    value = float(value)
                            except (ValueError, TypeError):
                                continue

                        # Use index for lookup
                        if op == '=':
                            results = table.find_by_index(field, value)
                        elif op in {'>', '>='}:
                            results = table.range_search(field, value, None)
                        elif op in {'<', '<='}:
                            results = table.range_search(field, None, value)
                        else:  # op == '!='
                            # For inequality, we still need to scan
                            results = table.data

                        # Apply remaining conditions
                        filtered_results = []
                        for row in results:
                            if self._matches_all_conditions(row, conditions):
                                filtered_results.append(row)

                        return self._process_results(filtered_results, stmt)
            except ValueError:
                # If WHERE clause parsing fails, fall back to table scan
                pass

        # Fall back to full table scan
        return self._table_scan(table, stmt)

    def _matches_all_conditions(self, row: Dict[str, Any], conditions: List[Tuple[str, str, str]]) -> bool:
        """Check if row matches all conditions"""
        for field, op, value in conditions:
            row_value = row.get(field)
            if row_value is None:
                return False

            # Convert value to proper type based on row_value
            try:
                if isinstance(row_value, int):
                    value = int(value)
                elif isinstance(row_value, float):
                    value = float(value)
            except (ValueError, TypeError):
                return False

            # Apply comparison
            op_func = self._comparison_ops[op]
            try:
                if not op_func(row_value, value):
                    return False
            except TypeError:
                return False

        return True

    def _table_scan(self, table: Table, stmt: SelectStatement) -> List[Dict[str, Any]]:
        """Perform a full table scan with filtering"""
        results = []

        # Parse WHERE conditions if present
        conditions = []
        if stmt.where_clause:
            try:
                conditions = self._parse_where_clause(stmt.where_clause)
            except ValueError:
                # If parsing fails, return empty result
                return []

        # Process rows
        for row in table.data:
            # Apply WHERE clause
            if conditions and not self._matches_all_conditions(row, conditions):
                continue

            # Select requested columns
            if "*" in stmt.columns:
                results.append(row.copy())
            else:
                filtered_row = {}
                for col in stmt.columns:
                    if "count(" in col.lower():
                        filtered_row[col] = len(results)
                    else:
                        filtered_row[col] = row.get(col)
                results.append(filtered_row)

        return self._process_results(results, stmt)

    def _process_results(self, rows: List[Dict[str, Any]], stmt: SelectStatement) -> List[Dict[str, Any]]:
        """Process result rows according to SELECT statement"""
        results = []
        for row in rows:
            if "*" in stmt.columns:
                results.append(row.copy())
            else:
                filtered_row = {}
                for col in stmt.columns:
                    if "count(" in col.lower():
                        filtered_row[col] = len(results)
                    else:
                        filtered_row[col] = row.get(col)
                results.append(filtered_row)

        # Handle ORDER BY
        if stmt.order_by:
            for order_clause in stmt.order_by:
                reverse = order_clause.direction.value == "DESC"
                results.sort(
                    key=lambda x: (x.get(order_clause.column) is None, x.get(order_clause.column)),
                    reverse=reverse
                )

        # Handle LIMIT
        if stmt.limit is not None:
            results = results[:stmt.limit]

        return results

    def _execute_insert(self, stmt: InsertStatement, transaction: Optional[Transaction] = None) -> bool:
        if stmt.table_name not in self.tables:
            raise ValueError(f"Table {stmt.table_name} does not exist")

        table = self.tables[stmt.table_name]

        # If in transaction, acquire lock and track changes
        if transaction:
            transaction.locks.add(table.name)
            # Track the changes for potential rollback
            transaction.changes.append({
                'type': 'INSERT',
                'table': table.name,
                'data': dict(zip(stmt.columns, stmt.values))
            })

        # Create dictionary of column-value pairs
        row_data = {}
        for col_name, value in zip(stmt.columns, stmt.values):
            # Find the column definition
            column = next((col for col in table.columns if col.name == col_name), None)
            if not column:
                raise ValueError(f"Column {col_name} does not exist")

            # Convert value based on column type
            if value is not None:
                try:
                    if column.data_type == "integer":
                        row_data[col_name] = int(value)
                    elif column.data_type == "float":
                        row_data[col_name] = float(value)
                    elif column.data_type == "boolean":
                        if isinstance(value, str):
                            row_data[col_name] = value.lower() == 'true'
                        else:
                            row_data[col_name] = bool(value)
                    else:  # string type
                        row_data[col_name] = str(value)
                except (ValueError, TypeError):
                    raise ValueError(f"Invalid value for column {column.name}: {value}")
            else:
                row_data[col_name] = None

        # Insert the data
        return table.insert(row_data)
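A quick illustration of the WHERE-clause splitting above. It exercises an internal helper, so treat it as a sketch only; with '!=' checked before '=', an inequality condition parses as expected.

from pyflaredb.sql.executor import QueryExecutor

executor = QueryExecutor(tables={})
print(executor._parse_where_clause("age >= 30 AND username != 'alice'"))
# [('age', '>=', '30'), ('username', '!=', "'alice'")]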
pyflaredb/sql/optimizer.py (Normal file, 51 lines)
@@ -0,0 +1,51 @@
from typing import List, Dict, Any, Union
from dataclasses import dataclass
from enum import Enum
from .parser import SelectStatement, InsertStatement

from pyflaredb.sql.statistics import TableStatistics
from pyflaredb.table import Table


class JoinStrategy(Enum):
    NESTED_LOOP = "nested_loop"
    HASH_JOIN = "hash_join"
    MERGE_JOIN = "merge_join"


class ScanType(Enum):
    SEQUENTIAL = "sequential"
    INDEX = "index"


@dataclass
class QueryPlan:
    operation: str
    strategy: Union[JoinStrategy, ScanType]
    estimated_cost: float
    children: List["QueryPlan"] = None


class QueryOptimizer:
    def __init__(self, tables: Dict[str, "Table"], statistics: "TableStatistics"):
        self.tables = tables
        self.statistics = statistics

    def optimize(self, statement) -> Any:
        """Generate an optimized query plan"""
        if isinstance(statement, SelectStatement):
            return self._optimize_select(statement)
        elif isinstance(statement, InsertStatement):
            return statement  # No optimization needed for simple inserts
        return statement  # Return original statement if no optimization is needed

    def _optimize_select(self, stmt: SelectStatement) -> SelectStatement:
        """Optimize SELECT query execution"""
        # For now, return the original statement
        # TODO: Implement actual optimization strategies
        return stmt

    def _estimate_cost(self, plan: QueryPlan) -> float:
        """Estimate the cost of a query plan"""
        # Implementation for cost estimation
        pass
pyflaredb/sql/parser.py (Normal file, 176 lines)
@@ -0,0 +1,176 @@
from dataclasses import dataclass
from typing import List, Optional, Any
from enum import Enum


class OrderDirection(Enum):
    ASC = "ASC"
    DESC = "DESC"


@dataclass
class OrderByClause:
    column: str
    direction: OrderDirection = OrderDirection.ASC


@dataclass
class SelectStatement:
    table_name: str
    columns: List[str]
    where_clause: Optional[str] = None
    group_by: Optional[List[str]] = None
    order_by: Optional[List[OrderByClause]] = None
    limit: Optional[int] = None


@dataclass
class InsertStatement:
    table_name: str
    columns: List[str]
    values: List[Any]


class SQLParser:
    @staticmethod
    def parse_insert(sql: str) -> InsertStatement:
        """Parse INSERT statement"""
        # Remove newlines and extra spaces
        sql = ' '.join(sql.split())

        # Extract table name
        table_start = sql.find("INTO") + 4
        table_end = sql.find("(", table_start)
        table_name = sql[table_start:table_end].strip()

        # Extract columns
        cols_start = sql.find("(", table_end) + 1
        cols_end = sql.find(")", cols_start)
        columns = [col.strip() for col in sql[cols_start:cols_end].split(",")]

        # Extract values
        values_start = sql.find("VALUES", cols_end) + 6
        values_start = sql.find("(", values_start) + 1
        values_end = sql.find(")", values_start)
        values_str = sql[values_start:values_end]

        # Parse values while respecting quotes
        values = []
        current_value = ""
        in_quotes = False
        quote_char = None

        for char in values_str:
            if char in ["'", '"']:
                if not in_quotes:
                    in_quotes = True
                    quote_char = char
                elif quote_char == char:
                    in_quotes = False
                    quote_char = None
                current_value += char
            elif char == ',' and not in_quotes:
                values.append(current_value.strip())
                current_value = ""
            else:
                current_value += char

        if current_value:
            values.append(current_value.strip())

        # Clean up values
        cleaned_values = []
        for value in values:
            value = value.strip()
            if value.startswith(("'", '"')) and value.endswith(("'", '"')):
                # String value - keep quotes
                cleaned_values.append(value)
            elif value.lower() == 'true':
                cleaned_values.append(True)
            elif value.lower() == 'false':
                cleaned_values.append(False)
            elif value.lower() == 'null':
                cleaned_values.append(None)
            else:
                try:
                    # Try to convert to number if possible
                    if '.' in value:
                        cleaned_values.append(float(value))
                    else:
                        cleaned_values.append(int(value))
                except ValueError:
                    # If not a number, keep as is
                    cleaned_values.append(value)

        if len(columns) != len(cleaned_values):
            raise ValueError(f"Column count ({len(columns)}) doesn't match value count ({len(cleaned_values)})")

        return InsertStatement(table_name=table_name, columns=columns, values=cleaned_values)

    @staticmethod
    def parse_select(sql: str) -> SelectStatement:
        """Parse SELECT statement"""
        # Remove newlines and extra spaces
        sql = ' '.join(sql.split())

        # Extract table name
        from_idx = sql.upper().find("FROM")
        if from_idx == -1:
            raise ValueError("Invalid SELECT statement: missing FROM clause")

        # Extract columns
        columns_str = sql[6:from_idx].strip()
        columns = [col.strip() for col in columns_str.split(",")]

        # Find all clause positions
        where_idx = sql.upper().find("WHERE")
        group_idx = sql.upper().find("GROUP BY")
        order_idx = sql.upper().find("ORDER BY")
        limit_idx = sql.upper().find("LIMIT")

        # Find table name end position
        table_end = min(x for x in [where_idx, group_idx, order_idx, limit_idx] if x != -1) if any(x != -1 for x in [where_idx, group_idx, order_idx, limit_idx]) else len(sql)
        table_name = sql[from_idx + 4:table_end].strip()

        # Parse WHERE clause
        where_clause = None
        if where_idx != -1:
            where_end = min(x for x in [group_idx, order_idx, limit_idx] if x != -1) if any(x != -1 for x in [group_idx, order_idx, limit_idx]) else len(sql)
            where_clause = sql[where_idx + 5:where_end].strip()

        # Parse GROUP BY clause
        group_by = None
        if group_idx != -1:
            group_end = min(x for x in [order_idx, limit_idx] if x != -1) if any(x != -1 for x in [order_idx, limit_idx]) else len(sql)
            group_by_str = sql[group_idx + 8:group_end].strip()
            group_by = [col.strip() for col in group_by_str.split(",")]

        # Parse ORDER BY clause
        order_by = None
        if order_idx != -1:
            order_end = limit_idx if limit_idx != -1 else len(sql)
            order_str = sql[order_idx + 8:order_end].strip()
            order_parts = order_str.split(",")
            order_by = []
            for part in order_parts:
                part = part.strip()
                if " DESC" in part.upper():
                    column = part[:part.upper().find(" DESC")].strip()
                    direction = OrderDirection.DESC
                else:
                    column = part.replace(" ASC", "").strip()
                    direction = OrderDirection.ASC
                order_by.append(OrderByClause(column=column, direction=direction))

        # Parse LIMIT clause
        limit = None
        if limit_idx != -1:
            limit_str = sql[limit_idx + 5:].strip()
            try:
                limit = int(limit_str)
            except ValueError:
                raise ValueError(f"Invalid LIMIT value: {limit_str}")

        return SelectStatement(
            table_name=table_name,
            columns=columns,
            where_clause=where_clause,
            group_by=group_by,
            order_by=order_by,
            limit=limit
        )
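A sketch of the two parser entry points above on made-up SQL strings; string literals are kept with their quotes and numeric literals are converted.

from pyflaredb.sql.parser import SQLParser

sel = SQLParser.parse_select(
    "SELECT username, age FROM users WHERE age > 30 ORDER BY age DESC LIMIT 5"
)
print(sel.table_name, sel.columns, sel.where_clause, sel.limit)
# users ['username', 'age'] age > 30 5

ins = SQLParser.parse_insert(
    "INSERT INTO users (id, username, age) VALUES ('u1', 'alice', 30)"
)
print(ins.columns, ins.values)
# ['id', 'username', 'age'] ["'u1'", "'alice'", 30]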
pyflaredb/sql/statistics.py (Normal file, 38 lines)
@@ -0,0 +1,38 @@
from typing import Dict, Any
import numpy as np

from pyflaredb.table import Table


class TableStatistics:
    def __init__(self):
        self.table_sizes: Dict[str, int] = {}
        self.column_stats: Dict[str, Dict[str, Any]] = {}

    def collect_statistics(self, table: "Table"):
        """Collect statistics for a table"""
        self.table_sizes[table.name] = len(table.data)

        for column in table.columns:
            values = [row[column.name] for row in table.data if column.name in row]

            if not values:
                continue

            stats = {
                "distinct_count": len(set(values)),
                "null_count": sum(1 for v in values if v is None),
                "min": min(values) if values and None not in values else None,
                "max": max(values) if values and None not in values else None,
            }

            if isinstance(values[0], (int, float)):
                stats.update(
                    {
                        "mean": np.mean(values),
                        "std_dev": np.std(values),
                        "histogram": np.histogram(values, bins=100),
                    }
                )

            self.column_stats[f"{table.name}.{column.name}"] = stats
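A sketch of collecting statistics for a small in-memory table (requires numpy); the table contents are invented for illustration.

from pyflaredb.table import Table, Column
from pyflaredb.sql.statistics import TableStatistics

t = Table("users", [Column("id", "string", primary_key=True), Column("age", "integer")])
t.insert({"id": "u1", "age": 30})
t.insert({"id": "u2", "age": 41})

stats = TableStatistics()
stats.collect_statistics(t)
print(stats.table_sizes["users"])               # 2
print(stats.column_stats["users.age"]["mean"])  # 35.5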
pyflaredb/table.py (Normal file, 191 lines)
@@ -0,0 +1,191 @@
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
from datetime import datetime
from collections import defaultdict
from .indexing.btree import BTreeIndex


@dataclass
class Column:
    name: str
    data_type: str
    nullable: bool = True
    unique: bool = False
    primary_key: bool = False
    default: Any = None


class Table:
    def __init__(self, name: str, columns: List[Column]):
        self.name = name
        self.columns = columns
        self.data: List[Dict[str, Any]] = []
        self._unique_indexes: Dict[str, Dict[Any, int]] = defaultdict(dict)
        self._compiled_conditions = {}
        self._indexes: Dict[str, BTreeIndex] = {}

        # Validate column definitions
        self._validate_columns()

    def _validate_columns(self):
        """Validate column definitions"""
        # Ensure only one primary key
        primary_keys = [col for col in self.columns if col.primary_key]
        if len(primary_keys) > 1:
            raise ValueError("Table can only have one primary key")

        # Validate data types
        valid_types = {"string", "integer", "float", "boolean", "datetime"}
        for col in self.columns:
            if col.data_type.lower() not in valid_types:
                raise ValueError(f"Invalid data type for column {col.name}: {col.data_type}")

    def create_index(self, column_name: str) -> None:
        """Create a B-tree index for a column"""
        if column_name not in {col.name for col in self.columns}:
            raise ValueError(f"Column {column_name} does not exist")

        # Create new index
        index = BTreeIndex()

        # Build index from existing data
        for row_id, row in enumerate(self.data):
            if column_name in row:
                index.insert(row[column_name], row_id)

        self._indexes[column_name] = index

    def batch_insert(self, rows: List[Dict[str, Any]]) -> bool:
        """Efficiently insert multiple rows with index updates"""
        # Pre-validate all rows
        validated_rows = []
        unique_values = defaultdict(set)

        # Check unique constraints across all new rows
        for row in rows:
            converted_row = {}
            # Validate required columns and defaults
            for column in self.columns:
                if not column.nullable and column.name not in row and column.default is None:
                    raise ValueError(f"Required column {column.name} is missing")

                value = row.get(column.name, column.default)

                # Type conversion
                if value is not None:
                    try:
                        if column.data_type == "integer":
                            value = int(value)
                        elif column.data_type == "float":
                            value = float(value)
                        elif column.data_type == "boolean":
                            value = bool(value)
                        else:  # string and datetime
                            value = str(value)
                    except (ValueError, TypeError):
                        raise ValueError(f"Invalid value for column {column.name}: {value}")

                converted_row[column.name] = value

                # Track unique values
                if column.unique and value is not None:
                    if value in unique_values[column.name] or value in self._unique_indexes[column.name]:
                        raise ValueError(f"Unique constraint violated for column {column.name}")
                    unique_values[column.name].add(value)

            validated_rows.append(converted_row)

        # All rows validated, perform batch insert
        start_id = len(self.data)
        for i, row in enumerate(validated_rows):
            row_id = start_id + i

            # Update indexes
            for column_name, index in self._indexes.items():
                if column_name in row:
                    index.insert(row[column_name], row_id)

            # Update unique indexes
            for column in self.columns:
                if column.unique:
                    value = row.get(column.name)
                    if value is not None:
                        self._unique_indexes[column.name][value] = row_id

            self.data.append(row)

        return True

    def insert(self, row: Dict[str, Any]) -> bool:
        """Insert a single row (now uses batch_insert)"""
        return self.batch_insert([row])

    def to_dict(self) -> dict:
        """Convert table to dictionary for serialization"""
        return {
            "name": self.name,
            "columns": [
                {
                    "name": col.name,
                    "data_type": col.data_type,
                    "nullable": col.nullable,
                    "unique": col.unique,
                    "primary_key": col.primary_key
                }
                for col in self.columns
            ],
            "data": self.data
        }

    @classmethod
    def from_dict(cls, data: dict) -> 'Table':
        """Create table from dictionary"""
        columns = [
            Column(**col_data)
            for col_data in data["columns"]
        ]
        table = cls(data["name"], columns)
        table.data = data["data"]
        return table

    def _validate_type(self, value: Any, expected_type: str) -> bool:
        """Validate that a value matches the expected data type"""
        type_mapping = {
            "string": str,
            "integer": int,
            "float": float,
            "boolean": bool,
            "datetime": datetime,
        }

        if expected_type not in type_mapping:
            raise ValueError(f"Unsupported data type: {expected_type}")

        expected_python_type = type_mapping[expected_type]

        if not isinstance(value, expected_python_type):
            try:
                # Attempt to convert the value
                expected_python_type(value)
            except (ValueError, TypeError):
                raise ValueError(
                    f"Value {value} is not of expected type {expected_type}"
                )

        return True

    def find_by_index(self, column_name: str, value: Any) -> List[Dict[str, Any]]:
        """Find rows using an index"""
        if column_name not in self._indexes:
            raise ValueError(f"No index exists for column {column_name}")

        index = self._indexes[column_name]
        row_ids = index.search(value)
        return [self.data[row_id] for row_id in row_ids]

    def range_search(self, column_name: str, start_value: Any, end_value: Any) -> List[Dict[str, Any]]:
        """Perform a range search using an index"""
        if column_name not in self._indexes:
            raise ValueError(f"No index exists for column {column_name}")

        index = self._indexes[column_name]
        row_ids = index.range_search(start_value, end_value)
        return [self.data[row_id] for row_id in row_ids]
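A sketch of the Table API above: define columns, index a column, batch-insert rows, then query through the index. All row contents are illustrative.

from pyflaredb.table import Table, Column

users = Table("users", [
    Column("id", "string", nullable=False, primary_key=True),
    Column("username", "string", nullable=False, unique=True),
    Column("age", "integer"),
])
users.create_index("age")

users.batch_insert([
    {"id": "u1", "username": "alice", "age": 30},
    {"id": "u2", "username": "bob", "age": 45},
])

print(users.find_by_index("age", 45))     # [{'id': 'u2', 'username': 'bob', 'age': 45}]
print(users.range_search("age", 25, 40))  # rows whose age lies in [25, 40]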
pyflaredb/transaction.py (Normal file, 81 lines)
@@ -0,0 +1,81 @@
from typing import Dict, Any, Optional, Set, List
from enum import Enum
import time
import uuid
import threading


class TransactionState(Enum):
    ACTIVE = "active"
    COMMITTED = "committed"
    ROLLED_BACK = "rolled_back"


class Transaction:
    def __init__(self, tx_id: str):
        self.id = tx_id
        self.state = TransactionState.ACTIVE
        self.start_time = time.time()
        self.locks: Set[str] = set()  # Set of table names that are locked
        self.changes: List[Dict[str, Any]] = []  # List of changes made during transaction


class TransactionManager:
    def __init__(self):
        self.transactions: Dict[str, Transaction] = {}
        self.lock = threading.Lock()

    def begin_transaction(self) -> str:
        """Start a new transaction"""
        with self.lock:
            tx_id = str(uuid.uuid4())
            self.transactions[tx_id] = Transaction(tx_id)
            return tx_id

    def commit(self, tx_id: str) -> bool:
        """Commit a transaction"""
        with self.lock:
            if tx_id not in self.transactions:
                raise ValueError(f"Transaction {tx_id} not found")

            tx = self.transactions[tx_id]
            if tx.state != TransactionState.ACTIVE:
                raise ValueError(f"Transaction {tx_id} is not active")

            # Apply changes
            tx.state = TransactionState.COMMITTED

            # Release locks
            tx.locks.clear()

            return True

    def rollback(self, tx_id: str) -> bool:
        """Rollback a transaction"""
        with self.lock:
            if tx_id not in self.transactions:
                raise ValueError(f"Transaction {tx_id} not found")

            tx = self.transactions[tx_id]
            if tx.state != TransactionState.ACTIVE:
                raise ValueError(f"Transaction {tx_id} is not active")

            # Revert changes
            tx.state = TransactionState.ROLLED_BACK
            tx.changes.clear()

            # Release locks
            tx.locks.clear()

            return True

    def get_transaction(self, tx_id: str) -> Optional[Transaction]:
        """Get transaction by ID"""
        return self.transactions.get(tx_id)

    def is_active(self, tx_id: str) -> bool:
        """Check if a transaction is active"""
        tx = self.get_transaction(tx_id)
        return tx is not None and tx.state == TransactionState.ACTIVE
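A sketch of the transaction lifecycle exposed by this module (the same TransactionManager that core.py imports).

from pyflaredb.transaction import TransactionManager

manager = TransactionManager()
tx_id = manager.begin_transaction()
print(manager.is_active(tx_id))   # True

manager.commit(tx_id)             # or manager.rollback(tx_id)
print(manager.is_active(tx_id))   # False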
pyflaredb/transaction/manager.py (Normal file, 63 lines)
@@ -0,0 +1,63 @@
from typing import Dict, List, Any
from enum import Enum
import threading
from datetime import datetime


class TransactionState(Enum):
    ACTIVE = "ACTIVE"
    COMMITTED = "COMMITTED"
    ROLLED_BACK = "ROLLED_BACK"


class Transaction:
    def __init__(self, id: str):
        self.id = id
        self.state = TransactionState.ACTIVE
        self.changes: List[Dict[str, Any]] = []
        self.locks = set()
        self.timestamp = datetime.utcnow()


class TransactionManager:
    def __init__(self):
        self.transactions: Dict[str, Transaction] = {}
        self.lock = threading.Lock()

    def begin_transaction(self) -> str:
        """Start a new transaction"""
        with self.lock:
            tx_id = str(len(self.transactions) + 1)
            self.transactions[tx_id] = Transaction(tx_id)
            return tx_id

    def commit(self, tx_id: str):
        """Commit a transaction"""
        with self.lock:
            if tx_id not in self.transactions:
                raise ValueError(f"Transaction {tx_id} not found")

            tx = self.transactions[tx_id]
            if tx.state != TransactionState.ACTIVE:
                raise ValueError(f"Transaction {tx_id} is not active")

            # Apply changes
            self._apply_changes(tx)
            tx.state = TransactionState.COMMITTED

    def rollback(self, tx_id: str):
        """Rollback a transaction"""
        with self.lock:
            if tx_id not in self.transactions:
                raise ValueError(f"Transaction {tx_id} not found")

            tx = self.transactions[tx_id]
            if tx.state != TransactionState.ACTIVE:
                raise ValueError(f"Transaction {tx_id} is not active")

            # Discard changes
            tx.changes.clear()
            tx.state = TransactionState.ROLLED_BACK

    def _apply_changes(self, transaction: Transaction):
        """Apply transaction changes"""
        for change in transaction.changes:
            # Implementation of applying changes
            pass
pyflaredb/versioning.py (Normal file, 50 lines)
@@ -0,0 +1,50 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Any, Dict, List, Optional
import hashlib


@dataclass
class Version:
    timestamp: datetime
    operation: str  # 'INSERT', 'UPDATE', 'DELETE'
    table_name: str
    row_id: str
    data: Dict[str, Any]
    previous_version: Optional[str] = None  # Hash of previous version


class VersionStore:
    def __init__(self):
        self.versions: List[Version] = []
        self.current_version: Optional[str] = None  # Hash of current version

    def add_version(self, version: Version):
        """Add a new version to the store"""
        version_hash = self._calculate_hash(version)
        self.versions.append(version)
        self.current_version = version_hash

    def _calculate_hash(self, version: Version) -> str:
        """Hash a version record.

        Note: add_version referenced this helper but it was missing from the
        original commit; this is a minimal implementation so the call works.
        """
        payload = (
            f"{version.timestamp.isoformat()}|{version.operation}|"
            f"{version.table_name}|{version.row_id}|{sorted(version.data.items())}"
        )
        return hashlib.sha256(payload.encode()).hexdigest()

    def get_state_at(self, timestamp: datetime) -> Dict[str, List[Dict[str, Any]]]:
        """Reconstruct database state at given timestamp"""
        state = {}
        relevant_versions = [v for v in self.versions if v.timestamp <= timestamp]

        for version in relevant_versions:
            if version.table_name not in state:
                state[version.table_name] = []

            if version.operation == "INSERT":
                state[version.table_name].append(version.data)
            elif version.operation == "DELETE":
                state[version.table_name] = [
                    row
                    for row in state[version.table_name]
                    if row["id"] != version.row_id
                ]
            elif version.operation == "UPDATE":
                state[version.table_name] = [
                    version.data if row["id"] == version.row_id else row
                    for row in state[version.table_name]
                ]

        return state
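A sketch of recording versions and reconstructing state at a point in time, relying on the _calculate_hash helper filled in above; the timestamps and rows are invented.

from datetime import datetime
from pyflaredb.versioning import Version, VersionStore

store = VersionStore()
store.add_version(Version(
    timestamp=datetime(2024, 1, 1),
    operation="INSERT",
    table_name="users",
    row_id="u1",
    data={"id": "u1", "username": "alice"},
))
store.add_version(Version(
    timestamp=datetime(2024, 2, 1),
    operation="DELETE",
    table_name="users",
    row_id="u1",
    data={},
))

print(store.get_state_at(datetime(2024, 1, 15)))  # {'users': [{'id': 'u1', 'username': 'alice'}]}
print(store.get_state_at(datetime(2024, 3, 1)))   # {'users': []}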
test.py (Normal file, 254 lines)
@@ -0,0 +1,254 @@
from pyflaredb.core import PyFlareDB
from pyflaredb.table import Column, Table
from pyflaredb.benchmark.suite import BenchmarkSuite
import time
from datetime import datetime
import random
import string
import json
from typing import List, Dict, Any


def generate_realistic_data(n: int) -> List[Dict[str, Any]]:
    """Generate realistic test data"""
    domains = ['gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com', 'company.com']
    cities = ['New York', 'London', 'Tokyo', 'Paris', 'Berlin', 'Sydney', 'Toronto']

    data = []
    for i in range(n):
        # Generate realistic username
        username = f"{random.choice(string.ascii_lowercase)}{random.choice(string.ascii_lowercase)}"
        username += ''.join(random.choices(string.ascii_lowercase + string.digits, k=random.randint(6, 12)))

        # Generate realistic email
        email = f"{username}@{random.choice(domains)}"

        # Generate JSON metadata
        metadata = {
            "city": random.choice(cities),
            "last_login": f"2024-{random.randint(1,12):02d}-{random.randint(1,28):02d}",
            "preferences": {
                "theme": random.choice(["light", "dark", "system"]),
                "notifications": random.choice([True, False])
            }
        }

        data.append({
            "id": f"usr_{i:08d}",
            "username": username,
            "email": email,
            "age": random.randint(18, 80),
            "score": round(random.uniform(0, 100), 2),
            "is_active": random.random() > 0.1,  # 90% active users
            "login_count": random.randint(1, 1000),
            "metadata": json.dumps(metadata)
        })

    return data


def format_value(value):
    """Format value based on its type"""
    if isinstance(value, (float, int)):
        return f"{value:.4f}"
    return str(value)


def test_database_features():
    """Test all database features with realistic workloads"""
    print("\n=== Starting Realistic Database Tests ===")

    # Initialize database
    db = PyFlareDB("test.db")

    # 1. Create test table with realistic schema
    print("\n1. Setting up test environment...")
    users_table = Table(
        name="users",
        columns=[
            Column("id", "string", nullable=False, primary_key=True),
            Column("username", "string", nullable=False, unique=True),
            Column("email", "string", nullable=False),
            Column("age", "integer", nullable=True),
            Column("score", "float", nullable=True),
            Column("is_active", "boolean", nullable=True, default=True),
            Column("login_count", "integer", nullable=True, default=0),
            Column("metadata", "string", nullable=True)  # JSON string
        ],
    )
    db.tables["users"] = users_table

    # Create indexes for commonly queried fields
    users_table.create_index("age")
    users_table.create_index("score")
    users_table.create_index("login_count")

    # 2. Performance Tests with Realistic Data
    print("\n2. Running performance tests...")

    # Generate test data
    test_data = generate_realistic_data(1000)  # 1000 realistic records

    # Insert Performance (Single vs Batch)
    print("\nInsert Performance:")

    # Single Insert (OLTP-style)
    start_time = time.time()
    for record in test_data[:100]:  # Test with first 100 records
        # Properly escape the metadata string
        metadata_str = record['metadata'].replace("'", "''")

        # Format each value according to its type
        values = [
            f"'{record['id']}'",                # string
            f"'{record['username']}'",          # string
            f"'{record['email']}'",             # string
            str(record['age']),                 # integer
            str(record['score']),               # float
            str(record['is_active']).lower(),   # boolean
            str(record['login_count']),         # integer
            f"'{metadata_str}'"                 # string (JSON)
        ]

        query = f"""
        INSERT INTO users
        (id, username, email, age, score, is_active, login_count, metadata)
        VALUES
        ({', '.join(values)})
        """
        db.execute(query)
    single_insert_time = time.time() - start_time
    print(f"Single Insert (100 records, OLTP): {single_insert_time:.4f}s")

    # Batch Insert (OLAP-style)
    start_time = time.time()
    batch_data = test_data[100:200]  # Next 100 records
    users_table.batch_insert(batch_data)  # This should work as is
    batch_insert_time = time.time() - start_time
    print(f"Batch Insert (100 records, OLAP): {batch_insert_time:.4f}s")

    # 3. Query Performance Tests
    print("\nQuery Performance (OLTP vs OLAP):")

    # OLTP-style queries (point queries, simple filters)
    oltp_queries = [
        ("Single Record Lookup", "SELECT * FROM users WHERE id = 'usr_00000001'"),
        ("Simple Range Query", "SELECT * FROM users WHERE age > 30 LIMIT 10"),
        ("Active Users Count", "SELECT COUNT(*) FROM users WHERE is_active = true"),
        ("Recent Logins", "SELECT * FROM users WHERE login_count > 500 ORDER BY login_count DESC LIMIT 5")
    ]

    # OLAP-style queries (aggregations, complex filters)
    olap_queries = [
        ("Age Distribution", """
            SELECT
                CASE
                    WHEN age < 25 THEN 'Gen Z'
                    WHEN age < 40 THEN 'Millennial'
                    WHEN age < 55 THEN 'Gen X'
                    ELSE 'Boomer'
                END as generation,
                COUNT(*) as count
            FROM users
            GROUP BY generation
        """),
        ("User Engagement", """
            SELECT
                username,
                score,
                login_count
            FROM users
            WHERE score > 75
            AND login_count > 100
            ORDER BY score DESC
            LIMIT 10
        """),
        ("Complex Analytics", """
            SELECT
                COUNT(*) as total_users,
                AVG(score) as avg_score,
                SUM(CASE WHEN is_active THEN 1 ELSE 0 END) as active_users
            FROM users
            WHERE age BETWEEN 25 AND 45
        """)
    ]

    print("\nOLTP Query Performance:")
    for query_name, query in oltp_queries:
        # First run (cold)
        start_time = time.time()
        db.execute(query)
        cold_time = time.time() - start_time

        # Second run (warm/cached)
        start_time = time.time()
        db.execute(query)
        warm_time = time.time() - start_time

        print(f"\n{query_name}:")
        print(f"  Cold run: {cold_time:.4f}s")
        print(f"  Warm run: {warm_time:.4f}s")
        print(f"  Cache improvement: {((cold_time - warm_time) / cold_time * 100):.1f}%")

    print("\nOLAP Query Performance:")
    for query_name, query in olap_queries:
        start_time = time.time()
        db.execute(query)
        execution_time = time.time() - start_time
        print(f"\n{query_name}: {execution_time:.4f}s")

    # 4. Concurrent Operations Test
    print("\nConcurrent Operations Simulation:")
    start_time = time.time()
    # Simulate mixed workload
    for _ in range(100):
        if random.random() < 0.8:  # 80% reads
            query = random.choice(oltp_queries)[1]
        else:  # 20% writes
            record = generate_realistic_data(1)[0]
            query = f"""
            INSERT INTO users (id, username, email, age, score, is_active, login_count, metadata)
            VALUES (
                '{record['id']}',
                '{record['username']}',
                '{record['email']}',
                {record['age']},
                {record['score']},
                {str(record['is_active']).lower()},
                {record['login_count']},
                '{record['metadata']}'
            )
            """
        db.execute(query)
    mixed_workload_time = time.time() - start_time
    print(f"Mixed Workload (100 operations): {mixed_workload_time:.4f}s")

    # 5. Memory Usage Test
    print("\nMemory Usage:")
    import sys
    memory_size = sys.getsizeof(db.tables["users"].data) / 1024  # KB
    records_count = len(db.tables["users"].data)
    print(f"Memory per record: {(memory_size / records_count):.2f} KB")

    # 6. Run standard benchmark suite
    print("\n6. Running standard benchmark suite...")
    benchmark = BenchmarkSuite(db)
    results = benchmark.run_benchmark(num_records=10000)

    print("\nBenchmark Results:")
    for test_name, metrics in results.items():
        print(f"\n{test_name.upper()}:")
        for metric, value in metrics.items():
            print(f"  {metric}: {format_value(value)}")


def main():
    try:
        test_database_features()
    except Exception as e:
        print(f"Test failed: {e}")
        raise


if __name__ == "__main__":
    main()