Enabled timeframes, models, and caching

tcsenpai 2024-10-11 12:23:14 +02:00
parent c382ed89c4
commit 59dd9fadfa
10 changed files with 217 additions and 40 deletions

.gitignore

@ -2,4 +2,11 @@
temp_repo
.venv
output
__pycache__
output_*
.vscode
commit_cache.json
src/devlog/__pycache__
src/devlog/__pycache__/*.pyc
__pycache__
*.pyc
timeframes_config.json

README.md

@ -22,6 +22,10 @@ Devlog retrieves all the commit messages in a repository (local or remote), grou
- Customizable time periods for grouping commits
- Natural language generation for readable blog posts
- Output formats: Markdown and HTML
- Skip already processed commits if specified
- Customizable timeframes with cache
- Generate blog posts for each timeframe
- Support for custom Ollama models
## Installation and usage
@ -49,7 +53,7 @@ python src/devlog/__init__.py
## Configuration
Create a `.env` file in the root directory by copying the `env.example` file and set the following environment variables:
1. Create a `.env` file in the root directory by copying the `env.example` file and set the following environment variables:
```bash
OLLAMA_URL=<your-local-ollama-url>
@ -57,6 +61,39 @@ GIT_REPO=<your-repo-path-or-url>
GIT_TOKEN=<your-git-token>
DEFAULT_BRANCH=<your-default-branch>
GROUP_COMMITS_DAYS=<number-of-days-to-group-commits>
SKIP_PROCESSED_COMMITS=<true-or-false>
OLLAMA_MODEL=<your-ollama-model>
```
2. Copy the `timeframes_config.json.example` file to `timeframes_config.json` and modify the timeframes as you wish.
### .env file guide
- `OLLAMA_URL`: The URL of your local Ollama instance or the public instance I'm hosting.
- `GIT_REPO`: The path to your local repository or the URL of the remote repository.
- `GIT_TOKEN`: The token to access the remote repository if it's private.
- `DEFAULT_BRANCH`: The default branch of your repository.
- `GROUP_COMMITS_DAYS`: The number of days each group of commits spans.
- `SKIP_PROCESSED_COMMITS`: Whether to skip already processed commits.
- `OLLAMA_MODEL`: The model to use for generating blog posts.
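For example, a filled-in `.env` for a local repository might look like this (all values are illustrative; `GIT_TOKEN` is only needed for private remotes):
```bash
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
GIT_REPO=/home/user/projects/my-repo
GIT_TOKEN=your-git-token
DEFAULT_BRANCH=main
GROUP_COMMITS_DAYS=30
SKIP_PROCESSED_COMMITS=false
```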
### timeframes_config.json guide
Each entry in the `timeframes` array supports the following keys:
- `start_date`: The start date of the timeframe (YYYY-MM-DD).
- `end_date`: The end date of the timeframe (YYYY-MM-DD).
- `use_cache`: Whether to use the commit cache to speed up processing.
- `grouping_days`: The number of days each group of commits spans within this timeframe.
Example:
```json
{
    "timeframes": [
        {
            "start_date": "2024-09-01",
            "end_date": "2024-09-30",
            "use_cache": true,
            "grouping_days": 7
        }
    ]
}
```
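To make the field types concrete, here is a minimal loader sketch. `load_timeframes` is a hypothetical helper, not part of devlog's API; it assumes the layout above and parses dates with `python-dateutil`, which is already a dependency:
```python
import json
from dateutil.parser import parse

def load_timeframes(path="timeframes_config.json"):
    # Read the config and turn the date strings into datetime objects
    with open(path, "r", encoding="utf-8") as f:
        config = json.load(f)
    timeframes = []
    for entry in config["timeframes"]:
        timeframes.append({
            "start_date": parse(entry["start_date"]),
            "end_date": parse(entry["end_date"]),
            "use_cache": entry.get("use_cache", True),
            "grouping_days": entry.get("grouping_days", 30),
        })
    return timeframes
```
Each parsed entry then supplies the start/end datetimes and grouping window for one generated blog post.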
## License

env.example

@ -2,4 +2,5 @@ GIT_REPO=/your/repo/path/or/url
GIT_TOKEN=your-git-token
OLLAMA_URL=http://localhost:11434
DEFAULT_BRANCH=main
GROUP_COMMITS_DAYS=30
OLLAMA_MODEL=llama3.1:8b
SKIP_PROCESSED_COMMITS=false

pyproject.toml

@ -9,6 +9,7 @@ dependencies = [
"python-dotenv>=1.0.1",
"requests>=2.32.3",
"markdown>=3.7",
"python-dateutil>=2.9.0.post0",
]
[build-system]

requirements.txt

@ -2,3 +2,4 @@ python-dotenv
gitpython
markdown
requests
python-dateutil

View File

@ -1,5 +1,5 @@
import os
from typing import Dict, Any
from typing import Dict, Any, List
import markdown
def create_folder_structure(base_path: str) -> None:
@ -26,33 +26,72 @@ def write_text_file(content: str, file_path: str) -> None:
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text)
def write_weblog(weblog: Dict[str, Any], output_dir: str) -> None:
def generate_html_index(output_dir: str) -> None:
"""
Write the generated weblog as markdown and text files in the appropriate folder structure.
Generate and write an HTML index for the HTML files in the output directory.
"""
html_dir = os.path.join(output_dir, 'html')
html_files = [f for f in os.listdir(html_dir) if f.endswith('.html') and f != 'index.html']
index_content = "<html><head><title>Weblog Index</title></head><body>"
index_content += "<h1>Weblog Index</h1><ul>"
for html_file in sorted(html_files, reverse=True):
file_name = os.path.splitext(html_file)[0]
parts = file_name.split('_')
if len(parts) > 1:
date_range = parts[0]
title = ' '.join(parts[1:]).title()
else:
date_range = "Unknown Date"
title = file_name.title()
index_content += f'<li><a href="{html_file}">{date_range}: {title}</a></li>'
index_content += "</ul></body></html>"
index_path = os.path.join(html_dir, 'index.html')
with open(index_path, 'w', encoding='utf-8') as f:
f.write(index_content)
def write_weblog(weblogs: List[Dict[str, Any]], output_dir: str) -> None:
"""
Write the generated weblogs as markdown and text files in the appropriate folder structure.
"""
    create_folder_structure(output_dir)
    date_range = weblog.get('date_range', 'unknown_date')
    title = weblog.get('title', 'Untitled')
    content = weblog.get('content', '')
    for weblog in weblogs:
        date_range = weblog.get('date_range', 'unknown_date')
        title = weblog.get('title', 'Untitled')
        content = weblog.get('content', '')
    # Create a filename-friendly version of the title
    filename = f"{date_range}_{title.lower().replace(' ', '_')}"
    # Write markdown file
    md_path = os.path.join(output_dir, 'markdown', f"{filename}.md")
    write_markdown_file(content, md_path)
    # Write text file
    txt_path = os.path.join(output_dir, 'html', f"{filename}.html")
    write_text_file(content, txt_path)
        # Create a filename-friendly version of the title
        filename = f"{date_range}_{title.lower().replace(' ', '_')}"
        # Write markdown file
        md_path = os.path.join(output_dir, 'markdown', f"{filename}.md")
        write_markdown_file(content, md_path)
        # Write the HTML file
        html_path = os.path.join(output_dir, 'html', f"{filename}.html")
        write_text_file(content, html_path)
    # Generate HTML index after processing all weblogs
    generate_html_index(output_dir)
if __name__ == "__main__":
    # Test the module functionality
    test_weblog = {
        'date_range': '2023-06-01_to_2023-06-07',
        'title': 'Weekly Development Update',
        'content': '# Weekly Development Update\n\nThis week, we made significant progress on...'
    }
    write_weblog(test_weblog, 'output')
    print("Test weblog written successfully.")
    test_weblogs = [
        {
            'date_range': '2023-06-01_to_2023-06-07',
            'title': 'Weekly Development Update',
            'content': '# Weekly Development Update\n\nThis week, we made significant progress on...'
        },
        {
            'date_range': '2023-06-08_to_2023-06-14',
            'title': 'Sprint Review',
            'content': '# Sprint Review\n\nDuring this sprint, we accomplished...'
        }
    ]
    write_weblog(test_weblogs, 'output')
    print("Test weblogs written successfully.")

View File

@ -5,6 +5,9 @@ from git.exc import InvalidGitRepositoryError
import logging
from datetime import datetime, timedelta
from collections import defaultdict
import json
from dateutil.parser import parse
from dateutil.tz import tzutc
logger = logging.getLogger(__name__)
@ -15,6 +18,34 @@ class GitOperations:
        self.temp_dir = None
        self.commits = []
        self.default_branch = os.getenv('DEFAULT_BRANCH', 'main')
        self.cache_file = os.path.join(os.getcwd(), 'commit_cache.json')
        self.commit_cache = self._load_cache()
        self.skip_processed = os.getenv('SKIP_PROCESSED_COMMITS', 'false').lower() == 'true'
    def _load_cache(self):
        # Load previously elaborated commits from commit_cache.json, if it exists
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'r') as f:
                return json.load(f)
        return {}
    def _save_cache(self):
        # Persist the in-memory commit cache to disk
        with open(self.cache_file, 'w') as f:
            json.dump(self.commit_cache, f)
    def elaborate_commit(self, commit):
        # Return the cached summary if this commit was already elaborated
        if commit.hexsha in self.commit_cache:
            return self.commit_cache[commit.hexsha]
        # Reduce the GitPython commit object to a JSON-serializable dict
        elaborated_commit = {
            'hexsha': commit.hexsha,
            'author': str(commit.author),
            'date': commit.committed_datetime.isoformat(),
            'message': commit.message.strip()
        }
        self.commit_cache[commit.hexsha] = elaborated_commit
        self._save_cache()
        return elaborated_commit
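For reference, `commit_cache.json` maps each full hexsha to the dict produced by `elaborate_commit`. A sample entry, abbreviated and illustrative, using this commit's own metadata:
```json
{
  "59dd9fadfa": {
    "hexsha": "59dd9fadfa",
    "author": "tcsenpai",
    "date": "2024-10-11T12:23:14+02:00",
    "message": "Enabled timeframes, models, and caching"
  }
}
```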
    def list_commits(self):
        if self.repo_path_or_url.startswith('http://') or self.repo_path_or_url.startswith('https://'):
@ -34,7 +65,7 @@ class GitOperations:
logger.info(f"Found {len(commits)} commits")
for commit in commits:
self.commits.append(commit)
self.commits.append(self.elaborate_commit(commit))
print(f"Commit: {commit.hexsha}")
print(f"Author: {commit.author}")
print(f"Date: {commit.committed_datetime}")
@ -69,7 +100,7 @@ class GitOperations:
logger.info(f"Cleaning up temporary directory {self.temp_dir}")
shutil.rmtree(self.temp_dir)
def group_commits_by_days(self, days=None):
def group_commits_by_days(self, days=None, start_date=None, end_date=None):
if days is None:
days = int(os.getenv('GROUP_COMMITS_DAYS', 30))
@ -78,29 +109,56 @@ class GitOperations:
        grouped_commits = defaultdict(list)
        # Convert start_date and end_date to UTC-aware datetimes
        if start_date:
            start_date = start_date.replace(tzinfo=tzutc())
        if end_date:
            end_date = end_date.replace(tzinfo=tzutc())
        # Filter commits based on date range and skip processed commits if enabled
        filtered_commits = []
        for commit in self.commits:
            commit_date = parse(commit['date']).replace(tzinfo=tzutc())
            if (start_date is None or commit_date >= start_date) and \
               (end_date is None or commit_date <= end_date):
                if not self.skip_processed or commit['hexsha'] not in self.commit_cache:
                    filtered_commits.append(commit)
                    if self.skip_processed:
                        self.commit_cache[commit['hexsha']] = True
        # Sort commits by date (oldest first)
        sorted_commits = sorted(self.commits, key=lambda c: c.committed_datetime)
        sorted_commits = sorted(filtered_commits, key=lambda c: c['date'])
        if not sorted_commits:
            return []
            return {}
        # Get the date of the oldest commit
        current_date = sorted_commits[0].committed_datetime.date()
        end_date = current_date + timedelta(days=days)
        # Group commits by days
        current_date = parse(sorted_commits[0]['date']).replace(tzinfo=tzutc()).date()
        end_group_date = current_date + timedelta(days=days)
        group = []
        for commit in sorted_commits:
            commit_date = commit.committed_datetime.date()
            if commit_date <= end_date:
            commit_date = parse(commit['date']).replace(tzinfo=tzutc()).date()
            if commit_date <= end_group_date:
                group.append(commit)
            else:
                grouped_commits[f"{current_date} to {end_date}"] = group
                grouped_commits[f"{current_date} to {end_group_date}"] = group
                current_date = commit_date
                end_date = current_date + timedelta(days=days)
                end_group_date = current_date + timedelta(days=days)
                group = [commit]
        # Add the last group
        if group:
            grouped_commits[f"{current_date} to {end_date}"] = group
            grouped_commits[f"{current_date} to {end_group_date}"] = group
        return dict(grouped_commits)
        # Save the updated cache
        if self.skip_processed:
            self._save_cache()
        return dict(grouped_commits)
    def delete_cache(self):
        if os.path.exists(self.cache_file):
            os.remove(self.cache_file)
            logger.info(f"Deleted cache file: {self.cache_file}")
        self.commit_cache = {}
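A minimal usage sketch of the caching and grouping flow added above; the constructor argument is assumed from `self.repo_path_or_url`, and the dates mirror the example timeframe:
```python
from datetime import datetime

# Sketch only: the constructor signature is inferred from the attributes above
ops = GitOperations("/path/to/local/repo")
ops.list_commits()  # populates ops.commits with cached, JSON-serializable dicts

groups = ops.group_commits_by_days(
    days=7,
    start_date=datetime(2024, 9, 1),
    end_date=datetime(2024, 9, 30),
)
for date_range, commits in groups.items():
    print(f"{date_range}: {len(commits)} commits")
```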

View File

@ -5,7 +5,7 @@ import logging
logger = logging.getLogger(__name__)
class Ollamator:
    def __init__(self, ollama_url, model="llama3"):
    def __init__(self, ollama_url, model="llama3.1:8b"):
        self.ollama_url = ollama_url
        self.model = model
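With the default bumped to `llama3.1:8b`, the model can still be overridden via the `OLLAMA_MODEL` variable documented in the README. A sketch of how the two might be wired together (the environment lookup is illustrative, not code from this commit):
```python
import os

# Illustrative wiring: take the URL and model from the environment,
# falling back to the new default model
ollamator = Ollamator(
    os.getenv("OLLAMA_URL", "http://localhost:11434"),
    model=os.getenv("OLLAMA_MODEL", "llama3.1:8b"),
)
```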

timeframes_config.json.example

@ -0,0 +1,10 @@
{
    "timeframes": [
        {
            "start_date": "2024-09-01",
            "end_date": "2024-09-30",
            "use_cache": true,
            "grouping_days": 7
        }
    ]
}

uv.lock

@ -71,6 +71,7 @@ source = { editable = "." }
dependencies = [
    { name = "gitpython" },
    { name = "markdown" },
    { name = "python-dateutil" },
    { name = "python-dotenv" },
    { name = "requests" },
]
@ -79,6 +80,7 @@ dependencies = [
requires-dist = [
    { name = "gitpython", specifier = ">=3.1.43" },
    { name = "markdown", specifier = ">=3.7" },
    { name = "python-dateutil", specifier = ">=2.9.0.post0" },
    { name = "python-dotenv", specifier = ">=1.0.1" },
    { name = "requests", specifier = ">=2.32.3" },
]
@ -125,6 +127,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3f/08/83871f3c50fc983b88547c196d11cf8c3340e37c32d2e9d6152abe2c61f7/Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803", size = 106349 },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
]
[[package]]
name = "python-dotenv"
version = "1.0.1"
@ -149,6 +163,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
]
[[package]]
name = "six"
version = "1.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/71/39/171f1c67cd00715f190ba0b100d606d440a28c93c7714febeca8b79af85e/six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", size = 34041 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 },
]
[[package]]
name = "smmap"
version = "5.0.1"