Enabled timeframes, models, and caching

tcsenpai 2024-10-11 12:23:14 +02:00
parent c382ed89c4
commit 59dd9fadfa
10 changed files with 217 additions and 40 deletions

.gitignore

@ -2,4 +2,11 @@
temp_repo
.venv
output
__pycache__
output_*
.vscode
commit_cache.json
src/devlog/__pycache__
src/devlog/__pycache__/*.pyc
__pycache__
*.pyc
timeframes_config.json

README.md

@ -22,6 +22,10 @@ Devlog retrieves all the commit messages in a repository (local or remote), grou
- Customizable time periods for grouping commits
- Natural language generation for readable blog posts
- Output formats: Markdown and HTML
- Skip already processed commits if specified
- Customizable timeframes with cache
- Generate blog posts for each timeframe
- Support for custom Ollama models
## Installation and usage
@ -49,7 +53,7 @@ python src/devlog/__init__.py
## Configuration
Create a `.env` file in the root directory by copying the `env.example` file and set the following environment variables:
1. Create a `.env` file in the root directory by copying the `env.example` file and set the following environment variables:
```bash
OLLAMA_URL=<your-local-ollama-url>
@ -57,6 +61,39 @@ GIT_REPO=<your-repo-path-or-url>
GIT_TOKEN=<your-git-token>
DEFAULT_BRANCH=<your-default-branch>
GROUP_COMMITS_DAYS=<number-of-days-to-group-commits>
SKIP_PROCESSED_COMMITS=<true-or-false>
OLLAMA_MODEL=<your-ollama-model>
```
2. Copy the `timeframes_config.json.example` file to `timeframes_config.json` and modify the timeframes as you wish.
### .env file guide
- `OLLAMA_URL`: The URL of your local Ollama instance or the public instance I'm hosting.
- `GIT_REPO`: The path to your local repository or the URL of the remote repository.
- `GIT_TOKEN`: The token to access the remote repository if it's private.
- `DEFAULT_BRANCH`: The default branch of your repository.
- `GROUP_COMMITS_DAYS`: The number of days each group of commits spans.
- `SKIP_PROCESSED_COMMITS`: Whether to skip already processed commits.
- `OLLAMA_MODEL`: The model to use for generating blog posts.
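For example, a filled-in `.env` for a local repository might look like this (all values are illustrative; `GIT_TOKEN` is only needed for private remotes):
```bash
OLLAMA_URL=http://localhost:11434
OLLAMA_MODEL=llama3.1:8b
GIT_REPO=/home/user/projects/my-repo
GIT_TOKEN=your-git-token
DEFAULT_BRANCH=main
GROUP_COMMITS_DAYS=30
SKIP_PROCESSED_COMMITS=false
```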
### timeframes_config.json guide
Each entry in the `timeframes` array supports the following keys:
- `start_date`: The start date of the timeframe (YYYY-MM-DD).
- `end_date`: The end date of the timeframe (YYYY-MM-DD).
- `use_cache`: Whether to use the commit cache to speed up processing.
- `grouping_days`: The number of days each group of commits spans within this timeframe.
Example:
```json
{
    "timeframes": [
        {
            "start_date": "2024-09-01",
            "end_date": "2024-09-30",
            "use_cache": true,
            "grouping_days": 7
        }
    ]
}
```
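To make the field types concrete, here is a minimal loader sketch. `load_timeframes` is a hypothetical helper, not part of devlog's API; it assumes the layout above and parses dates with `python-dateutil`, which is already a dependency:
```python
import json
from dateutil.parser import parse

def load_timeframes(path="timeframes_config.json"):
    # Read the config and turn the date strings into datetime objects
    with open(path, "r", encoding="utf-8") as f:
        config = json.load(f)
    timeframes = []
    for entry in config["timeframes"]:
        timeframes.append({
            "start_date": parse(entry["start_date"]),
            "end_date": parse(entry["end_date"]),
            "use_cache": entry.get("use_cache", True),
            "grouping_days": entry.get("grouping_days", 30),
        })
    return timeframes
```
Each parsed entry then supplies the start/end datetimes and grouping window for one generated blog post.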
## License

env.example

@ -2,4 +2,5 @@ GIT_REPO=/your/repo/path/or/url
GIT_TOKEN=your-git-token
OLLAMA_URL=http://localhost:11434
DEFAULT_BRANCH=main
GROUP_COMMITS_DAYS=30
OLLAMA_MODEL=llama3.1:8b
SKIP_PROCESSED_COMMITS=false

pyproject.toml

@ -9,6 +9,7 @@ dependencies = [
"python-dotenv>=1.0.1",
"requests>=2.32.3",
"markdown>=3.7",
"python-dateutil>=2.9.0.post0",
]
[build-system]

requirements.txt

@ -2,3 +2,4 @@ python-dotenv
gitpython
markdown
requests
python-dateutil

View File

@ -1,5 +1,5 @@
import os
from typing import Dict, Any
from typing import Dict, Any, List
import markdown
def create_folder_structure(base_path: str) -> None:
@ -26,33 +26,72 @@ def write_text_file(content: str, file_path: str) -> None:
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(text)
def write_weblog(weblog: Dict[str, Any], output_dir: str) -> None:
def generate_html_index(output_dir: str) -> None:
"""
Write the generated weblog as markdown and text files in the appropriate folder structure.
Generate and write an HTML index for the HTML files in the output directory.
"""
html_dir = os.path.join(output_dir, 'html')
html_files = [f for f in os.listdir(html_dir) if f.endswith('.html') and f != 'index.html']
index_content = "<html><head><title>Weblog Index</title></head><body>"
index_content += "<h1>Weblog Index</h1><ul>"
for html_file in sorted(html_files, reverse=True):
file_name = os.path.splitext(html_file)[0]
parts = file_name.split('_')
if len(parts) > 1:
date_range = parts[0]
title = ' '.join(parts[1:]).title()
else:
date_range = "Unknown Date"
title = file_name.title()
index_content += f'<li><a href="{html_file}">{date_range}: {title}</a></li>'
index_content += "</ul></body></html>"
index_path = os.path.join(html_dir, 'index.html')
with open(index_path, 'w', encoding='utf-8') as f:
f.write(index_content)
def write_weblog(weblogs: List[Dict[str, Any]], output_dir: str) -> None:
"""
Write the generated weblogs as markdown and text files in the appropriate folder structure.
"""
    create_folder_structure(output_dir)
    date_range = weblog.get('date_range', 'unknown_date')
    title = weblog.get('title', 'Untitled')
    content = weblog.get('content', '')
    for weblog in weblogs:
        date_range = weblog.get('date_range', 'unknown_date')
        title = weblog.get('title', 'Untitled')
        content = weblog.get('content', '')
    # Create a filename-friendly version of the title
    filename = f"{date_range}_{title.lower().replace(' ', '_')}"
    # Write markdown file
    md_path = os.path.join(output_dir, 'markdown', f"{filename}.md")
    write_markdown_file(content, md_path)
    # Write text file
    txt_path = os.path.join(output_dir, 'html', f"{filename}.html")
    write_text_file(content, txt_path)
        # Create a filename-friendly version of the title
        filename = f"{date_range}_{title.lower().replace(' ', '_')}"
        # Write markdown file
        md_path = os.path.join(output_dir, 'markdown', f"{filename}.md")
        write_markdown_file(content, md_path)
        # Write the HTML file
        html_path = os.path.join(output_dir, 'html', f"{filename}.html")
        write_text_file(content, html_path)
    # Generate HTML index after processing all weblogs
    generate_html_index(output_dir)
if __name__ == "__main__":
    # Test the module functionality
    test_weblog = {
        'date_range': '2023-06-01_to_2023-06-07',
        'title': 'Weekly Development Update',
        'content': '# Weekly Development Update\n\nThis week, we made significant progress on...'
    }
    write_weblog(test_weblog, 'output')
    print("Test weblog written successfully.")
    test_weblogs = [
        {
            'date_range': '2023-06-01_to_2023-06-07',
            'title': 'Weekly Development Update',
            'content': '# Weekly Development Update\n\nThis week, we made significant progress on...'
        },
        {
            'date_range': '2023-06-08_to_2023-06-14',
            'title': 'Sprint Review',
            'content': '# Sprint Review\n\nDuring this sprint, we accomplished...'
        }
    ]
    write_weblog(test_weblogs, 'output')
    print("Test weblogs written successfully.")

View File

@ -5,6 +5,9 @@ from git.exc import InvalidGitRepositoryError
import logging
from datetime import datetime, timedelta
from collections import defaultdict
import json
from dateutil.parser import parse
from dateutil.tz import tzutc
logger = logging.getLogger(__name__)
@ -15,6 +18,34 @@ class GitOperations:
        self.temp_dir = None
        self.commits = []
        self.default_branch = os.getenv('DEFAULT_BRANCH', 'main')
        self.cache_file = os.path.join(os.getcwd(), 'commit_cache.json')
        self.commit_cache = self._load_cache()
        self.skip_processed = os.getenv('SKIP_PROCESSED_COMMITS', 'false').lower() == 'true'
    def _load_cache(self):
        # Load previously elaborated commits from commit_cache.json, if it exists
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'r') as f:
                return json.load(f)
        return {}
    def _save_cache(self):
        # Persist the in-memory commit cache to disk
        with open(self.cache_file, 'w') as f:
            json.dump(self.commit_cache, f)
    def elaborate_commit(self, commit):
        # Return the cached summary if this commit was already elaborated
        if commit.hexsha in self.commit_cache:
            return self.commit_cache[commit.hexsha]
        # Reduce the GitPython commit object to a JSON-serializable dict
        elaborated_commit = {
            'hexsha': commit.hexsha,
            'author': str(commit.author),
            'date': commit.committed_datetime.isoformat(),
            'message': commit.message.strip()
        }
        self.commit_cache[commit.hexsha] = elaborated_commit
        self._save_cache()
        return elaborated_commit
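For reference, `commit_cache.json` maps each full hexsha to the dict produced by `elaborate_commit`. A sample entry, abbreviated and illustrative, using this commit's own metadata:
```json
{
  "59dd9fadfa": {
    "hexsha": "59dd9fadfa",
    "author": "tcsenpai",
    "date": "2024-10-11T12:23:14+02:00",
    "message": "Enabled timeframes, models, and caching"
  }
}
```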
    def list_commits(self):
        if self.repo_path_or_url.startswith('http://') or self.repo_path_or_url.startswith('https://'):
@ -34,7 +65,7 @@ class GitOperations:
logger.info(f"Found {len(commits)} commits")
for commit in commits:
self.commits.append(commit)
self.commits.append(self.elaborate_commit(commit))
print(f"Commit: {commit.hexsha}")
print(f"Author: {commit.author}")
print(f"Date: {commit.committed_datetime}")
@ -69,7 +100,7 @@ class GitOperations:
logger.info(f"Cleaning up temporary directory {self.temp_dir}")
shutil.rmtree(self.temp_dir)
def group_commits_by_days(self, days=None):
def group_commits_by_days(self, days=None, start_date=None, end_date=None):
if days is None:
days = int(os.getenv('GROUP_COMMITS_DAYS', 30))
@ -78,29 +109,56 @@ class GitOperations:
        grouped_commits = defaultdict(list)
        # Convert start_date and end_date to UTC-aware datetimes
        if start_date:
            start_date = start_date.replace(tzinfo=tzutc())
        if end_date:
            end_date = end_date.replace(tzinfo=tzutc())
        # Filter commits based on date range and skip processed commits if enabled
        filtered_commits = []
        for commit in self.commits:
            commit_date = parse(commit['date']).replace(tzinfo=tzutc())
            if (start_date is None or commit_date >= start_date) and \
               (end_date is None or commit_date <= end_date):
                if not self.skip_processed or commit['hexsha'] not in self.commit_cache:
                    filtered_commits.append(commit)
                    if self.skip_processed:
                        self.commit_cache[commit['hexsha']] = True
        # Sort commits by date (oldest first)
        sorted_commits = sorted(self.commits, key=lambda c: c.committed_datetime)
        sorted_commits = sorted(filtered_commits, key=lambda c: c['date'])
        if not sorted_commits:
            return []
            return {}
        # Get the date of the oldest commit
        current_date = sorted_commits[0].committed_datetime.date()
        end_date = current_date + timedelta(days=days)
        # Group commits by days
        current_date = parse(sorted_commits[0]['date']).replace(tzinfo=tzutc()).date()
        end_group_date = current_date + timedelta(days=days)
        group = []
        for commit in sorted_commits:
            commit_date = commit.committed_datetime.date()
            if commit_date <= end_date:
            commit_date = parse(commit['date']).replace(tzinfo=tzutc()).date()
            if commit_date <= end_group_date:
                group.append(commit)
            else:
                grouped_commits[f"{current_date} to {end_date}"] = group
                grouped_commits[f"{current_date} to {end_group_date}"] = group
                current_date = commit_date
                end_date = current_date + timedelta(days=days)
                end_group_date = current_date + timedelta(days=days)
                group = [commit]
        # Add the last group
        if group:
            grouped_commits[f"{current_date} to {end_date}"] = group
            grouped_commits[f"{current_date} to {end_group_date}"] = group
        return dict(grouped_commits)
        # Save the updated cache
        if self.skip_processed:
            self._save_cache()
        return dict(grouped_commits)
    def delete_cache(self):
        if os.path.exists(self.cache_file):
            os.remove(self.cache_file)
            logger.info(f"Deleted cache file: {self.cache_file}")
        self.commit_cache = {}
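A minimal usage sketch of the caching and grouping flow added above; the constructor argument is assumed from `self.repo_path_or_url`, and the dates mirror the example timeframe:
```python
from datetime import datetime

# Sketch only: the constructor signature is inferred from the attributes above
ops = GitOperations("/path/to/local/repo")
ops.list_commits()  # populates ops.commits with cached, JSON-serializable dicts

groups = ops.group_commits_by_days(
    days=7,
    start_date=datetime(2024, 9, 1),
    end_date=datetime(2024, 9, 30),
)
for date_range, commits in groups.items():
    print(f"{date_range}: {len(commits)} commits")
```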

View File

@ -5,7 +5,7 @@ import logging
logger = logging.getLogger(__name__)
class Ollamator:
    def __init__(self, ollama_url, model="llama3"):
    def __init__(self, ollama_url, model="llama3.1:8b"):
        self.ollama_url = ollama_url
        self.model = model
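With the default bumped to `llama3.1:8b`, the model can still be overridden via the `OLLAMA_MODEL` variable documented in the README. A sketch of how the two might be wired together (the environment lookup is illustrative, not code from this commit):
```python
import os

# Illustrative wiring: take the URL and model from the environment,
# falling back to the new default model
ollamator = Ollamator(
    os.getenv("OLLAMA_URL", "http://localhost:11434"),
    model=os.getenv("OLLAMA_MODEL", "llama3.1:8b"),
)
```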

timeframes_config.json.example

@ -0,0 +1,10 @@
{
    "timeframes": [
        {
            "start_date": "2024-09-01",
            "end_date": "2024-09-30",
            "use_cache": true,
            "grouping_days": 7
        }
    ]
}

uv.lock

@ -71,6 +71,7 @@ source = { editable = "." }
dependencies = [
    { name = "gitpython" },
    { name = "markdown" },
    { name = "python-dateutil" },
    { name = "python-dotenv" },
    { name = "requests" },
]
@ -79,6 +80,7 @@ dependencies = [
requires-dist = [
    { name = "gitpython", specifier = ">=3.1.43" },
    { name = "markdown", specifier = ">=3.7" },
    { name = "python-dateutil", specifier = ">=2.9.0.post0" },
    { name = "python-dotenv", specifier = ">=1.0.1" },
    { name = "requests", specifier = ">=2.32.3" },
]
@ -125,6 +127,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/3f/08/83871f3c50fc983b88547c196d11cf8c3340e37c32d2e9d6152abe2c61f7/Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803", size = 106349 },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
wheels = [
    { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
]
[[package]]
name = "python-dotenv"
version = "1.0.1"
@ -149,6 +163,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
]
[[package]]
name = "six"
version = "1.16.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/71/39/171f1c67cd00715f190ba0b100d606d440a28c93c7714febeca8b79af85e/six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", size = 34041 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254", size = 11053 },
]
[[package]]
name = "smmap"
version = "5.0.1"