Revert "ci: add changelog generation scripts"

This commit is contained in:
Alexander Minges 2025-07-17 15:06:28 +02:00
parent f7c1e519c1
commit c728c22a77
5 changed files with 16 additions and 598 deletions

View file

@ -45,7 +45,7 @@ Thank you for your interest in contributing to **doi2dataset**! We welcome contr
### Prerequisites
- Python 3.11+
- Python 3.10+
- pip
### Installation

View file

@ -20,14 +20,14 @@ classifiers = [
"Topic :: Scientific/Engineering :: Information Analysis",
"Topic :: Software Development :: Libraries :: Python Modules",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Operating System :: OS Independent",
]
keywords = ["doi", "dataverse", "metadata", "research", "datasets"]
requires-python = ">=3.11"
requires-python = ">=3.10"
dependencies = [
"dnspython>=2.7.0,<3.0.0",
"requests>=2.32.3,<2.33.0",
@ -112,7 +112,7 @@ exclude_lines = [
[tool.ruff]
line-length = 88
target-version = "py311"
target-version = "py310"
extend-exclude = [".venv", "build", "dist", "docs", ".pytest_cache", "htmlcov"]
[tool.ruff.lint]

View file

@ -1,279 +0,0 @@
#!/usr/bin/env python3
"""
Changelog generation script for doi2dataset.
This script generates changelog entries from conventional commits since the last release.
It can be used to assist with creating changelog entries for new releases.
"""
import argparse
import re
import shutil
import subprocess
from datetime import datetime
from typing import TypedDict
class CommitData(TypedDict):
    """Type definition for parsed commit data.

    Describes the dictionary that ``ChangelogGenerator.parse_commit`` builds
    from a single conventional-commit message.
    """

    # Commit type captured before the colon (e.g. "feat", "fix").
    type: str
    # Scope captured from the parentheses after the type; None when absent.
    scope: str | None
    # Free-text description captured after the ": " separator.
    description: str
    # True when the breaking-change pattern matched the message.
    is_breaking: bool
    # The unmodified commit message that was parsed.
    raw: str
class ChangelogGenerator:
"""Generate changelog entries from conventional commits."""
def __init__(self):
self.commit_types = {
"feat": "Added",
"fix": "Fixed",
"docs": "Documentation",
"style": "Style",
"refactor": "Changed",
"perf": "Performance",
"test": "Testing",
"build": "Build",
"ci": "CI/CD",
"chore": "Maintenance",
"revert": "Reverted",
}
# Regex to parse conventional commit messages
self.commit_regex = re.compile(
r"^(?P<type>\w+)(?:\((?P<scope>[\w-]+)\))?: (?P<description>.+)$"
)
# Breaking changes pattern
self.breaking_regex = re.compile(r"(BREAKING CHANGE:|!:)")
def get_last_tag(self) -> str | None:
"""Get the most recent git tag."""
git_executable = shutil.which("git")
try:
result = subprocess.run(
[git_executable, "describe", "--tags", "--abbrev=0"],
capture_output=True,
text=True,
check=True,
)
return result.stdout.strip()
except subprocess.CalledProcessError:
return None
def get_commits_since_tag(self, tag: str | None = None) -> list[str]:
"""Get commit messages since the specified tag (or all if no tag)."""
if tag:
git_range = f"{tag}..HEAD"
else:
git_range = "HEAD"
git_executable = shutil.which("git")
try:
result = subprocess.run(
[git_executable, "log", git_range, "--pretty=format:%s", "--reverse"],
capture_output=True,
text=True,
check=True,
)
return [
line.strip()
for line in result.stdout.strip().split("\n")
if line.strip()
]
except subprocess.CalledProcessError:
return []
def parse_commit(self, commit_message: str) -> CommitData | None:
"""Parse a conventional commit message."""
match = self.commit_regex.match(commit_message)
if not match:
return None
commit_type = match.group("type")
scope = match.group("scope")
description = match.group("description")
# Check for breaking changes
is_breaking = bool(self.breaking_regex.search(commit_message))
return CommitData(
type=commit_type,
scope=scope,
description=description,
is_breaking=is_breaking,
raw=commit_message,
)
def categorize_commits(self, commits: list[str]) -> dict[str, list[CommitData]]:
"""Categorize commits by type."""
categorized: dict[str, list[CommitData]] = {}
breaking_changes: list[CommitData] = []
for commit_msg in commits:
parsed = self.parse_commit(commit_msg)
if not parsed:
continue
commit_type = parsed["type"]
# Handle breaking changes specially
if parsed["is_breaking"]:
breaking_changes.append(parsed)
continue
# Skip certain types that aren't user-relevant
if commit_type in ["test", "ci", "build", "chore"]:
continue
# Map commit type to changelog category
category = self.commit_types.get(commit_type, "Changed")
if category not in categorized:
categorized[category] = []
categorized[category].append(parsed)
# Add breaking changes as a separate category
if breaking_changes:
categorized["Breaking Changes"] = breaking_changes
return categorized
def format_changelog_entry(
self, version: str, categorized_commits: dict[str, list[CommitData]]
) -> str:
"""Format the changelog entry."""
today = datetime.now().strftime("%Y-%m-%d")
lines: list[str] = [f"## [{version}] - {today}", ""]
# Define order of categories
category_order: list[str] = [
"Breaking Changes",
"Added",
"Changed",
"Fixed",
"Performance",
"Documentation",
"Style",
"CI/CD",
"Maintenance",
]
for category in category_order:
if category not in categorized_commits:
continue
commits: list[CommitData] = categorized_commits[category]
if not commits:
continue
lines.append(f"### {category}")
lines.append("")
for commit in commits:
description: str = commit["description"]
scope: str | None = commit["scope"]
# Capitalize first letter of description
description = (
description[0].upper() + description[1:] if description else ""
)
# Format the entry
if scope:
entry = f"- **{scope}**: {description}"
else:
entry = f"- {description}"
lines.append(entry)
lines.append("")
return "\n".join(lines)
def generate_changelog(
self, version: str | None = None, since_tag: str | None = None
) -> str:
"""Generate changelog entry for commits since the last tag."""
if since_tag is None:
since_tag = self.get_last_tag()
commits = self.get_commits_since_tag(since_tag)
if not commits:
return "No commits found since last release."
categorized = self.categorize_commits(commits)
if not categorized:
return "No relevant commits found for changelog."
# Use provided version or prompt for it
if version is None:
if since_tag:
print(f"Last release was: {since_tag}")
else:
print("No previous releases found")
version = input("Enter version for this release (e.g., v2.0.4): ").strip()
return self.format_changelog_entry(version, categorized)
def main():
    """Main entry point.

    Parses the command-line options and then either previews the commits
    since the chosen tag (``--preview``) or generates the changelog entry and
    writes it to ``--output`` or stdout.
    """
    parser = argparse.ArgumentParser(
        description="Generate changelog entries from conventional commits"
    )
    parser.add_argument(
        "--version", help="Version for the changelog entry (e.g., v2.0.4)"
    )
    parser.add_argument(
        "--since", help="Generate changelog since this tag (default: last tag)"
    )
    parser.add_argument("--output", help="Output file (default: stdout)")
    parser.add_argument(
        "--preview", action="store_true", help="Preview commits that will be included"
    )
    args = parser.parse_args()

    generator = ChangelogGenerator()

    if args.preview:
        # Show preview of commits
        since_tag = args.since or generator.get_last_tag()
        commits = generator.get_commits_since_tag(since_tag)
        print(f"Commits since {since_tag or 'beginning'}:")
        print("-" * 50)
        for commit in commits:
            parsed = generator.parse_commit(commit)
            if parsed:
                type_info = f"[{parsed['type']}]"
                if parsed["scope"]:
                    type_info += f"({parsed['scope']})"
                print(f"{type_info}: {parsed['description']}")
            else:
                print(f"[unparsed]: {commit}")
        return

    # Generate changelog
    changelog = generator.generate_changelog(args.version, args.since)

    if args.output:
        # Commit descriptions may contain non-ASCII text; write UTF-8
        # explicitly instead of relying on the platform default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(changelog)
        print(f"Changelog written to {args.output}")
    else:
        print(changelog)


if __name__ == "__main__":
    main()

View file

@ -23,9 +23,7 @@ import sys
from pathlib import Path
def run_command(
cmd: list[str], check: bool = True
) -> subprocess.CompletedProcess[str] | subprocess.CalledProcessError:
def run_command(cmd, check=True):
"""Run a shell command and return the result."""
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=check)
@ -38,7 +36,7 @@ def run_command(
return e
def check_gitlint_installed() -> None:
def check_gitlint_installed():
"""Check if gitlint is installed."""
result = run_command(["which", "gitlint"], check=False)
if result.returncode != 0:
@ -48,14 +46,10 @@ def check_gitlint_installed() -> None:
sys.exit(1)
def lint_commit(
commit_hash: str | None = None,
commit_range: str | None = None,
staged: bool = False,
) -> bool:
def lint_commit(commit_hash=None, commit_range=None, staged=False):
"""Lint commit message(s) using gitlint."""
# Build gitlint command
cmd: list[str] = ["gitlint"]
cmd = ["gitlint"]
if staged:
# Lint staged commit message
@ -88,7 +82,7 @@ def lint_commit(
return False
def main() -> None:
def main():
"""Main entry point."""
parser = argparse.ArgumentParser(
description="Lint git commit messages using gitlint",
@ -125,32 +119,32 @@ Examples:
return
# Validate arguments
exclusive_args: list[str | bool | None] = [args.hash, args.range, args.staged]
exclusive_args = [args.hash, args.range, args.staged]
if sum(bool(arg) for arg in exclusive_args) > 1:
print("Error: --hash, --range, and --staged are mutually exclusive")
sys.exit(1)
# Lint commits
success: bool = lint_commit(
success = lint_commit(
commit_hash=args.hash, commit_range=args.range, staged=args.staged
)
sys.exit(0 if success else 1)
def install_hook() -> None:
def install_hook():
"""Install the script as a git commit-msg hook."""
git_dir: Path = Path(".git")
git_dir = Path(".git")
if not git_dir.exists():
print("Error: Not in a git repository")
sys.exit(1)
hooks_dir: Path = git_dir / "hooks"
hooks_dir = git_dir / "hooks"
hooks_dir.mkdir(exist_ok=True)
hook_file: Path = hooks_dir / "commit-msg"
hook_file = hooks_dir / "commit-msg"
hook_content: str = """#!/usr/bin/env python3
hook_content = """#!/usr/bin/env python3
# Git commit-msg hook for gitlint
# Python-based commit message linting with gitlint
import subprocess

View file

@ -1,297 +0,0 @@
#!/usr/bin/env python3
"""
Release preparation script for doi2dataset.
This script helps maintainers prepare releases by:
1. Generating a draft changelog from conventional commits
2. Optionally integrating it with the existing CHANGELOG.md
3. Providing a summary of changes for review
Usage:
python scripts/prepare-release.py --version v2.0.4
python scripts/prepare-release.py --version v2.0.4 --update-changelog
python scripts/prepare-release.py --preview
"""
import argparse
import re
import subprocess
import sys
from pathlib import Path
from typing import TypedDict
class CommitData(TypedDict):
    """Type definition for parsed commit data.

    NOTE(review): nothing in the visible part of this script references this
    type; the parsed commits used in ``show_preview`` come from the imported
    ``ChangelogGenerator``. Confirm whether this duplicate is still needed.
    """

    # Commit type, printed in the preview as "[type]".
    type: str
    # Optional scope, printed in the preview as "(scope)" when truthy.
    scope: str | None
    # Description text printed after the type/scope prefix.
    description: str
    # Flag that adds the "BREAKING" marker in the preview.
    is_breaking: bool
    # Presumably the original, unparsed commit message - verify against the
    # generator that produces these dictionaries.
    raw: str
class ReleasePreparator:
"""Helper for preparing releases with semi-automatic changelog generation."""
def __init__(self, project_root: Path):
self.project_root = project_root
self.changelog_path = project_root / "CHANGELOG.md"
# Import the changelog generator
sys.path.insert(0, str(project_root / "scripts"))
from generate_changelog import ChangelogGenerator
self.generator = ChangelogGenerator()
def get_current_version(self) -> str | None:
"""Get current version from setuptools_scm."""
try:
result = subprocess.run(
[
sys.executable,
"-c",
"from setuptools_scm import get_version; print(get_version())",
],
capture_output=True,
text=True,
check=True,
cwd=self.project_root,
)
return result.stdout.strip()
except subprocess.CalledProcessError:
return None
def validate_version(self, version: str) -> bool:
"""Validate version format."""
pattern = r"^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.-]+)?$"
return bool(re.match(pattern, version))
def read_changelog(self) -> str:
"""Read the current changelog."""
if not self.changelog_path.exists():
return ""
return self.changelog_path.read_text()
def find_unreleased_section(
self, changelog_content: str
) -> tuple[int, int, str] | None:
"""Find the [Unreleased] section in the changelog and return its content."""
lines = changelog_content.split("\n")
start_idx = None
end_idx = None
for i, line in enumerate(lines):
if line.startswith("## [Unreleased]"):
start_idx = i
elif start_idx is not None and line.startswith("## [v"):
end_idx = i
break
if start_idx is not None:
end_idx = end_idx or len(lines)
# Extract the content between [Unreleased] and the next version
unreleased_content = "\n".join(lines[start_idx + 1 : end_idx]).strip()
return start_idx, end_idx, unreleased_content
return None
def update_changelog(self, version: str, generated_entry: str) -> str:
"""Update the changelog with the new release entry."""
current_content = self.read_changelog()
if not current_content:
# Create new changelog if it doesn't exist
header = """# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
"""
return header + generated_entry + "\n"
# Find the [Unreleased] section
unreleased_section = self.find_unreleased_section(current_content)
if unreleased_section:
start_idx, end_idx, unreleased_content = unreleased_section
lines = current_content.split("\n")
# Create new structure: keep existing [Unreleased] content, add new release
new_lines = lines[:start_idx]
new_lines.append("## [Unreleased]")
new_lines.append("")
# Add the unreleased content back if it exists and has substance
if unreleased_content and unreleased_content.strip():
new_lines.append(unreleased_content)
new_lines.append("")
# Add the new release entry
new_lines.append(generated_entry.rstrip())
new_lines.append("")
new_lines.extend(lines[end_idx:])
return "\n".join(new_lines)
else:
# If no [Unreleased] section found, add after the header
lines = current_content.split("\n")
# Find where to insert (after the header)
insert_idx = 0
for i, line in enumerate(lines):
if line.startswith("## [Unreleased]"):
insert_idx = i
break
elif line.startswith("## [v") and insert_idx == 0:
insert_idx = i
break
if insert_idx == 0:
# Add after the first few header lines
insert_idx = min(7, len(lines))
new_lines = lines[:insert_idx]
new_lines.append("## [Unreleased]")
new_lines.append("")
new_lines.append(generated_entry.rstrip())
new_lines.append("")
new_lines.extend(lines[insert_idx:])
return "\n".join(new_lines)
def show_preview(self):
"""Show a preview of what will be included in the changelog."""
print("🔍 Preview of commits since last release:")
print("=" * 60)
last_tag = self.generator.get_last_tag()
commits = self.generator.get_commits_since_tag(last_tag)
if not commits:
print("No commits found since last release.")
return
print(f"Since: {last_tag or 'beginning'}")
print(f"Commits: {len(commits)}")
print("-" * 60)
for commit in commits:
parsed = self.generator.parse_commit(commit)
if parsed:
type_info = f"[{parsed['type']}]"
if parsed["scope"]:
type_info += f"({parsed['scope']})"
if parsed["is_breaking"]:
type_info += " ⚠️ BREAKING"
print(f"{type_info}: {parsed['description']}")
else:
print(f"[unparsed]: {commit}")
def prepare_release(self, version: str, update_changelog: bool = False) -> str:
"""Prepare a release with the given version."""
if not self.validate_version(version):
raise ValueError(f"Invalid version format: {version}")
print(f"🚀 Preparing release {version}")
print("-" * 50)
# Generate changelog entry
generated_entry = self.generator.generate_changelog(version)
if "No relevant commits found" in generated_entry:
print("⚠️ No relevant commits found for changelog.")
return generated_entry
print("📝 Generated changelog entry:")
print("=" * 50)
print(generated_entry)
print("=" * 50)
if update_changelog:
# Update the actual changelog file
updated_content = self.update_changelog(version, generated_entry)
self.changelog_path.write_text(updated_content)
print(f"✅ Updated {self.changelog_path}")
# Show next steps
print("\n🎯 Next steps:")
print("1. Review and edit the changelog as needed")
print("2. Commit the changelog update:")
print(" git add CHANGELOG.md")
print(f' git commit -m "docs: update changelog for {version}"')
print("3. Create and push the release tag:")
print(f" git tag {version}")
print(f" git push origin {version}")
else:
print("\n💡 To update CHANGELOG.md automatically, run:")
print(
f" python scripts/prepare-release.py --version {version} --update-changelog"
)
return generated_entry
def main():
    """Command-line entry point for release preparation.

    With ``--preview`` only the pending commits are listed; with ``--version``
    the release is prepared non-interactively; otherwise the user is prompted
    for the version and whether to update the changelog. Ctrl-C cancels
    quietly, any other error is reported and ends the process with status 1.
    """
    cli = argparse.ArgumentParser(
        description="Prepare a new release with semi-automatic changelog generation"
    )
    cli.add_argument("--version", help="Version for the release (e.g., v2.0.4)")
    cli.add_argument(
        "--update-changelog",
        action="store_true",
        help="Update CHANGELOG.md with the generated entry",
    )
    cli.add_argument(
        "--preview", action="store_true", help="Preview commits that will be included"
    )
    options = cli.parse_args()

    # This script sits one directory below the project root.
    repo_root = Path(__file__).parent.parent
    helper = ReleasePreparator(repo_root)

    try:
        if options.preview:
            helper.show_preview()
            return

        if options.version:
            helper.prepare_release(options.version, options.update_changelog)
            return

        # Neither flag given: fall back to interactive mode.
        print("🎯 Release Preparation Helper")
        print("=" * 40)

        # Let the user see the pending commits before choosing a version.
        helper.show_preview()
        print("\n" + "=" * 40)

        detected = helper.get_current_version()
        if detected:
            print(f"Current version: {detected}")

        chosen = input("Enter version for release (e.g., v2.0.4): ").strip()
        if not chosen:
            print("No version provided. Exiting.")
            return

        answer = input("Update CHANGELOG.md? (y/N): ").strip().lower()
        helper.prepare_release(chosen, answer in ("y", "yes"))
    except KeyboardInterrupt:
        print("\n\n👋 Release preparation cancelled.")
    except Exception as exc:
        print(f"❌ Error: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()