diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 771fc6b..bdf61af 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -45,7 +45,7 @@ Thank you for your interest in contributing to **doi2dataset**! We welcome contr ### Prerequisites -- Python 3.7+ +- Python 3.11+ - pip ### Installation diff --git a/pyproject.toml b/pyproject.toml index 84bd858..6a81b2e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,14 +20,14 @@ classifiers = [ "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ] keywords = ["doi", "dataverse", "metadata", "research", "datasets"] -requires-python = ">=3.10" +requires-python = ">=3.11" dependencies = [ "dnspython>=2.7.0,<3.0.0", "requests>=2.32.3,<2.33.0", @@ -112,7 +112,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 -target-version = "py310" +target-version = "py311" extend-exclude = [".venv", "build", "dist", "docs", ".pytest_cache", "htmlcov"] [tool.ruff.lint] diff --git a/scripts/generate_changelog.py b/scripts/generate_changelog.py new file mode 100755 index 0000000..15ee0b3 --- /dev/null +++ b/scripts/generate_changelog.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +""" +Changelog generation script for doi2dataset. + +This script generates changelog entries from conventional commits since the last release. +It can be used to assist with creating changelog entries for new releases. +""" + +import argparse +import re +import shutil +import subprocess +from datetime import datetime +from typing import TypedDict + + +class CommitData(TypedDict): + """Type definition for parsed commit data.""" + + type: str + scope: str | None + description: str + is_breaking: bool + raw: str + + +class ChangelogGenerator: + """Generate changelog entries from conventional commits.""" + + def __init__(self): + self.commit_types = { + "feat": "Added", + "fix": "Fixed", + "docs": "Documentation", + "style": "Style", + "refactor": "Changed", + "perf": "Performance", + "test": "Testing", + "build": "Build", + "ci": "CI/CD", + "chore": "Maintenance", + "revert": "Reverted", + } + + # Regex to parse conventional commit messages + self.commit_regex = re.compile( + r"^(?P\w+)(?:\((?P[\w-]+)\))?: (?P.+)$" + ) + + # Breaking changes pattern + self.breaking_regex = re.compile(r"(BREAKING CHANGE:|!:)") + + def get_last_tag(self) -> str | None: + """Get the most recent git tag.""" + git_executable = shutil.which("git") + try: + result = subprocess.run( + [git_executable, "describe", "--tags", "--abbrev=0"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() + except subprocess.CalledProcessError: + return None + + def get_commits_since_tag(self, tag: str | None = None) -> list[str]: + """Get commit messages since the specified tag (or all if no tag).""" + if tag: + git_range = f"{tag}..HEAD" + else: + git_range = "HEAD" + + git_executable = shutil.which("git") + try: + result = subprocess.run( + [git_executable, "log", git_range, "--pretty=format:%s", "--reverse"], + capture_output=True, + text=True, + check=True, + ) + return [ + line.strip() + for line in result.stdout.strip().split("\n") + if line.strip() + ] + except subprocess.CalledProcessError: + return [] + + def parse_commit(self, commit_message: str) -> CommitData | None: + """Parse a conventional commit message.""" + match = self.commit_regex.match(commit_message) + if not match: + return None + + commit_type = match.group("type") + scope = match.group("scope") + description = match.group("description") + + # Check for breaking changes + is_breaking = bool(self.breaking_regex.search(commit_message)) + + return CommitData( + type=commit_type, + scope=scope, + description=description, + is_breaking=is_breaking, + raw=commit_message, + ) + + def categorize_commits(self, commits: list[str]) -> dict[str, list[CommitData]]: + """Categorize commits by type.""" + categorized: dict[str, list[CommitData]] = {} + breaking_changes: list[CommitData] = [] + + for commit_msg in commits: + parsed = self.parse_commit(commit_msg) + if not parsed: + continue + + commit_type = parsed["type"] + + # Handle breaking changes specially + if parsed["is_breaking"]: + breaking_changes.append(parsed) + continue + + # Skip certain types that aren't user-relevant + if commit_type in ["test", "ci", "build", "chore"]: + continue + + # Map commit type to changelog category + category = self.commit_types.get(commit_type, "Changed") + + if category not in categorized: + categorized[category] = [] + + categorized[category].append(parsed) + + # Add breaking changes as a separate category + if breaking_changes: + categorized["Breaking Changes"] = breaking_changes + + return categorized + + def format_changelog_entry( + self, version: str, categorized_commits: dict[str, list[CommitData]] + ) -> str: + """Format the changelog entry.""" + today = datetime.now().strftime("%Y-%m-%d") + + lines: list[str] = [f"## [{version}] - {today}", ""] + + # Define order of categories + category_order: list[str] = [ + "Breaking Changes", + "Added", + "Changed", + "Fixed", + "Performance", + "Documentation", + "Style", + "CI/CD", + "Maintenance", + ] + + for category in category_order: + if category not in categorized_commits: + continue + + commits: list[CommitData] = categorized_commits[category] + if not commits: + continue + + lines.append(f"### {category}") + lines.append("") + + for commit in commits: + description: str = commit["description"] + scope: str | None = commit["scope"] + + # Capitalize first letter of description + description = ( + description[0].upper() + description[1:] if description else "" + ) + + # Format the entry + if scope: + entry = f"- **{scope}**: {description}" + else: + entry = f"- {description}" + + lines.append(entry) + + lines.append("") + + return "\n".join(lines) + + def generate_changelog( + self, version: str | None = None, since_tag: str | None = None + ) -> str: + """Generate changelog entry for commits since the last tag.""" + if since_tag is None: + since_tag = self.get_last_tag() + + commits = self.get_commits_since_tag(since_tag) + + if not commits: + return "No commits found since last release." + + categorized = self.categorize_commits(commits) + + if not categorized: + return "No relevant commits found for changelog." + + # Use provided version or prompt for it + if version is None: + if since_tag: + print(f"Last release was: {since_tag}") + else: + print("No previous releases found") + version = input("Enter version for this release (e.g., v2.0.4): ").strip() + + return self.format_changelog_entry(version, categorized) + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Generate changelog entries from conventional commits" + ) + parser.add_argument( + "--version", help="Version for the changelog entry (e.g., v2.0.4)" + ) + parser.add_argument( + "--since", help="Generate changelog since this tag (default: last tag)" + ) + parser.add_argument("--output", help="Output file (default: stdout)") + parser.add_argument( + "--preview", action="store_true", help="Preview commits that will be included" + ) + + args = parser.parse_args() + + generator = ChangelogGenerator() + + if args.preview: + # Show preview of commits + since_tag = args.since or generator.get_last_tag() + commits = generator.get_commits_since_tag(since_tag) + + print(f"Commits since {since_tag or 'beginning'}:") + print("-" * 50) + + for commit in commits: + parsed = generator.parse_commit(commit) + if parsed: + type_info = f"[{parsed['type']}]" + if parsed["scope"]: + type_info += f"({parsed['scope']})" + print(f"{type_info}: {parsed['description']}") + else: + print(f"[unparsed]: {commit}") + + return + + # Generate changelog + changelog = generator.generate_changelog(args.version, args.since) + + if args.output: + with open(args.output, "w") as f: + f.write(changelog) + print(f"Changelog written to {args.output}") + else: + print(changelog) + + +if __name__ == "__main__": + main() diff --git a/scripts/lint-commit.py b/scripts/lint-commit.py index a424e43..55ad6c7 100644 --- a/scripts/lint-commit.py +++ b/scripts/lint-commit.py @@ -23,7 +23,9 @@ import sys from pathlib import Path -def run_command(cmd, check=True): +def run_command( + cmd: list[str], check: bool = True +) -> subprocess.CompletedProcess[str] | subprocess.CalledProcessError: """Run a shell command and return the result.""" try: result = subprocess.run(cmd, capture_output=True, text=True, check=check) @@ -36,7 +38,7 @@ def run_command(cmd, check=True): return e -def check_gitlint_installed(): +def check_gitlint_installed() -> None: """Check if gitlint is installed.""" result = run_command(["which", "gitlint"], check=False) if result.returncode != 0: @@ -46,10 +48,14 @@ def check_gitlint_installed(): sys.exit(1) -def lint_commit(commit_hash=None, commit_range=None, staged=False): +def lint_commit( + commit_hash: str | None = None, + commit_range: str | None = None, + staged: bool = False, +) -> bool: """Lint commit message(s) using gitlint.""" # Build gitlint command - cmd = ["gitlint"] + cmd: list[str] = ["gitlint"] if staged: # Lint staged commit message @@ -82,7 +88,7 @@ def lint_commit(commit_hash=None, commit_range=None, staged=False): return False -def main(): +def main() -> None: """Main entry point.""" parser = argparse.ArgumentParser( description="Lint git commit messages using gitlint", @@ -119,32 +125,32 @@ Examples: return # Validate arguments - exclusive_args = [args.hash, args.range, args.staged] + exclusive_args: list[str | bool | None] = [args.hash, args.range, args.staged] if sum(bool(arg) for arg in exclusive_args) > 1: print("Error: --hash, --range, and --staged are mutually exclusive") sys.exit(1) # Lint commits - success = lint_commit( + success: bool = lint_commit( commit_hash=args.hash, commit_range=args.range, staged=args.staged ) sys.exit(0 if success else 1) -def install_hook(): +def install_hook() -> None: """Install the script as a git commit-msg hook.""" - git_dir = Path(".git") + git_dir: Path = Path(".git") if not git_dir.exists(): print("Error: Not in a git repository") sys.exit(1) - hooks_dir = git_dir / "hooks" + hooks_dir: Path = git_dir / "hooks" hooks_dir.mkdir(exist_ok=True) - hook_file = hooks_dir / "commit-msg" + hook_file: Path = hooks_dir / "commit-msg" - hook_content = """#!/usr/bin/env python3 + hook_content: str = """#!/usr/bin/env python3 # Git commit-msg hook for gitlint # Python-based commit message linting with gitlint import subprocess diff --git a/scripts/prepare-release.py b/scripts/prepare-release.py new file mode 100755 index 0000000..4e29e6c --- /dev/null +++ b/scripts/prepare-release.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +""" +Release preparation script for doi2dataset. + +This script helps maintainers prepare releases by: +1. Generating a draft changelog from conventional commits +2. Optionally integrating it with the existing CHANGELOG.md +3. Providing a summary of changes for review + +Usage: + python scripts/prepare-release.py --version v2.0.4 + python scripts/prepare-release.py --version v2.0.4 --update-changelog + python scripts/prepare-release.py --preview +""" + +import argparse +import re +import subprocess +import sys +from pathlib import Path +from typing import TypedDict + + +class CommitData(TypedDict): + """Type definition for parsed commit data.""" + + type: str + scope: str | None + description: str + is_breaking: bool + raw: str + + +class ReleasePreparator: + """Helper for preparing releases with semi-automatic changelog generation.""" + + def __init__(self, project_root: Path): + self.project_root = project_root + self.changelog_path = project_root / "CHANGELOG.md" + + # Import the changelog generator + sys.path.insert(0, str(project_root / "scripts")) + from generate_changelog import ChangelogGenerator + + self.generator = ChangelogGenerator() + + def get_current_version(self) -> str | None: + """Get current version from setuptools_scm.""" + try: + result = subprocess.run( + [ + sys.executable, + "-c", + "from setuptools_scm import get_version; print(get_version())", + ], + capture_output=True, + text=True, + check=True, + cwd=self.project_root, + ) + return result.stdout.strip() + except subprocess.CalledProcessError: + return None + + def validate_version(self, version: str) -> bool: + """Validate version format.""" + pattern = r"^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.-]+)?$" + return bool(re.match(pattern, version)) + + def read_changelog(self) -> str: + """Read the current changelog.""" + if not self.changelog_path.exists(): + return "" + return self.changelog_path.read_text() + + def find_unreleased_section( + self, changelog_content: str + ) -> tuple[int, int, str] | None: + """Find the [Unreleased] section in the changelog and return its content.""" + lines = changelog_content.split("\n") + + start_idx = None + end_idx = None + + for i, line in enumerate(lines): + if line.startswith("## [Unreleased]"): + start_idx = i + elif start_idx is not None and line.startswith("## [v"): + end_idx = i + break + + if start_idx is not None: + end_idx = end_idx or len(lines) + # Extract the content between [Unreleased] and the next version + unreleased_content = "\n".join(lines[start_idx + 1 : end_idx]).strip() + return start_idx, end_idx, unreleased_content + + return None + + def update_changelog(self, version: str, generated_entry: str) -> str: + """Update the changelog with the new release entry.""" + current_content = self.read_changelog() + + if not current_content: + # Create new changelog if it doesn't exist + header = """# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +""" + return header + generated_entry + "\n" + + # Find the [Unreleased] section + unreleased_section = self.find_unreleased_section(current_content) + + if unreleased_section: + start_idx, end_idx, unreleased_content = unreleased_section + lines = current_content.split("\n") + + # Create new structure: keep existing [Unreleased] content, add new release + new_lines = lines[:start_idx] + new_lines.append("## [Unreleased]") + new_lines.append("") + + # Add the unreleased content back if it exists and has substance + if unreleased_content and unreleased_content.strip(): + new_lines.append(unreleased_content) + new_lines.append("") + + # Add the new release entry + new_lines.append(generated_entry.rstrip()) + new_lines.append("") + new_lines.extend(lines[end_idx:]) + + return "\n".join(new_lines) + else: + # If no [Unreleased] section found, add after the header + lines = current_content.split("\n") + + # Find where to insert (after the header) + insert_idx = 0 + for i, line in enumerate(lines): + if line.startswith("## [Unreleased]"): + insert_idx = i + break + elif line.startswith("## [v") and insert_idx == 0: + insert_idx = i + break + + if insert_idx == 0: + # Add after the first few header lines + insert_idx = min(7, len(lines)) + + new_lines = lines[:insert_idx] + new_lines.append("## [Unreleased]") + new_lines.append("") + new_lines.append(generated_entry.rstrip()) + new_lines.append("") + new_lines.extend(lines[insert_idx:]) + + return "\n".join(new_lines) + + def show_preview(self): + """Show a preview of what will be included in the changelog.""" + print("šŸ” Preview of commits since last release:") + print("=" * 60) + + last_tag = self.generator.get_last_tag() + commits = self.generator.get_commits_since_tag(last_tag) + + if not commits: + print("No commits found since last release.") + return + + print(f"Since: {last_tag or 'beginning'}") + print(f"Commits: {len(commits)}") + print("-" * 60) + + for commit in commits: + parsed = self.generator.parse_commit(commit) + if parsed: + type_info = f"[{parsed['type']}]" + if parsed["scope"]: + type_info += f"({parsed['scope']})" + if parsed["is_breaking"]: + type_info += " āš ļø BREAKING" + print(f"{type_info}: {parsed['description']}") + else: + print(f"[unparsed]: {commit}") + + def prepare_release(self, version: str, update_changelog: bool = False) -> str: + """Prepare a release with the given version.""" + if not self.validate_version(version): + raise ValueError(f"Invalid version format: {version}") + + print(f"šŸš€ Preparing release {version}") + print("-" * 50) + + # Generate changelog entry + generated_entry = self.generator.generate_changelog(version) + + if "No relevant commits found" in generated_entry: + print("āš ļø No relevant commits found for changelog.") + return generated_entry + + print("šŸ“ Generated changelog entry:") + print("=" * 50) + print(generated_entry) + print("=" * 50) + + if update_changelog: + # Update the actual changelog file + updated_content = self.update_changelog(version, generated_entry) + self.changelog_path.write_text(updated_content) + print(f"āœ… Updated {self.changelog_path}") + + # Show next steps + print("\nšŸŽÆ Next steps:") + print("1. Review and edit the changelog as needed") + print("2. Commit the changelog update:") + print(" git add CHANGELOG.md") + print(f' git commit -m "docs: update changelog for {version}"') + print("3. Create and push the release tag:") + print(f" git tag {version}") + print(f" git push origin {version}") + else: + print("\nšŸ’” To update CHANGELOG.md automatically, run:") + print( + f" python scripts/prepare-release.py --version {version} --update-changelog" + ) + + return generated_entry + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Prepare a new release with semi-automatic changelog generation" + ) + parser.add_argument("--version", help="Version for the release (e.g., v2.0.4)") + parser.add_argument( + "--update-changelog", + action="store_true", + help="Update CHANGELOG.md with the generated entry", + ) + parser.add_argument( + "--preview", action="store_true", help="Preview commits that will be included" + ) + + args = parser.parse_args() + + project_root = Path(__file__).parent.parent + preparator = ReleasePreparator(project_root) + + try: + if args.preview: + preparator.show_preview() + elif args.version: + preparator.prepare_release(args.version, args.update_changelog) + else: + # Interactive mode + print("šŸŽÆ Release Preparation Helper") + print("=" * 40) + + # Show preview first + preparator.show_preview() + + print("\n" + "=" * 40) + current_version = preparator.get_current_version() + if current_version: + print(f"Current version: {current_version}") + + version = input("Enter version for release (e.g., v2.0.4): ").strip() + + if not version: + print("No version provided. Exiting.") + return + + update = input("Update CHANGELOG.md? (y/N): ").strip().lower() + update_changelog = update in ["y", "yes"] + + preparator.prepare_release(version, update_changelog) + + except KeyboardInterrupt: + print("\n\nšŸ‘‹ Release preparation cancelled.") + except Exception as e: + print(f"āŒ Error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main()