diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bdf61af..771fc6b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -45,7 +45,7 @@ Thank you for your interest in contributing to **doi2dataset**! We welcome contr ### Prerequisites -- Python 3.11+ +- Python 3.10+ - pip ### Installation diff --git a/pyproject.toml b/pyproject.toml index 6a81b2e..84bd858 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,14 +20,14 @@ classifiers = [ "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Software Development :: Libraries :: Python Modules", "Programming Language :: Python :: 3", - + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", ] keywords = ["doi", "dataverse", "metadata", "research", "datasets"] -requires-python = ">=3.11" +requires-python = ">=3.10" dependencies = [ "dnspython>=2.7.0,<3.0.0", "requests>=2.32.3,<2.33.0", @@ -112,7 +112,7 @@ exclude_lines = [ [tool.ruff] line-length = 88 -target-version = "py311" +target-version = "py310" extend-exclude = [".venv", "build", "dist", "docs", ".pytest_cache", "htmlcov"] [tool.ruff.lint] diff --git a/scripts/generate_changelog.py b/scripts/generate_changelog.py deleted file mode 100755 index 15ee0b3..0000000 --- a/scripts/generate_changelog.py +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env python3 -""" -Changelog generation script for doi2dataset. - -This script generates changelog entries from conventional commits since the last release. -It can be used to assist with creating changelog entries for new releases. 
-""" - -import argparse -import re -import shutil -import subprocess -from datetime import datetime -from typing import TypedDict - - -class CommitData(TypedDict): - """Type definition for parsed commit data.""" - - type: str - scope: str | None - description: str - is_breaking: bool - raw: str - - -class ChangelogGenerator: - """Generate changelog entries from conventional commits.""" - - def __init__(self): - self.commit_types = { - "feat": "Added", - "fix": "Fixed", - "docs": "Documentation", - "style": "Style", - "refactor": "Changed", - "perf": "Performance", - "test": "Testing", - "build": "Build", - "ci": "CI/CD", - "chore": "Maintenance", - "revert": "Reverted", - } - - # Regex to parse conventional commit messages - self.commit_regex = re.compile( - r"^(?P\w+)(?:\((?P[\w-]+)\))?: (?P.+)$" - ) - - # Breaking changes pattern - self.breaking_regex = re.compile(r"(BREAKING CHANGE:|!:)") - - def get_last_tag(self) -> str | None: - """Get the most recent git tag.""" - git_executable = shutil.which("git") - try: - result = subprocess.run( - [git_executable, "describe", "--tags", "--abbrev=0"], - capture_output=True, - text=True, - check=True, - ) - return result.stdout.strip() - except subprocess.CalledProcessError: - return None - - def get_commits_since_tag(self, tag: str | None = None) -> list[str]: - """Get commit messages since the specified tag (or all if no tag).""" - if tag: - git_range = f"{tag}..HEAD" - else: - git_range = "HEAD" - - git_executable = shutil.which("git") - try: - result = subprocess.run( - [git_executable, "log", git_range, "--pretty=format:%s", "--reverse"], - capture_output=True, - text=True, - check=True, - ) - return [ - line.strip() - for line in result.stdout.strip().split("\n") - if line.strip() - ] - except subprocess.CalledProcessError: - return [] - - def parse_commit(self, commit_message: str) -> CommitData | None: - """Parse a conventional commit message.""" - match = self.commit_regex.match(commit_message) - if not match: - 
return None - - commit_type = match.group("type") - scope = match.group("scope") - description = match.group("description") - - # Check for breaking changes - is_breaking = bool(self.breaking_regex.search(commit_message)) - - return CommitData( - type=commit_type, - scope=scope, - description=description, - is_breaking=is_breaking, - raw=commit_message, - ) - - def categorize_commits(self, commits: list[str]) -> dict[str, list[CommitData]]: - """Categorize commits by type.""" - categorized: dict[str, list[CommitData]] = {} - breaking_changes: list[CommitData] = [] - - for commit_msg in commits: - parsed = self.parse_commit(commit_msg) - if not parsed: - continue - - commit_type = parsed["type"] - - # Handle breaking changes specially - if parsed["is_breaking"]: - breaking_changes.append(parsed) - continue - - # Skip certain types that aren't user-relevant - if commit_type in ["test", "ci", "build", "chore"]: - continue - - # Map commit type to changelog category - category = self.commit_types.get(commit_type, "Changed") - - if category not in categorized: - categorized[category] = [] - - categorized[category].append(parsed) - - # Add breaking changes as a separate category - if breaking_changes: - categorized["Breaking Changes"] = breaking_changes - - return categorized - - def format_changelog_entry( - self, version: str, categorized_commits: dict[str, list[CommitData]] - ) -> str: - """Format the changelog entry.""" - today = datetime.now().strftime("%Y-%m-%d") - - lines: list[str] = [f"## [{version}] - {today}", ""] - - # Define order of categories - category_order: list[str] = [ - "Breaking Changes", - "Added", - "Changed", - "Fixed", - "Performance", - "Documentation", - "Style", - "CI/CD", - "Maintenance", - ] - - for category in category_order: - if category not in categorized_commits: - continue - - commits: list[CommitData] = categorized_commits[category] - if not commits: - continue - - lines.append(f"### {category}") - lines.append("") - - for commit in 
commits: - description: str = commit["description"] - scope: str | None = commit["scope"] - - # Capitalize first letter of description - description = ( - description[0].upper() + description[1:] if description else "" - ) - - # Format the entry - if scope: - entry = f"- **{scope}**: {description}" - else: - entry = f"- {description}" - - lines.append(entry) - - lines.append("") - - return "\n".join(lines) - - def generate_changelog( - self, version: str | None = None, since_tag: str | None = None - ) -> str: - """Generate changelog entry for commits since the last tag.""" - if since_tag is None: - since_tag = self.get_last_tag() - - commits = self.get_commits_since_tag(since_tag) - - if not commits: - return "No commits found since last release." - - categorized = self.categorize_commits(commits) - - if not categorized: - return "No relevant commits found for changelog." - - # Use provided version or prompt for it - if version is None: - if since_tag: - print(f"Last release was: {since_tag}") - else: - print("No previous releases found") - version = input("Enter version for this release (e.g., v2.0.4): ").strip() - - return self.format_changelog_entry(version, categorized) - - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser( - description="Generate changelog entries from conventional commits" - ) - parser.add_argument( - "--version", help="Version for the changelog entry (e.g., v2.0.4)" - ) - parser.add_argument( - "--since", help="Generate changelog since this tag (default: last tag)" - ) - parser.add_argument("--output", help="Output file (default: stdout)") - parser.add_argument( - "--preview", action="store_true", help="Preview commits that will be included" - ) - - args = parser.parse_args() - - generator = ChangelogGenerator() - - if args.preview: - # Show preview of commits - since_tag = args.since or generator.get_last_tag() - commits = generator.get_commits_since_tag(since_tag) - - print(f"Commits since {since_tag or 
'beginning'}:") - print("-" * 50) - - for commit in commits: - parsed = generator.parse_commit(commit) - if parsed: - type_info = f"[{parsed['type']}]" - if parsed["scope"]: - type_info += f"({parsed['scope']})" - print(f"{type_info}: {parsed['description']}") - else: - print(f"[unparsed]: {commit}") - - return - - # Generate changelog - changelog = generator.generate_changelog(args.version, args.since) - - if args.output: - with open(args.output, "w") as f: - f.write(changelog) - print(f"Changelog written to {args.output}") - else: - print(changelog) - - -if __name__ == "__main__": - main() diff --git a/scripts/lint-commit.py b/scripts/lint-commit.py index 55ad6c7..a424e43 100644 --- a/scripts/lint-commit.py +++ b/scripts/lint-commit.py @@ -23,9 +23,7 @@ import sys from pathlib import Path -def run_command( - cmd: list[str], check: bool = True -) -> subprocess.CompletedProcess[str] | subprocess.CalledProcessError: +def run_command(cmd, check=True): """Run a shell command and return the result.""" try: result = subprocess.run(cmd, capture_output=True, text=True, check=check) @@ -38,7 +36,7 @@ def run_command( return e -def check_gitlint_installed() -> None: +def check_gitlint_installed(): """Check if gitlint is installed.""" result = run_command(["which", "gitlint"], check=False) if result.returncode != 0: @@ -48,14 +46,10 @@ def check_gitlint_installed() -> None: sys.exit(1) -def lint_commit( - commit_hash: str | None = None, - commit_range: str | None = None, - staged: bool = False, -) -> bool: +def lint_commit(commit_hash=None, commit_range=None, staged=False): """Lint commit message(s) using gitlint.""" # Build gitlint command - cmd: list[str] = ["gitlint"] + cmd = ["gitlint"] if staged: # Lint staged commit message @@ -88,7 +82,7 @@ def lint_commit( return False -def main() -> None: +def main(): """Main entry point.""" parser = argparse.ArgumentParser( description="Lint git commit messages using gitlint", @@ -125,32 +119,32 @@ Examples: return # Validate 
arguments - exclusive_args: list[str | bool | None] = [args.hash, args.range, args.staged] + exclusive_args = [args.hash, args.range, args.staged] if sum(bool(arg) for arg in exclusive_args) > 1: print("Error: --hash, --range, and --staged are mutually exclusive") sys.exit(1) # Lint commits - success: bool = lint_commit( + success = lint_commit( commit_hash=args.hash, commit_range=args.range, staged=args.staged ) sys.exit(0 if success else 1) -def install_hook() -> None: +def install_hook(): """Install the script as a git commit-msg hook.""" - git_dir: Path = Path(".git") + git_dir = Path(".git") if not git_dir.exists(): print("Error: Not in a git repository") sys.exit(1) - hooks_dir: Path = git_dir / "hooks" + hooks_dir = git_dir / "hooks" hooks_dir.mkdir(exist_ok=True) - hook_file: Path = hooks_dir / "commit-msg" + hook_file = hooks_dir / "commit-msg" - hook_content: str = """#!/usr/bin/env python3 + hook_content = """#!/usr/bin/env python3 # Git commit-msg hook for gitlint # Python-based commit message linting with gitlint import subprocess diff --git a/scripts/prepare-release.py b/scripts/prepare-release.py deleted file mode 100755 index 4e29e6c..0000000 --- a/scripts/prepare-release.py +++ /dev/null @@ -1,297 +0,0 @@ -#!/usr/bin/env python3 -""" -Release preparation script for doi2dataset. - -This script helps maintainers prepare releases by: -1. Generating a draft changelog from conventional commits -2. Optionally integrating it with the existing CHANGELOG.md -3. 
Providing a summary of changes for review - -Usage: - python scripts/prepare-release.py --version v2.0.4 - python scripts/prepare-release.py --version v2.0.4 --update-changelog - python scripts/prepare-release.py --preview -""" - -import argparse -import re -import subprocess -import sys -from pathlib import Path -from typing import TypedDict - - -class CommitData(TypedDict): - """Type definition for parsed commit data.""" - - type: str - scope: str | None - description: str - is_breaking: bool - raw: str - - -class ReleasePreparator: - """Helper for preparing releases with semi-automatic changelog generation.""" - - def __init__(self, project_root: Path): - self.project_root = project_root - self.changelog_path = project_root / "CHANGELOG.md" - - # Import the changelog generator - sys.path.insert(0, str(project_root / "scripts")) - from generate_changelog import ChangelogGenerator - - self.generator = ChangelogGenerator() - - def get_current_version(self) -> str | None: - """Get current version from setuptools_scm.""" - try: - result = subprocess.run( - [ - sys.executable, - "-c", - "from setuptools_scm import get_version; print(get_version())", - ], - capture_output=True, - text=True, - check=True, - cwd=self.project_root, - ) - return result.stdout.strip() - except subprocess.CalledProcessError: - return None - - def validate_version(self, version: str) -> bool: - """Validate version format.""" - pattern = r"^v[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.-]+)?$" - return bool(re.match(pattern, version)) - - def read_changelog(self) -> str: - """Read the current changelog.""" - if not self.changelog_path.exists(): - return "" - return self.changelog_path.read_text() - - def find_unreleased_section( - self, changelog_content: str - ) -> tuple[int, int, str] | None: - """Find the [Unreleased] section in the changelog and return its content.""" - lines = changelog_content.split("\n") - - start_idx = None - end_idx = None - - for i, line in enumerate(lines): - if 
line.startswith("## [Unreleased]"): - start_idx = i - elif start_idx is not None and line.startswith("## [v"): - end_idx = i - break - - if start_idx is not None: - end_idx = end_idx or len(lines) - # Extract the content between [Unreleased] and the next version - unreleased_content = "\n".join(lines[start_idx + 1 : end_idx]).strip() - return start_idx, end_idx, unreleased_content - - return None - - def update_changelog(self, version: str, generated_entry: str) -> str: - """Update the changelog with the new release entry.""" - current_content = self.read_changelog() - - if not current_content: - # Create new changelog if it doesn't exist - header = """# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased] - -""" - return header + generated_entry + "\n" - - # Find the [Unreleased] section - unreleased_section = self.find_unreleased_section(current_content) - - if unreleased_section: - start_idx, end_idx, unreleased_content = unreleased_section - lines = current_content.split("\n") - - # Create new structure: keep existing [Unreleased] content, add new release - new_lines = lines[:start_idx] - new_lines.append("## [Unreleased]") - new_lines.append("") - - # Add the unreleased content back if it exists and has substance - if unreleased_content and unreleased_content.strip(): - new_lines.append(unreleased_content) - new_lines.append("") - - # Add the new release entry - new_lines.append(generated_entry.rstrip()) - new_lines.append("") - new_lines.extend(lines[end_idx:]) - - return "\n".join(new_lines) - else: - # If no [Unreleased] section found, add after the header - lines = current_content.split("\n") - - # Find where to insert (after the header) - insert_idx = 0 - for i, line in enumerate(lines): - if line.startswith("## [Unreleased]"): - 
insert_idx = i - break - elif line.startswith("## [v") and insert_idx == 0: - insert_idx = i - break - - if insert_idx == 0: - # Add after the first few header lines - insert_idx = min(7, len(lines)) - - new_lines = lines[:insert_idx] - new_lines.append("## [Unreleased]") - new_lines.append("") - new_lines.append(generated_entry.rstrip()) - new_lines.append("") - new_lines.extend(lines[insert_idx:]) - - return "\n".join(new_lines) - - def show_preview(self): - """Show a preview of what will be included in the changelog.""" - print("šŸ” Preview of commits since last release:") - print("=" * 60) - - last_tag = self.generator.get_last_tag() - commits = self.generator.get_commits_since_tag(last_tag) - - if not commits: - print("No commits found since last release.") - return - - print(f"Since: {last_tag or 'beginning'}") - print(f"Commits: {len(commits)}") - print("-" * 60) - - for commit in commits: - parsed = self.generator.parse_commit(commit) - if parsed: - type_info = f"[{parsed['type']}]" - if parsed["scope"]: - type_info += f"({parsed['scope']})" - if parsed["is_breaking"]: - type_info += " āš ļø BREAKING" - print(f"{type_info}: {parsed['description']}") - else: - print(f"[unparsed]: {commit}") - - def prepare_release(self, version: str, update_changelog: bool = False) -> str: - """Prepare a release with the given version.""" - if not self.validate_version(version): - raise ValueError(f"Invalid version format: {version}") - - print(f"šŸš€ Preparing release {version}") - print("-" * 50) - - # Generate changelog entry - generated_entry = self.generator.generate_changelog(version) - - if "No relevant commits found" in generated_entry: - print("āš ļø No relevant commits found for changelog.") - return generated_entry - - print("šŸ“ Generated changelog entry:") - print("=" * 50) - print(generated_entry) - print("=" * 50) - - if update_changelog: - # Update the actual changelog file - updated_content = self.update_changelog(version, generated_entry) - 
self.changelog_path.write_text(updated_content) - print(f"āœ… Updated {self.changelog_path}") - - # Show next steps - print("\nšŸŽÆ Next steps:") - print("1. Review and edit the changelog as needed") - print("2. Commit the changelog update:") - print(" git add CHANGELOG.md") - print(f' git commit -m "docs: update changelog for {version}"') - print("3. Create and push the release tag:") - print(f" git tag {version}") - print(f" git push origin {version}") - else: - print("\nšŸ’” To update CHANGELOG.md automatically, run:") - print( - f" python scripts/prepare-release.py --version {version} --update-changelog" - ) - - return generated_entry - - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser( - description="Prepare a new release with semi-automatic changelog generation" - ) - parser.add_argument("--version", help="Version for the release (e.g., v2.0.4)") - parser.add_argument( - "--update-changelog", - action="store_true", - help="Update CHANGELOG.md with the generated entry", - ) - parser.add_argument( - "--preview", action="store_true", help="Preview commits that will be included" - ) - - args = parser.parse_args() - - project_root = Path(__file__).parent.parent - preparator = ReleasePreparator(project_root) - - try: - if args.preview: - preparator.show_preview() - elif args.version: - preparator.prepare_release(args.version, args.update_changelog) - else: - # Interactive mode - print("šŸŽÆ Release Preparation Helper") - print("=" * 40) - - # Show preview first - preparator.show_preview() - - print("\n" + "=" * 40) - current_version = preparator.get_current_version() - if current_version: - print(f"Current version: {current_version}") - - version = input("Enter version for release (e.g., v2.0.4): ").strip() - - if not version: - print("No version provided. Exiting.") - return - - update = input("Update CHANGELOG.md? 
(y/N): ").strip().lower() - update_changelog = update in ["y", "yes"] - - preparator.prepare_release(version, update_changelog) - - except KeyboardInterrupt: - print("\n\nšŸ‘‹ Release preparation cancelled.") - except Exception as e: - print(f"āŒ Error: {e}") - sys.exit(1) - - -if __name__ == "__main__": - main()