From 9d270ec6016816d78da03d753046a381d13e5aa5 Mon Sep 17 00:00:00 2001 From: Alexander Minges Date: Mon, 14 Jul 2025 09:39:07 +0200 Subject: [PATCH] feat: add pre-commit setup with gitlint --- .pre-commit-config.yaml | 54 +++ README.md | 94 +++++ docs/make.bat | 94 ++--- docs/source/commit-messages.rst | 229 +++++++++++ docs/source/contributing.rst | 38 +- docs/source/index.rst | 1 + doi2dataset.py | 679 +++++++++++++++++++++---------- pyproject.toml | 5 + requirements-dev.txt | 1 + scripts/lint-commit.py | 179 ++++++++ tests/test_citation_builder.py | 12 +- tests/test_doi2dataset.py | 4 + tests/test_fetch_doi_mock.py | 14 +- tests/test_license_processor.py | 26 +- tests/test_metadata_processor.py | 95 +++-- tests/test_person.py | 16 +- tests/test_publication_utils.py | 16 +- 17 files changed, 1197 insertions(+), 360 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 docs/source/commit-messages.rst create mode 100644 scripts/lint-commit.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..419ab30 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,54 @@ +# Pre-commit configuration for doi2dataset +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks + +repos: + # Built-in pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: check-json + - id: check-toml + - id: mixed-line-ending + args: ['--fix=lf'] + + # Python code formatting and linting + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.9 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + # Git commit message linting with gitlint + - repo: https://github.com/jorisroovers/gitlint + rev: v0.19.1 + hooks: + - id: gitlint + stages: [commit-msg] + + # Optional: Check for common security issues + - repo: https://github.com/PyCQA/bandit + rev: 1.7.10 + hooks: + - id: bandit + args: ["-c", "pyproject.toml"] + additional_dependencies: ["bandit[toml]"] + +# Configuration for specific hooks +ci: + autofix_commit_msg: | + [pre-commit.ci] auto fixes from pre-commit hooks + + for more information, see https://pre-commit.ci + autofix_prs: true + autoupdate_branch: '' + autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate' + autoupdate_schedule: weekly + skip: [] + submodules: false diff --git a/README.md b/README.md index 187dbfe..c664f98 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,73 @@ Documentation is automatically built and deployed via GitLab CI/CD: - Deployed to GitLab Pages - Accessible at your project's Pages URL +## Git Commit Message Linting + +This project uses [gitlint](https://jorisroovers.github.io/gitlint/) to enforce consistent commit message formatting. Commit messages should follow the [Conventional Commits](https://www.conventionalcommits.org/) specification. 
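+
+The hooks in `.pre-commit-config.yaml` are managed with [pre-commit](https://pre-commit.com/). A typical local setup looks like this (a sketch; adjust to your environment):
+
+```bash
+pip install pre-commit
+pre-commit install --hook-type pre-commit --hook-type commit-msg
+pre-commit run --all-files  # one-off check of the whole repository
+```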
+ +### Commit Message Format + +Commit messages must follow this format: + +``` +(): + +[optional body] + +[optional footer(s)] +``` + +**Types:** + +- `feat`: A new feature +- `fix`: A bug fix +- `docs`: Documentation only changes +- `style`: Changes that do not affect the meaning of the code +- `refactor`: A code change that neither fixes a bug nor adds a feature +- `test`: Adding missing tests or correcting existing tests +- `chore`: Changes to the build process or auxiliary tools +- `ci`: Changes to CI configuration files and scripts +- `build`: Changes that affect the build system or dependencies +- `perf`: A code change that improves performance +- `revert`: Reverts a previous commit + +**Examples:** + +``` +feat(api): add support for DOI batch processing +fix(metadata): handle missing author information gracefully +docs: update installation instructions +test(citation): add tests for license processing +``` + +### Linting Commit Messages + +To lint commit messages, use the provided script: + +```bash +# Lint the last commit +python scripts/lint-commit.py + +# Lint a specific commit +python scripts/lint-commit.py --hash + +# Lint a range of commits +python scripts/lint-commit.py --range HEAD~3.. + +# Install as a git hook (optional) +python scripts/lint-commit.py --install-hook +``` + +### Git Hook Installation + +You can optionally install a git hook that automatically checks commit messages: + +```bash +python scripts/lint-commit.py --install-hook +``` + +This will create a `commit-msg` hook that runs automatically when you commit, ensuring all commit messages follow the required format. + ## Testing Tests are implemented with pytest. The test suite provides comprehensive coverage of core functionalities. To run the tests, execute: @@ -270,6 +337,33 @@ This version has been updated to make the tool more generalized and suitable for Contributions are welcome! Please fork the repository and submit a pull request with your improvements. +### Development Setup + +1. Install development dependencies: + + ```bash + pip install -r requirements-dev.txt + ``` + +2. Run tests to ensure everything works: + + ```bash + pytest + ``` + +3. Install the git commit message hook (recommended): + ```bash + python scripts/lint-commit.py --install-hook + ``` + +### Code Quality + +- Follow the existing code style and formatting +- Write tests for new functionality +- Ensure all tests pass before submitting +- Use meaningful commit messages following the conventional commits format +- Run `python scripts/lint-commit.py` to validate commit messages + ## License This project is licensed under the MIT License. See the [LICENSE.md](LICENSE.md) file for details. diff --git a/docs/make.bat b/docs/make.bat index 9725e0f..3857d4c 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,47 +1,47 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -if "%1" == "multiversion" goto multiversion -if "%1" == "multiversion-clean" goto multiversion-clean - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:multiversion -sphinx-multiversion %SOURCEDIR% %BUILDDIR%\multiversion\html %SPHINXOPTS% %O% -goto end - -:multiversion-clean -rmdir /s /q %BUILDDIR%\html 2>nul -sphinx-multiversion %SOURCEDIR% %BUILDDIR%\multiversion\html %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +if "%1" == "multiversion" goto multiversion +if "%1" == "multiversion-clean" goto multiversion-clean + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:multiversion +sphinx-multiversion %SOURCEDIR% %BUILDDIR%\multiversion\html %SPHINXOPTS% %O% +goto end + +:multiversion-clean +rmdir /s /q %BUILDDIR%\html 2>nul +sphinx-multiversion %SOURCEDIR% %BUILDDIR%\multiversion\html %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/commit-messages.rst b/docs/source/commit-messages.rst new file mode 100644 index 0000000..b5388dd --- /dev/null +++ b/docs/source/commit-messages.rst @@ -0,0 +1,229 @@ +Git Commit Message Linting +=========================== + +This project uses `gitlint `_ to enforce consistent commit message formatting. All commit messages must follow the `Conventional Commits `_ specification to ensure clear and standardized project history. + +Why Commit Message Standards Matter +----------------------------------- + +Standardized commit messages provide several benefits: + +* **Improved readability**: Clear, consistent format makes it easier to understand changes +* **Automated changelog generation**: Tools can parse conventional commits to generate changelogs +* **Better collaboration**: Team members can quickly understand the nature of changes +* **Easier debugging**: Well-formatted commits help identify when bugs were introduced +* **Semantic versioning**: Conventional commits can trigger automated version bumps + +Commit Message Format +--------------------- + +All commit messages must follow this format: + +.. code-block:: text + + (): + + [optional body] + + [optional footer(s)] + +Components +~~~~~~~~~~ + +**Type (required)** + The type of change being made. Must be one of: + + * ``feat``: A new feature + * ``fix``: A bug fix + * ``docs``: Documentation only changes + * ``style``: Changes that do not affect the meaning of the code (white-space, formatting, etc.) 
+ * ``refactor``: A code change that neither fixes a bug nor adds a feature + * ``test``: Adding missing tests or correcting existing tests + * ``chore``: Changes to the build process or auxiliary tools and libraries + * ``ci``: Changes to CI configuration files and scripts + * ``build``: Changes that affect the build system or external dependencies + * ``perf``: A code change that improves performance + * ``revert``: Reverts a previous commit + +**Scope (optional)** + The scope of the change, enclosed in parentheses. Common scopes for this project: + + * ``api``: Changes to API functionality + * ``metadata``: Changes to metadata processing + * ``citation``: Changes to citation building + * ``config``: Changes to configuration handling + * ``tests``: Changes to test files + * ``docs``: Changes to documentation + * ``deps``: Changes to dependencies + +**Description (required)** + A short description of the change: + + * Use the imperative, present tense: "change" not "changed" nor "changes" + * Don't capitalize the first letter + * No period (.) at the end + * Maximum 50 characters + +**Body (optional)** + A longer description of the change: + + * Use the imperative, present tense + * Wrap at 72 characters + * Explain what and why vs. how + +**Footer (optional)** + One or more footers may be provided: + + * ``BREAKING CHANGE:`` description of breaking changes + * ``Closes #123``: reference to closed issues + * ``Co-authored-by: Name ``: additional authors + +Examples +-------- + +**Simple feature addition:** + +.. code-block:: text + + feat(api): add support for DOI batch processing + +**Bug fix with scope:** + +.. code-block:: text + + fix(metadata): handle missing author information gracefully + +**Documentation update:** + +.. code-block:: text + + docs: update installation instructions + +**Breaking change:** + +.. code-block:: text + + feat(api): change metadata output format + + BREAKING CHANGE: The metadata output format has changed from JSON + to YAML. Users need to update their parsing code accordingly. + +**Multi-line with body:** + +.. code-block:: text + + refactor(citation): improve author name parsing + + The author name parsing logic has been refactored to handle + more edge cases, including names with multiple middle initials + and international characters. + + Closes #45 + +Configuration +------------- + +The project uses a ``.gitlint`` configuration file that enforces: + +* Maximum title length of 50 characters +* Conventional commit format validation +* Maximum body line length of 72 characters +* Exclusion of certain words like "WIP", "TODO", "FIXME" in titles +* Automatic ignoring of merge commits and dependency updates + +Linting Tools +------------- + +Manual Linting +~~~~~~~~~~~~~~~ + +Use the provided script to lint commit messages: + +.. code-block:: bash + + # Lint the last commit + python scripts/lint-commit.py + + # Lint a specific commit by hash + python scripts/lint-commit.py --hash + + # Lint a range of commits + python scripts/lint-commit.py --range HEAD~3.. + + # Check staged commit message + python scripts/lint-commit.py --staged + +Git Hook Installation +~~~~~~~~~~~~~~~~~~~~~ + +Install an automated git hook to check commit messages: + +.. code-block:: bash + + python scripts/lint-commit.py --install-hook + +This creates a ``commit-msg`` hook that automatically validates commit messages when you commit. The commit will be rejected if the message doesn't meet the requirements. 
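+
+Conceptually, the installed hook is a thin wrapper that hands the commit message
+file to gitlint. A minimal sketch of such a ``commit-msg`` hook (the script
+generated by ``--install-hook`` may differ in detail):
+
+.. code-block:: bash
+
+   #!/bin/sh
+   # .git/hooks/commit-msg: git calls this with the message file as $1.
+   # A non-zero exit status from gitlint aborts the commit.
+   exec gitlint --msg-filename "$1"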
+ +Direct Gitlint Usage +~~~~~~~~~~~~~~~~~~~~ + +You can also use gitlint directly: + +.. code-block:: bash + + # Lint last commit + gitlint + + # Lint specific commit + gitlint --commit + + # Lint commit range + gitlint --commits HEAD~3.. + +Common Validation Errors +------------------------- + +**Title too long** + Keep titles under 50 characters. If you need more space, use the body. + +**Invalid type** + Use only the allowed types: ``feat``, ``fix``, ``docs``, ``style``, ``refactor``, ``test``, ``chore``, ``ci``, ``build``, ``perf``, ``revert``. + +**Missing colon** + Don't forget the colon after the type/scope: ``feat(api): add feature`` + +**Capitalized description** + Don't capitalize the first letter of the description: ``feat: add feature`` not ``feat: Add feature`` + +**Trailing period** + Don't add a period at the end of the title: ``feat: add feature`` not ``feat: add feature.`` + +**Body line too long** + Keep body lines under 72 characters. Break long lines appropriately. + +Troubleshooting +--------------- + +**Gitlint not found** + Install development dependencies: + + .. code-block:: bash + + pip install -r requirements-dev.txt + +**Hook not working** + Ensure the hook is executable: + + .. code-block:: bash + + chmod +x .git/hooks/commit-msg + +**Existing commits don't follow format** + The linting only applies to new commits. Existing commits can be left as-is or rebased if necessary. + +Integration with CI/CD +---------------------- + +The commit message linting can be integrated into CI/CD pipelines to ensure all commits in pull requests follow the standard format. This helps maintain consistency across all contributors. + +For more information on gitlint configuration and advanced usage, see the `official gitlint documentation `_. diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 00e9719..6c0a5db 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -115,20 +115,47 @@ Development Setup pip install -r requirements-dev.txt -4. Make your changes -5. Run tests to ensure everything works -6. Submit a pull request +4. Install the git commit message hook (recommended): + + .. code-block:: bash + + python scripts/lint-commit.py --install-hook + +5. Make your changes +6. Run tests to ensure everything works +7. Validate your commit messages follow the standards +8. Submit a pull request Code Style ---------- Please follow the existing code style and conventions used in the project. Make sure to: -- Write clear, descriptive commit messages +- Write clear, descriptive commit messages following the :doc:`commit-messages` standards - Add tests for new functionality - Update documentation as needed - Follow Python best practices +Commit Message Standards +~~~~~~~~~~~~~~~~~~~~~~~~ + +All commit messages must follow the Conventional Commits specification. See the :doc:`commit-messages` documentation for detailed information on: + +- Required message format +- Available commit types +- Examples of proper commit messages +- How to use the linting tools + +To validate your commit messages: + +.. code-block:: bash + + # Lint the last commit + python scripts/lint-commit.py + + # Install automatic validation hook + python scripts/lint-commit.py --install-hook + Submitting Changes ------------------ @@ -136,6 +163,7 @@ Submitting Changes 2. Make your changes with appropriate tests 3. Ensure all tests pass 4. Update documentation if needed -5. Submit a pull request with a clear description of your changes +5. 
Ensure all commit messages follow the conventional commits format +6. Submit a pull request with a clear description of your changes Thank you for contributing to **doi2dataset**! diff --git a/docs/source/index.rst b/docs/source/index.rst index ea9015b..608c1d4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -39,4 +39,5 @@ Key Features: usage modules contributing + commit-messages faq diff --git a/doi2dataset.py b/doi2dataset.py index 7d23409..12a3373 100755 --- a/doi2dataset.py +++ b/doi2dataset.py @@ -51,11 +51,13 @@ from rich.theme import Theme # Get version from setuptools_scm try: from importlib.metadata import version + __version__ = version("doi2dataset") except ImportError: # Fallback for older Python versions try: import pkg_resources + __version__ = pkg_resources.get_distribution("doi2dataset").version except Exception: __version__ = "1.0.0" # Fallback version @@ -74,34 +76,37 @@ from idutils.validators import is_doi # noqa: E402 # Icon definitions for console output ICONS = { - 'success': "✓", # Simple checkmark - 'error': "✗", # Simple X - 'warning': "!", # Simple exclamation - 'info': "ℹ", # Info symbol - 'processing': "⋯", # Three dots - 'done': "∎", # Filled square - 'file': "⨳", # Document symbol - 'folder': "⊞", # Folder symbol - 'clock': "◷", # Clock symbol - 'search': "⌕", # Search symbol - 'data': "≡", # Three lines - 'doi': "∾", # Link symbol - 'total': "∑", # Sum symbol - 'save': "⤓", # Save/download arrow - 'upload': "⤒" # Upload arrow + "success": "✓", # Simple checkmark + "error": "✗", # Simple X + "warning": "!", # Simple exclamation + "info": "ℹ", # Info symbol + "processing": "⋯", # Three dots + "done": "∎", # Filled square + "file": "⨳", # Document symbol + "folder": "⊞", # Folder symbol + "clock": "◷", # Clock symbol + "search": "⌕", # Search symbol + "data": "≡", # Three lines + "doi": "∾", # Link symbol + "total": "∑", # Sum symbol + "save": "⤓", # Save/download arrow + "upload": "⤒", # Upload arrow } # Theme configuration for Rich console output -THEME = Theme({ - "info": "cyan", - "warning": "yellow", - "error": "red bold", - "success": "green", -}) +THEME = Theme( + { + "info": "cyan", + "warning": "yellow", + "error": "red bold", + "success": "green", + } +) # Available sources for metadata abstracts SOURCES = ["openalex", "crossref", "none"] + def format_status(icon: str, message: str, style: str = "default") -> str: """ Format a status message with an icon and a given style. @@ -116,12 +121,15 @@ def format_status(icon: str, message: str, style: str = "default") -> str: """ return f"[{style}]{ICONS[icon]} {message}[/{style}]" + class FieldType(Enum): """Enum representing different Dataverse field types.""" + PRIMITIVE = "primitive" COMPOUND = "compound" VOCABULARY = "controlledVocabulary" + @dataclass class BaseMetadataField[T]: """ @@ -137,6 +145,7 @@ class BaseMetadataField[T]: value (T): The value stored in the field. type (FieldType): The type of the field, automatically set based on T. """ + name: str multiple: bool value: T @@ -172,11 +181,13 @@ class BaseMetadataField[T]: """ raise NotImplementedError("Subclasses must implement the to_dict method.") + @dataclass class PrimitiveMetadataField(BaseMetadataField[str]): """ Metadata field representing a primitive type (e.g., string) for Dataverse. 
""" + def _set_type(self) -> None: self.type = FieldType.PRIMITIVE @@ -194,7 +205,7 @@ class PrimitiveMetadataField(BaseMetadataField[str]): "typeClass": self.type.value, "multiple": self.multiple, "value": self.value, - "expandedValue": self.expanded_value + "expandedValue": self.expanded_value, } else: return { @@ -204,11 +215,13 @@ class PrimitiveMetadataField(BaseMetadataField[str]): "value": self.value, } + @dataclass class ControlledVocabularyMetadataField(BaseMetadataField[str | list[str]]): """ Metadata field for controlled vocabulary values. """ + def _set_type(self) -> None: self.type = FieldType.VOCABULARY @@ -229,11 +242,14 @@ class ControlledVocabularyMetadataField(BaseMetadataField[str | list[str]]): @dataclass class CompoundMetadataField( - BaseMetadataField[Sequence[Sequence[PrimitiveMetadataField | ControlledVocabularyMetadataField]]] + BaseMetadataField[ + Sequence[Sequence[PrimitiveMetadataField | ControlledVocabularyMetadataField]] + ] ): """ Metadata field representing compound types, composed of multiple subfields. """ + def _set_type(self) -> None: self.type = FieldType.COMPOUND @@ -255,9 +271,10 @@ class CompoundMetadataField( "typeName": self.name, "typeClass": self.type.value, "multiple": self.multiple, - "value": value_list + "value": value_list, } + @dataclass class Institution: """ @@ -267,6 +284,7 @@ class Institution: display_name (str): The name of the institution. ror (str): Research Organization Registry identifier (optional). """ + display_name: str ror: str = "" @@ -282,12 +300,15 @@ class Institution: expanded_value = { "scheme": "http://www.grid.ac/ontology/", "termName": self.display_name, - "@type": "https://schema.org/Organization" + "@type": "https://schema.org/Organization", } - return PrimitiveMetadataField("authorAffiliation", False, self.ror, expanded_value=expanded_value) + return PrimitiveMetadataField( + "authorAffiliation", False, self.ror, expanded_value=expanded_value + ) else: return PrimitiveMetadataField("authorAffiliation", False, self.display_name) + @dataclass class Person: """ @@ -300,6 +321,7 @@ class Person: email (str): Email address (optional). affiliation (Institution): Affiliation of the person (optional). """ + family_name: str given_name: str orcid: str = "" @@ -307,34 +329,34 @@ class Person: affiliation: Institution | str = "" def to_dict(self) -> dict[str, str | list[str] | dict[str, str]]: - """ - Convert Person to a dictionary for JSON serialization. + """ + Convert Person to a dictionary for JSON serialization. - Handles affiliations properly by checking if the affiliation - is an Institution object or a string. + Handles affiliations properly by checking if the affiliation + is an Institution object or a string. - Returns: - dict: A dictionary containing the person's information including - name, contact details, and affiliation. - """ - return_dict: dict[str, str | list[str] | dict[str, str]] = { - "family_name": self.family_name, - "given_name": self.given_name, - "orcid": self.orcid, - "email": self.email - } + Returns: + dict: A dictionary containing the person's information including + name, contact details, and affiliation. 
+ """ + return_dict: dict[str, str | list[str] | dict[str, str]] = { + "family_name": self.family_name, + "given_name": self.given_name, + "orcid": self.orcid, + "email": self.email, + } - if isinstance(self.affiliation, Institution): - if self.affiliation.ror: - return_dict["affiliation"] = self.affiliation.ror - elif self.affiliation.display_name: - return_dict["affiliation"] = self.affiliation.display_name - else: - return_dict["affiliation"] = "" + if isinstance(self.affiliation, Institution): + if self.affiliation.ror: + return_dict["affiliation"] = self.affiliation.ror + elif self.affiliation.display_name: + return_dict["affiliation"] = self.affiliation.display_name else: - return_dict["affiliation"] = self.affiliation if self.affiliation else "" + return_dict["affiliation"] = "" + else: + return_dict["affiliation"] = self.affiliation if self.affiliation else "" - return return_dict + return return_dict def format_name(self) -> str: """ @@ -345,7 +367,9 @@ class Person: """ return f"{self.family_name}, {self.given_name}" - def author_fields(self) -> list[PrimitiveMetadataField | ControlledVocabularyMetadataField]: + def author_fields( + self, + ) -> list[PrimitiveMetadataField | ControlledVocabularyMetadataField]: """ Build metadata fields for the author. @@ -360,19 +384,23 @@ class Person: if isinstance(self.affiliation, Institution): affiliation_field = self.affiliation.affiliation_field() else: - affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation) + affiliation_field = PrimitiveMetadataField( + "authorAffiliation", False, self.affiliation + ) if self.orcid: return [ PrimitiveMetadataField("authorName", False, self.format_name()), affiliation_field, - ControlledVocabularyMetadataField("authorIdentifierScheme", False, "ORCID"), - PrimitiveMetadataField("authorIdentifier", False, self.orcid) + ControlledVocabularyMetadataField( + "authorIdentifierScheme", False, "ORCID" + ), + PrimitiveMetadataField("authorIdentifier", False, self.orcid), ] else: return [ PrimitiveMetadataField("authorName", False, self.format_name()), - affiliation_field + affiliation_field, ] def dataset_contact_fields(self) -> list[PrimitiveMetadataField]: @@ -391,12 +419,14 @@ class Person: if isinstance(self.affiliation, Institution): affiliation_field = self.affiliation.affiliation_field() else: - affiliation_field = PrimitiveMetadataField("datasetContactAffiliation", False, self.affiliation) + affiliation_field = PrimitiveMetadataField( + "datasetContactAffiliation", False, self.affiliation + ) return [ PrimitiveMetadataField("datasetContactName", False, self.format_name()), affiliation_field, - PrimitiveMetadataField("datasetContactEmail", False, self.email) + PrimitiveMetadataField("datasetContactEmail", False, self.email), ] @@ -410,10 +440,12 @@ class License: uri (str): The license URI. short (str): The short identifier of the license. """ + name: str uri: str short: str + @dataclass class Abstract: """ @@ -423,6 +455,7 @@ class Abstract: text (str): The abstract text. source (str): The source of the abstract ('crossref', 'openalex', or 'none'). """ + text: str source: str @@ -435,7 +468,10 @@ class Abstract: """ allowed_sources = ["crossref", "openalex", "none"] if self.source not in allowed_sources: - raise ValueError(f"{self.source} is not valid! Needs to be one of {str(allowed_sources)}.") + raise ValueError( + f"{self.source} is not valid! Needs to be one of {str(allowed_sources)}." 
+ ) + @dataclass class ConfigData: @@ -447,18 +483,21 @@ class ConfigData: pis (list[dict[str, Any]]): List of principal investigator configurations. default_grants (list[dict[str, str]]): Default grant configurations. """ + dataverse: dict[str, str] pis: list[dict[str, Any]] default_grants: list[dict[str, str]] + class Config: """ Singleton class to handle configuration loading and retrieval. """ - _instance: 'Config | None' = None + + _instance: "Config | None" = None _config_data: ConfigData | None = None - def __new__(cls) -> 'Config': + def __new__(cls) -> "Config": """ Create and return the singleton instance of Config. @@ -489,20 +528,22 @@ class Config: if not config_path.exists(): raise FileNotFoundError(f"Config file not found: {config_path}") - with open(config_path, encoding='utf-8') as f: + with open(config_path, encoding="utf-8") as f: config_data = yaml.safe_load(f) # Validate PI email addresses - pis = config_data.get('pis', []) + pis = config_data.get("pis", []) for pi in pis: - if email := pi.get('email'): + if email := pi.get("email"): if not validate_email_address(email): - raise ValueError(f"Configuration Error: Invalid email address for PI {pi.get('given_name', '')} {pi.get('family_name', '')}: {email}") + raise ValueError( + f"Configuration Error: Invalid email address for PI {pi.get('given_name', '')} {pi.get('family_name', '')}: {email}" + ) cls._config_data = ConfigData( - dataverse=config_data.get('dataverse', {}), - pis=config_data.get('pis', []), - default_grants=config_data.get('default_grants', []) + dataverse=config_data.get("dataverse", {}), + pis=config_data.get("pis", []), + default_grants=config_data.get("default_grants", []), ) @classmethod @@ -552,6 +593,7 @@ class Config: """ return self.get_config().dataverse + class APIClient: """ Client for making HTTP requests to external APIs. @@ -559,7 +601,13 @@ class APIClient: Attributes: session (requests.Session): The underlying requests session. """ - def __init__(self, contact_mail: str | None = None, user_agent: str = f"UDE-Doi2Dataset/{__version__}", token: str | None = None) -> None: + + def __init__( + self, + contact_mail: str | None = None, + user_agent: str = f"UDE-Doi2Dataset/{__version__}", + token: str | None = None, + ) -> None: """ Initialize the API client with optional contact mail, user agent, and token. @@ -571,7 +619,9 @@ class APIClient: self.session = requests.Session() self._set_headers(contact_mail, user_agent, token) - def _set_headers(self, contact_mail: str | None, user_agent: str, token: str | None) -> None: + def _set_headers( + self, contact_mail: str | None, user_agent: str, token: str | None + ) -> None: """ Set HTTP headers for the session based on contact email and token. @@ -590,7 +640,9 @@ class APIClient: self.session.headers.update(header) - def make_request(self, url: str, method: str = "GET", **kwargs: Any) -> requests.Response | None: + def make_request( + self, url: str, method: str = "GET", **kwargs: Any + ) -> requests.Response | None: """ Make an HTTP request and return the response. @@ -610,10 +662,12 @@ class APIClient: print(f"\n{ICONS['error']} Request failed: {str(e)}") return None + class NameProcessor: """ Provides utility methods for processing names. """ + @staticmethod def normalize_string(s: str) -> str: """ @@ -625,7 +679,11 @@ class NameProcessor: Returns: str: The normalized string. 
""" - return unicodedata.normalize("NFKD", s.lower()).encode("ASCII", "ignore").decode("ASCII") + return ( + unicodedata.normalize("NFKD", s.lower()) + .encode("ASCII", "ignore") + .decode("ASCII") + ) @staticmethod def split_name(full_name: str) -> tuple[str, str]: @@ -648,10 +706,12 @@ class NameProcessor: return " ".join(parts[:-1]), parts[-1] + class PIFinder: """ Finds principal investigators (PIs) among a list of Person objects. """ + def __init__(self, pis: list[Person]) -> None: """ Initialize with a list of Person objects representing potential PIs. @@ -661,7 +721,12 @@ class PIFinder: """ self.pis = pis - def find_pi(self, family_name: str | None = None, orcid: str | None = None, given_name: str | None = None) -> Person | None: + def find_pi( + self, + family_name: str | None = None, + orcid: str | None = None, + given_name: str | None = None, + ) -> Person | None: """ Find a PI by ORCID or name. @@ -711,7 +776,10 @@ class PIFinder: normalized_family_name = NameProcessor.normalize_string(family_name) for person in self.pis: - if NameProcessor.normalize_string(person.family_name) == normalized_family_name: + if ( + NameProcessor.normalize_string(person.family_name) + == normalized_family_name + ): matches.append(person) if not matches: @@ -720,7 +788,10 @@ class PIFinder: if given_name: normalized_given_name = NameProcessor.normalize_string(given_name) for match in matches: - if NameProcessor.normalize_string(match.given_name) == normalized_given_name: + if ( + NameProcessor.normalize_string(match.given_name) + == normalized_given_name + ): return match return None @@ -729,19 +800,30 @@ class PIFinder: raise ValueError("Multiple matches found for family name") + class LicenseProcessor: """ Processes license information from metadata. """ + LICENSE_MAP = { "cc-by": ("https://creativecommons.org/licenses/by/4.0/", "CC BY 4.0"), "cc-by-sa": ("https://creativecommons.org/licenses/by-sa/4.0/", "CC BY-SA 4.0"), "cc-by-nc": ("https://creativecommons.org/licenses/by-nc/4.0/", "CC BY-NC 4.0"), - "cc-by-nc-sa": ("https://creativecommons.org/licenses/by-nc-sa/4.0/", "CC BY-NC-SA 4.0"), - "cc-by-nc-nd": ("https://creativecommons.org/licenses/by-nc-nd/4.0/", "CC BY-NC-ND 4.0"), + "cc-by-nc-sa": ( + "https://creativecommons.org/licenses/by-nc-sa/4.0/", + "CC BY-NC-SA 4.0", + ), + "cc-by-nc-nd": ( + "https://creativecommons.org/licenses/by-nc-nd/4.0/", + "CC BY-NC-ND 4.0", + ), "cc-by-nd": ("https://creativecommons.org/licenses/by-nd/4.0/", "CC BY-ND 4.0"), "cc0": ("https://creativecommons.org/publicdomain/zero/1.0/", "CC0 1.0"), - "pd": ("https://creativecommons.org/publicdomain/mark/1.0/", "Public Domain Mark 1.0"), + "pd": ( + "https://creativecommons.org/publicdomain/mark/1.0/", + "Public Domain Mark 1.0", + ), } @classmethod @@ -765,10 +847,12 @@ class LicenseProcessor: uri, name = cls.LICENSE_MAP.get(base_license, ("", license_short)) return License(name=name, uri=uri, short=license_short) + class AbstractProcessor: """ Retrieves and processes abstracts from CrossRef and OpenAlex. """ + def __init__(self, api_client: APIClient, console: Console | None = None): """ Initialize with an APIClient instance. @@ -780,7 +864,9 @@ class AbstractProcessor: self.api_client = api_client self.console = console or Console() - def get_abstract(self, doi: str, data: dict[str, Any], license: License) -> Abstract: + def get_abstract( + self, doi: str, data: dict[str, Any], license: License + ) -> Abstract: """ Get an abstract based on DOI and license permissions. 
@@ -795,26 +881,42 @@ class AbstractProcessor: license_ok = {"cc-by", "cc-by-sa", "cc-by-nc", "cc-by-nc-sa", "cc0", "pd"} if license.short in license_ok: - self.console.print(f"\n{ICONS['info']} License {license.name} allows derivative works. Pulling abstract from CrossRef.", style="info") + self.console.print( + f"\n{ICONS['info']} License {license.name} allows derivative works. Pulling abstract from CrossRef.", + style="info", + ) crossref_abstract = self._get_crossref_abstract(doi) if crossref_abstract: return Abstract(text=crossref_abstract, source="crossref") else: - self.console.print(f"\n{ICONS['warning']} No abstract found in CrossRef!", style="warning") + self.console.print( + f"\n{ICONS['warning']} No abstract found in CrossRef!", + style="warning", + ) else: if license.name: - self.console.print(f"\n{ICONS['info']} License {license.name} does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info") + self.console.print( + f"\n{ICONS['info']} License {license.name} does not allow derivative works. Reconstructing abstract from OpenAlex!", + style="info", + ) else: - self.console.print(f"\n{ICONS['info']} Custom license does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info") - + self.console.print( + f"\n{ICONS['info']} Custom license does not allow derivative works. Reconstructing abstract from OpenAlex!", + style="info", + ) openalex_abstract = self._get_openalex_abstract(data) if openalex_abstract: return Abstract(text=openalex_abstract, source="openalex") else: - self.console.print(f"\n{ICONS['warning']} No abstract found in OpenAlex!", style="warning") + self.console.print( + f"\n{ICONS['warning']} No abstract found in OpenAlex!", style="warning" + ) - self.console.print(f"\n{ICONS['warning']} No abstract found in either CrossRef nor OpenAlex!", style="warning") + self.console.print( + f"\n{ICONS['warning']} No abstract found in either CrossRef nor OpenAlex!", + style="warning", + ) return Abstract(text="", source="none") def _get_crossref_abstract(self, doi: str) -> str | None: @@ -849,7 +951,9 @@ class AbstractProcessor: if not inv_index: return None - word_positions = [(word, pos) for word, positions in inv_index.items() for pos in positions] + word_positions = [ + (word, pos) for word, positions in inv_index.items() for pos in positions + ] sorted_words = sorted(word_positions, key=lambda x: x[1]) return " ".join(word for word, _ in sorted_words) @@ -874,13 +978,13 @@ class AbstractProcessor: # Replace closing tags that follow ordered list openings # This regex matches that comes after
         # <ol> tags
-        pattern = r'(<ol>.*?)</jats:list>'
-        text = re.sub(pattern, r'\1</ol>', text, flags=re.DOTALL)
+        pattern = r"(<ol>.*?)</jats:list>"
+        text = re.sub(pattern, r"\1</ol>", text, flags=re.DOTALL)

         # Process unordered lists second
         text = text.replace('<jats:list list-type="bullet">', "<ul>")

         # Replace remaining </jats:list> tags as unordered list closings
-        text = text.replace('</jats:list>', '</ul>')
+        text = text.replace("</jats:list>", "</ul>")
    ", "") # Handle other JATS tags replacements = { @@ -912,10 +1016,12 @@ class AbstractProcessor: text = text.replace(jats_tag, html_tag) return text + class SubjectMapper: """ Maps subject names from input data to controlled vocabulary. """ + CONTROLLED_VOCAB = { "Agricultural Sciences": "Agricultural Sciences", "Arts and Humanities": "Arts and Humanities", @@ -939,7 +1045,9 @@ class SubjectMapper: } @classmethod - def get_subjects(cls, data: dict[str, Any], fallback_subject: str = "Other") -> list[str]: + def get_subjects( + cls, data: dict[str, Any], fallback_subject: str = "Other" + ) -> list[str]: """ Extract and map subjects from input data. @@ -961,7 +1069,6 @@ class SubjectMapper: mapped_subjects = cls.map_subjects(subject_collection) return mapped_subjects if mapped_subjects else [fallback_subject] - @classmethod def map_subjects(cls, subjects: list[str]) -> list[str]: """ @@ -979,11 +1086,15 @@ class SubjectMapper: valid_subjects.add(mapped_subject) return list(valid_subjects) + class CitationBuilder: """ Builds various citation-related metadata fields. """ - def __init__(self, data: dict[str, Any], doi: str, pi_finder: PIFinder, ror: bool = False) -> None: + + def __init__( + self, data: dict[str, Any], doi: str, pi_finder: PIFinder, ror: bool = False + ) -> None: """ Initialize the CitationBuilder with data, DOI, and a PIFinder. @@ -1004,18 +1115,22 @@ class CitationBuilder: Returns: list[list[PrimitiveMetadataField]]: Nested list of identifier metadata fields. """ - other_ids = [[ - PrimitiveMetadataField("otherIdAgency", False, "doi"), - PrimitiveMetadataField("otherIdValue", False, self.doi) - ]] + other_ids = [ + [ + PrimitiveMetadataField("otherIdAgency", False, "doi"), + PrimitiveMetadataField("otherIdValue", False, self.doi), + ] + ] if pmid := self.data.get("ids", {}).get("pmid"): try: normalized_pmid = normalize_pmid(pmid) - other_ids.append([ - PrimitiveMetadataField("otherIdAgency", False, "pmid"), - PrimitiveMetadataField("otherIdValue", False, normalized_pmid) - ]) + other_ids.append( + [ + PrimitiveMetadataField("otherIdAgency", False, "pmid"), + PrimitiveMetadataField("otherIdValue", False, normalized_pmid), + ] + ) except ValueError: pass @@ -1034,7 +1149,12 @@ class CitationBuilder: grants: list[list[PrimitiveMetadataField]] = [] for grant in default_grants: - grants.append([PrimitiveMetadataField("grantNumberAgency", False, grant["funder"]), PrimitiveMetadataField("grantNumberValue", False, grant["id"])]) + grants.append( + [ + PrimitiveMetadataField("grantNumberAgency", False, grant["funder"]), + PrimitiveMetadataField("grantNumberValue", False, grant["id"]), + ] + ) for grant in self.data.get("grants", []): grant_funder = grant.get("funder_display_name", {}) @@ -1042,11 +1162,15 @@ class CitationBuilder: if not grant_funder or not grant_id: continue - grants.append([PrimitiveMetadataField("grantNumberAgency", False, grant_funder), PrimitiveMetadataField("grantNumberValue", False, grant_id)]) + grants.append( + [ + PrimitiveMetadataField("grantNumberAgency", False, grant_funder), + PrimitiveMetadataField("grantNumberValue", False, grant_id), + ] + ) return grants - def build_authors(self) -> tuple[list[Person], list[Person]]: """ Build lists of authors and corresponding authors from the metadata. 
@@ -1065,14 +1189,17 @@ class CitationBuilder: authors.append(author_person) if authorship.get("is_corresponding"): - corresponding_entry = self._process_corresponding_author(author_person, authorship) + corresponding_entry = self._process_corresponding_author( + author_person, authorship + ) if corresponding_entry: corresponding_authors.append(corresponding_entry) return authors, corresponding_authors - - def _process_author(self, author: dict[str, Any], authorship: dict[str, Any]) -> Person: + def _process_author( + self, author: dict[str, Any], authorship: dict[str, Any] + ) -> Person: """ Process author data and return a Person instance. @@ -1089,7 +1216,9 @@ class CitationBuilder: person = Person(family_name, given_name) if affiliations := authorship.get("affiliations"): - affiliation = Institution(affiliations[0].get("raw_affiliation_string", "").strip()) + affiliation = Institution( + affiliations[0].get("raw_affiliation_string", "").strip() + ) person.affiliation = affiliation @@ -1097,18 +1226,20 @@ class CitationBuilder: if institutions := authorship.get("institutions"): institution = institutions[0] if institution.get("ror"): - affiliation = Institution(institution.get("display_name"), institution.get("ror")) + affiliation = Institution( + institution.get("display_name"), institution.get("ror") + ) person.affiliation = affiliation - if orcid := author.get("orcid"): person.orcid = normalize_orcid(orcid) return person - - def _process_corresponding_author(self, author: Person, authorship: dict[str, Any]) -> Person | None: + def _process_corresponding_author( + self, author: Person, authorship: dict[str, Any] + ) -> Person | None: """ Identify the corresponding author based on provided PI information. @@ -1122,7 +1253,7 @@ class CitationBuilder: pi = self.pi_finder.find_pi( family_name=author.family_name, given_name=author.given_name, - orcid=author.orcid + orcid=author.orcid, ) if not pi: @@ -1141,19 +1272,26 @@ class CitationBuilder: for topic in self.data.get("topics", []): if topic.get("score") >= 0.8: - - topic_class_value_field = PrimitiveMetadataField("topicClassValue", - False, topic.get("display_name")) - topic_class_vocab_field = PrimitiveMetadataField("topicClassVocab", - False, "OpenAlex") + topic_class_value_field = PrimitiveMetadataField( + "topicClassValue", False, topic.get("display_name") + ) + topic_class_vocab_field = PrimitiveMetadataField( + "topicClassVocab", False, "OpenAlex" + ) topic_class_vocab_uri_field = PrimitiveMetadataField( - "topicClassVocabURI", False, topic.get("id")) + "topicClassVocabURI", False, topic.get("id") + ) - topics.append([topic_class_value_field, topic_class_vocab_field, topic_class_vocab_uri_field]) + topics.append( + [ + topic_class_value_field, + topic_class_vocab_field, + topic_class_vocab_uri_field, + ] + ) return topics - def build_keywords(self) -> list[list[PrimitiveMetadataField]]: """ Build metadata fields for keywords from both regular keywords and MeSH terms. 
@@ -1166,7 +1304,9 @@ class CitationBuilder: for keyword in self.data.get("keywords", []): # Filter out possibly unrelated keywords (low score) if keyword["score"] >= 0.5: - keyword_value_field = PrimitiveMetadataField("keywordValue", False, keyword["display_name"]) + keyword_value_field = PrimitiveMetadataField( + "keywordValue", False, keyword["display_name"] + ) keywords.append([keyword_value_field]) mesh_base_url = "http://id.nlm.nih.gov/mesh" @@ -1175,21 +1315,37 @@ class CitationBuilder: if mesh["qualifier_ui"]: url = f"{url}{mesh['qualifier_ui']}" + keyword_value_field = PrimitiveMetadataField( + "keywordValue", False, mesh["descriptor_name"] + ) + keyword_term_uri_field = PrimitiveMetadataField( + "keywordTermURI", False, url + ) + keyword_vocabulary_field = PrimitiveMetadataField( + "keywordVocabulary", False, "MeSH" + ) + keyword_vocabulary_uri_field = PrimitiveMetadataField( + "keywordVocabularyURI", False, mesh_base_url + ) - keyword_value_field = PrimitiveMetadataField("keywordValue", False, mesh["descriptor_name"]) - keyword_term_uri_field = PrimitiveMetadataField("keywordTermURI", False, url) - keyword_vocabulary_field = PrimitiveMetadataField("keywordVocabulary", False, "MeSH") - keyword_vocabulary_uri_field = PrimitiveMetadataField("keywordVocabularyURI", False, mesh_base_url) - - keywords.append([keyword_value_field, keyword_term_uri_field, keyword_vocabulary_field, keyword_vocabulary_uri_field]) + keywords.append( + [ + keyword_value_field, + keyword_term_uri_field, + keyword_vocabulary_field, + keyword_vocabulary_uri_field, + ] + ) return keywords + class MetadataProcessor: """ Processes metadata for a given DOI by fetching data from OpenAlex, building metadata blocks, and optionally uploading the dataset. """ + def __init__( self, doi: str, @@ -1198,10 +1354,10 @@ class MetadataProcessor: default_subject: str = "Other", contact_mail: str | None = None, upload: bool = False, - ror: bool= False, + ror: bool = False, console: Console | None = None, progress: Progress | None = None, - task_id: TaskID | None = None + task_id: TaskID | None = None, ) -> None: """ Initialize the MetadataProcessor with configuration and processing options. @@ -1259,7 +1415,7 @@ class MetadataProcessor: Advance the progress bar if enabled. """ if self.progress and self.task_id is not None: - self.progress.advance(self.task_id) + self.progress.advance(self.task_id) def process(self) -> dict[str, Any]: """ @@ -1268,7 +1424,9 @@ class MetadataProcessor: Returns: dict[str, Any]: The constructed metadata dictionary. 
""" - self.console.print(f"{ICONS['processing']} Processing DOI: {self.doi}", style="info") + self.console.print( + f"{ICONS['processing']} Processing DOI: {self.doi}", style="info" + ) data = self._fetch_data() self._update_progress() @@ -1283,7 +1441,10 @@ class MetadataProcessor: self._save_output(metadata) self._update_progress() - self.console.print(f"\n{ICONS['success']} Successfully processed: {self.doi}\n", style="success") + self.console.print( + f"\n{ICONS['success']} Successfully processed: {self.doi}\n", + style="success", + ) return metadata def _upload_data(self, metadata: dict[str, Any]) -> dict[str, Any]: @@ -1301,19 +1462,25 @@ class MetadataProcessor: """ config = Config() - token = config.DATAVERSE['api_token'] + token = config.DATAVERSE["api_token"] client = APIClient(token=token) url = f"{config.DATAVERSE['url']}/api/dataverses/{config.DATAVERSE['dataverse']}/datasets?doNotValidate=true" - auth = (config.DATAVERSE['auth_user'], config.DATAVERSE['auth_password']) + auth = (config.DATAVERSE["auth_user"], config.DATAVERSE["auth_password"]) response = client.make_request(url, method="POST", auth=auth, json=metadata) if response is None or response.status_code != 201: - self.console.print(f"\n{ICONS['error']} Failed to upload to Dataverse: {url}", style="error") + self.console.print( + f"\n{ICONS['error']} Failed to upload to Dataverse: {url}", + style="error", + ) raise ValueError(f"Failed to upload to Dataverse: {url}") else: perma = response.json().get("data", {}).get("persistentId", "") - self.console.print(f"{ICONS['upload']} Dataset uploaded to: {config.DATAVERSE['dataverse']} with ID {perma}", style="info") + self.console.print( + f"{ICONS['upload']} Dataset uploaded to: {config.DATAVERSE['dataverse']} with ID {perma}", + style="info", + ) return response.json() @@ -1331,7 +1498,10 @@ class MetadataProcessor: response = self.api_client.make_request(url) if response is None or response.status_code != 200: - self.console.print(f"\n{ICONS['error']} Failed to fetch data for DOI: {self.doi}", style="error") + self.console.print( + f"\n{ICONS['error']} Failed to fetch data for DOI: {self.doi}", + style="error", + ) raise ValueError(f"Failed to fetch data for DOI: {self.doi}") return response.json() @@ -1353,20 +1523,27 @@ class MetadataProcessor: authors, corresponding_authors = citation_builder.build_authors() - author_fields: list[list[PrimitiveMetadataField | ControlledVocabularyMetadataField]] = [] + author_fields: list[ + list[PrimitiveMetadataField | ControlledVocabularyMetadataField] + ] = [] corresponding_author_fields: list[list[PrimitiveMetadataField]] = [] for author in authors: author_fields.append(author.author_fields()) if not corresponding_authors: - self.console.print(f"{ICONS['warning']} No corresponding authors explicitly declared; PIs are used as a fallback!", style="warning") + self.console.print( + f"{ICONS['warning']} No corresponding authors explicitly declared; PIs are used as a fallback!", + style="warning", + ) pis = self._get_involved_pis(data) corresponding_authors: list[Person] for pi in pis: corresponding_authors.append(pi) for corresponding_author in corresponding_authors: - corresponding_author_fields.append(corresponding_author.dataset_contact_fields()) + corresponding_author_fields.append( + corresponding_author.dataset_contact_fields() + ) description = self._build_description(data, abstract) @@ -1377,33 +1554,78 @@ class MetadataProcessor: "metadataBlocks": { "citation": { "fields": [ - PrimitiveMetadataField("title", False, 
data.get("title", "")).to_dict(), - PrimitiveMetadataField("distributionDate", False, data.get("publication_date", "")).to_dict(), - CompoundMetadataField("otherId", True, citation_builder.build_other_ids()).to_dict(), - CompoundMetadataField("dsDescription", True, [[PrimitiveMetadataField("dsDescriptionValue", False, description)]]).to_dict(), - ControlledVocabularyMetadataField("subject", True, SubjectMapper.get_subjects(data, self.default_subject)).to_dict(), - CompoundMetadataField("topicClassification", True, citation_builder.build_topics()).to_dict(), - CompoundMetadataField("keyword", True, citation_builder.build_keywords()).to_dict(), - PrimitiveMetadataField("depositor", False, self.depositor or data["primary_location"]["source"].get("display_name", "")).to_dict(), - PrimitiveMetadataField("alternativeURL", False, f"https://doi.org/{self.doi}").to_dict(), - CompoundMetadataField("author", True, author_fields).to_dict(), - CompoundMetadataField("datasetContact", True, corresponding_author_fields).to_dict(), - CompoundMetadataField("grantNumber", True, grants).to_dict() + PrimitiveMetadataField( + "title", False, data.get("title", "") + ).to_dict(), + PrimitiveMetadataField( + "distributionDate", + False, + data.get("publication_date", ""), + ).to_dict(), + CompoundMetadataField( + "otherId", True, citation_builder.build_other_ids() + ).to_dict(), + CompoundMetadataField( + "dsDescription", + True, + [ + [ + PrimitiveMetadataField( + "dsDescriptionValue", False, description + ) + ] + ], + ).to_dict(), + ControlledVocabularyMetadataField( + "subject", + True, + SubjectMapper.get_subjects(data, self.default_subject), + ).to_dict(), + CompoundMetadataField( + "topicClassification", + True, + citation_builder.build_topics(), + ).to_dict(), + CompoundMetadataField( + "keyword", True, citation_builder.build_keywords() + ).to_dict(), + PrimitiveMetadataField( + "depositor", + False, + self.depositor + or data["primary_location"]["source"].get( + "display_name", "" + ), + ).to_dict(), + PrimitiveMetadataField( + "alternativeURL", False, f"https://doi.org/{self.doi}" + ).to_dict(), + CompoundMetadataField( + "author", True, author_fields + ).to_dict(), + CompoundMetadataField( + "datasetContact", True, corresponding_author_fields + ).to_dict(), + CompoundMetadataField( + "grantNumber", True, grants + ).to_dict(), ], - "displayName": "Citation Metadata" + "displayName": "Citation Metadata", } }, - "files": [] + "files": [], } } if license_info.name: return_dict["datasetVersion"]["license"] = { "name": license_info.name, - "uri": license_info.uri + "uri": license_info.uri, } else: - return_dict["datasetVersion"]["termsOfUse"] = f"All rights reserved. Copyright © {self._get_publication_year(data)}, [TODO: Insert copyright holder here!]" + return_dict["datasetVersion"]["termsOfUse"] = ( + f"All rights reserved. Copyright © {self._get_publication_year(data)}, [TODO: Insert copyright holder here!]" + ) return return_dict @@ -1442,7 +1664,10 @@ class MetadataProcessor: elif all([journal, publication_date, type]): return f"

<p>This {type} was published on {publication_date} in {journal}</p>
    " - self.console.print(f"{ICONS['warning']} No abstract header added, missing information (journal, publication date and/or document type)", style="warning") + self.console.print( + f"{ICONS['warning']} No abstract header added, missing information (journal, publication date and/or document type)", + style="warning", + ) return "" def _get_publication_year(self, data: dict[str, Any]) -> str: @@ -1457,8 +1682,6 @@ class MetadataProcessor: """ return data.get("publication_year", "") - - def _get_involved_pis(self, data: dict[str, Any]) -> list[Person]: """ Identify involved principal investigators from the metadata for use as fallback @@ -1486,7 +1709,7 @@ class MetadataProcessor: if pi := self.pi_finder.find_pi( family_name=family_name, given_name=given_name, - orcid=author.get("orcid") + orcid=author.get("orcid"), ): involved_pis.append(pi) @@ -1516,20 +1739,29 @@ class MetadataProcessor: Returns: A JSON-serializable representation of the object. """ + def default(self, o: Any) -> Any: - if hasattr(o, 'to_dict'): + if hasattr(o, "to_dict"): return o.to_dict() return super().default(o) with open(self.output_path, "w", encoding="utf-8") as f: - json.dump(metadata, f, indent=4, ensure_ascii=False, cls=CustomEncoder) - self.console.print(f"{ICONS['save']} Metadata saved in: {self.output_path}", style="info") + json.dump( + metadata, f, indent=4, ensure_ascii=False, cls=CustomEncoder + ) + self.console.print( + f"{ICONS['save']} Metadata saved in: {self.output_path}", + style="info", + ) except Exception as e: - self.console.print(f"{ICONS['error']} Error saving metadata: {str(e)}\n", style="error") + self.console.print( + f"{ICONS['error']} Error saving metadata: {str(e)}\n", style="error" + ) raise else: self.console.print(metadata) + def sanitize_filename(doi: str) -> str: """ Convert DOI to a valid filename using only alphanumeric characters and underscores. @@ -1541,12 +1773,13 @@ def sanitize_filename(doi: str) -> str: str: Sanitized filename string. """ # Replace non-alphanumeric characters with underscores - sanitized = ''.join(c if c.isalnum() else '_' for c in doi) + sanitized = "".join(c if c.isalnum() else "_" for c in doi) # Remove consecutive underscores - while '__' in sanitized: - sanitized = sanitized.replace('__', '_') + while "__" in sanitized: + sanitized = sanitized.replace("__", "_") # Remove leading/trailing underscores - return sanitized.strip('_') + return sanitized.strip("_") + def print_summary(results: dict[str, list[Any]], console: Console) -> None: """ @@ -1565,19 +1798,21 @@ def print_summary(results: dict[str, list[Any]], console: Console) -> None: table.add_row( f"{ICONS['success']} Success", str(len(results["success"])), - ", ".join(results["success"][:3]) + ("..." if len(results["success"]) > 3 else "") + ", ".join(results["success"][:3]) + + ("..." if len(results["success"]) > 3 else ""), ) if results["failed"]: table.add_row( f"{ICONS['error']} Failed", str(len(results["failed"])), - ", ".join(doi for doi, _ in results["failed"][:3]) + - ("..." if len(results["failed"]) > 3 else "") + ", ".join(doi for doi, _ in results["failed"][:3]) + + ("..." if len(results["failed"]) > 3 else ""), ) console.print(Panel(table, title="Summary", border_style="blue")) + def validate_email_address(email: str): """ Validate an email address and ensure its domain has an MX record. 
@@ -1594,13 +1829,14 @@ def validate_email_address(email: str): email = valid.normalized # Check domain has MX record - domain = email.split('@')[1] - dns.resolver.resolve(domain, 'MX') + domain = email.split("@")[1] + dns.resolver.resolve(domain, "MX") return True except (EmailNotValidError, dns.resolver.NoAnswer, dns.resolver.NXDOMAIN): return False + def process_doi_batch( dois: set[str], output_dir: Path, @@ -1609,7 +1845,7 @@ def process_doi_batch( contact_mail: str | None = None, upload: bool = False, ror: bool = False, - console: Console | None = None + console: Console | None = None, ) -> dict[str, list[Any]]: """ Process a batch of DOIs and return a summary of results. @@ -1648,30 +1884,25 @@ def process_doi_batch( if upload: doi_total_steps = 4 # Fetch, Build, Upload, Save else: - doi_total_steps = 3 # Fetch, Build, Save + doi_total_steps = 3 # Fetch, Build, Save with Progress( *progress_columns, console=console, - transient=True # This makes the progress bar disappear after completion + transient=True, # This makes the progress bar disappear after completion ) as progress: # Add main task - main_task = progress.add_task( - "[bold blue]Processing DOIs...", - total=len(dois) - ) + main_task = progress.add_task("[bold blue]Processing DOIs...", total=len(dois)) # Add status task for current DOI status_task = progress.add_task( "[cyan]Current:", total=None, # Indeterminate progress - visible=False # Hidden initially + visible=False, # Hidden initially ) status_task = progress.add_task( - "[cyan]Current:", - total=doi_total_steps, - visible=False + "[cyan]Current:", total=doi_total_steps, visible=False ) for doi in dois: @@ -1681,7 +1912,7 @@ def process_doi_batch( status_task, description=f"[cyan]Current: [white]{doi[:50]}...", visible=True, - completed=0 # Reset progress for new DOI + completed=0, # Reset progress for new DOI ) # Process the DOI @@ -1698,7 +1929,7 @@ def process_doi_batch( ror=ror, console=console, progress=progress, - task_id=status_task + task_id=status_task, ) # Process and capture result @@ -1714,8 +1945,7 @@ def process_doi_batch( # Show error but keep progress bar progress.console.print( - f"{ICONS['error']} Error processing {doi}: {str(e)}", - style="error" + f"{ICONS['error']} Error processing {doi}: {str(e)}", style="error" ) finally: # Clear current status @@ -1732,76 +1962,81 @@ def main(): console = Console(theme=THEME) try: - - parser = argparse.ArgumentParser(description="Process DOIs to generate metadata") - parser.add_argument( - "dois", - nargs="*", - help="One or more DOIs to process" + parser = argparse.ArgumentParser( + description="Process DOIs to generate metadata" ) + parser.add_argument("dois", nargs="*", help="One or more DOIs to process") parser.add_argument( - "-f", "--file", + "-f", + "--file", help="File containing DOIs (one per line)", - type=argparse.FileType('r') + type=argparse.FileType("r"), ) parser.add_argument( - "-o", "--output-dir", + "-o", + "--output-dir", help="Output directory for metadata files", - default="." 
+        default=".",
         )
         parser.add_argument(
-            "-d", "--depositor",
-            help="Name of the depositor",
-            default=None
+            "-d", "--depositor", help="Name of the depositor", default=None
         )
         parser.add_argument(
-            "-s", "--subject",
+            "-s",
+            "--subject",
             help="Default subject",
-            default="Medicine, Health and Life Sciences"
+            default="Medicine, Health and Life Sciences",
         )
         parser.add_argument(
-            "-m", "--contact-mail",
-            help="Contact email address",
-            default=False
+            "-m", "--contact-mail", help="Contact email address", default=None
         )
         parser.add_argument(
-            "-u", "--upload",
-            help="Upload to Dataverse",
-            action="store_true"
+            "-u", "--upload", help="Upload to Dataverse", action="store_true"
         )
         parser.add_argument(
-            "-r", "--use-ror",
-            help="Use ROR ID if available",
-            action="store_true"
+            "-r", "--use-ror", help="Use ROR ID if available", action="store_true"
         )
 
         args = parser.parse_args()
 
         # Ensure we have either DOIs as arguments or a file
         if not args.dois and not args.file:
-            console.print(f"{ICONS['error']} Error: No DOIs provided. Use either command line arguments or -f/--file option.", style="error")
+            console.print(
+                f"{ICONS['error']} Error: No DOIs provided. Use either command line arguments or -f/--file option.",
+                style="error",
+            )
             parser.print_help()
             sys.exit(1)
 
         # Get DOIs from both direct arguments and file if provided
         dois = set(args.dois)  # Start with directly provided DOIs
         if args.file:
-            console.print(f"{ICONS['file']} Reading DOIs from file: {args.file.name}", style="info")
+            console.print(
+                f"{ICONS['file']} Reading DOIs from file: {args.file.name}",
+                style="info",
+            )
             dois.update(line.strip() for line in args.file if line.strip())
 
         # Create output directory if it doesn't exist
         output_dir = Path(args.output_dir)
         try:
             output_dir.mkdir(parents=True, exist_ok=True)
-            console.print(f"{ICONS['folder']} Output directory: {output_dir}\n", style="info")
+            console.print(
+                f"{ICONS['folder']} Output directory: {output_dir}\n", style="info"
+            )
         except Exception as e:
-            console.print(f"Failed to create output directory: {str(e)}\n", style="error")
+            console.print(
+                f"Failed to create output directory: {str(e)}\n", style="error"
+            )
             sys.exit(1)
 
         if args.contact_mail:
             if not validate_email_address(args.contact_mail):
                 raise ValueError(f"Not a valid email address: {args.contact_mail}")
-            console.print(f"{ICONS['info']} Exposing contact email <{args.contact_mail}> to API services.\n", style="info")
+            console.print(
+                f"{ICONS['info']} Exposing contact email <{args.contact_mail}> to API services.\n",
+                style="info",
+            )
 
         # Process DOIs and track time
         process_doi_batch(
@@ -1812,16 +2047,18 @@ def main():
             contact_mail=args.contact_mail,
             upload=args.upload,
             ror=args.use_ror,
-            console=console
+            console=console,
         )
-
-
     except KeyboardInterrupt:
-        console.print(f"\n{ICONS['warning']} Processing interrupted by user", style="warning")
+        console.print(
+            f"\n{ICONS['warning']} Processing interrupted by user", style="warning"
+        )
         sys.exit(1)
     except Exception as e:
-        console.print(f"\n{ICONS['error']} An unexpected error occurred: {str(e)}", style="error")
+        console.print(
+            f"\n{ICONS['error']} An unexpected error occurred: {str(e)}", style="error"
+        )
         sys.exit(1)
 
diff --git a/pyproject.toml b/pyproject.toml
index bd1a783..84bd858 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,6 +50,7 @@ dev = [
     "pytest-mock>=3.14.0,<4.0",
     "pytest-cov>=6.0.0,<7.0",
     "ruff>=0.11.1,<0.20",
+    "gitlint>=0.19.1,<0.20",
 ]
 test = [
     "pytest>=8.3.5,<9.0",
@@ -132,3 +133,7 @@ ignore = [
 
 [tool.ruff.lint.per-file-ignores]
"tests/*" = ["E501"] + +[tool.bandit] +exclude_dirs = ["tests", "docs", ".venv", "build", "dist"] +skips = ["B101", "B601", "B404", "B603"] diff --git a/requirements-dev.txt b/requirements-dev.txt index ab30c10..eb0ffaa 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,3 +2,4 @@ pytest>=8.3.5,<9.0 pytest-mock>=3.14.0,<4.0 pytest-cov>=6.0.0,<7.0 ruff>=0.11.1,<0.20 +gitlint>=0.19.1,<0.20 diff --git a/scripts/lint-commit.py b/scripts/lint-commit.py new file mode 100644 index 0000000..a424e43 --- /dev/null +++ b/scripts/lint-commit.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python3 +""" +Simple script to lint git commit messages using gitlint. + +This script can be used to: +1. Lint the last commit message +2. Lint a specific commit by hash +3. Lint commit messages in a range +4. Be used as a pre-commit hook + +Usage: + python scripts/lint-commit.py # Lint last commit + python scripts/lint-commit.py --hash # Lint specific commit + python scripts/lint-commit.py --range # Lint commit range + python scripts/lint-commit.py --staged # Lint staged commit message + +This implementation enforces conventional commit message format. +""" + +import argparse +import subprocess +import sys +from pathlib import Path + + +def run_command(cmd, check=True): + """Run a shell command and return the result.""" + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=check) + return result + except subprocess.CalledProcessError as e: + print(f"Error running command: {cmd}") + print(f"Exit code: {e.returncode}") + print(f"Output: {e.stdout}") + print(f"Error: {e.stderr}") + return e + + +def check_gitlint_installed(): + """Check if gitlint is installed.""" + result = run_command(["which", "gitlint"], check=False) + if result.returncode != 0: + print("Error: gitlint is not installed.") + print("Please install it with: pip install gitlint") + print("Or install dev dependencies: pip install -r requirements-dev.txt") + sys.exit(1) + + +def lint_commit(commit_hash=None, commit_range=None, staged=False): + """Lint commit message(s) using gitlint.""" + # Build gitlint command + cmd = ["gitlint"] + + if staged: + # Lint staged commit message + cmd.extend(["--staged"]) + elif commit_range: + # Lint commit range + cmd.extend(["--commits", commit_range]) + elif commit_hash: + # Lint specific commit + cmd.extend(["--commit", commit_hash]) + else: + # Lint last commit (default) + cmd.extend(["--commit", "HEAD"]) + + print(f"Running: {' '.join(cmd)}") + print("-" * 50) + + # Run gitlint + result = run_command(cmd, check=False) + + if result.returncode == 0: + print("✅ All commit messages are valid!") + return True + else: + print("❌ Commit message validation failed:") + print(result.stdout) + if result.stderr: + print("Error output:") + print(result.stderr) + return False + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Lint git commit messages using gitlint", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + %(prog)s # Lint last commit + %(prog)s --hash abc123 # Lint specific commit + %(prog)s --range HEAD~3.. 
+  %(prog)s --staged           # Lint staged commit message
+        """,
+    )
+
+    parser.add_argument("--hash", help="Specific commit hash to lint")
+
+    parser.add_argument("--range", help="Commit range to lint (e.g., HEAD~3..)")
+
+    parser.add_argument(
+        "--staged", action="store_true", help="Lint staged commit message"
+    )
+
+    parser.add_argument(
+        "--install-hook", action="store_true", help="Install as git commit-msg hook"
+    )
+
+    args = parser.parse_args()
+
+    # Check if gitlint is installed
+    check_gitlint_installed()
+
+    # Install hook if requested
+    if args.install_hook:
+        install_hook()
+        return
+
+    # Validate arguments
+    exclusive_args = [args.hash, args.range, args.staged]
+    if sum(bool(arg) for arg in exclusive_args) > 1:
+        print("Error: --hash, --range, and --staged are mutually exclusive")
+        sys.exit(1)
+
+    # Lint commits
+    success = lint_commit(
+        commit_hash=args.hash, commit_range=args.range, staged=args.staged
+    )
+
+    sys.exit(0 if success else 1)
+
+
+def install_hook():
+    """Install the script as a git commit-msg hook."""
+    git_dir = Path(".git")
+    if not git_dir.exists():
+        print("Error: Not in a git repository")
+        sys.exit(1)
+
+    hooks_dir = git_dir / "hooks"
+    hooks_dir.mkdir(exist_ok=True)
+
+    hook_file = hooks_dir / "commit-msg"
+
+    hook_content = """#!/usr/bin/env python3
+# Git commit-msg hook for gitlint
+# Python-based commit message linting with gitlint
+import subprocess
+import sys
+
+# Run gitlint on the commit message
+result = subprocess.run(  # nosec B603
+    ["gitlint", "--msg-filename", sys.argv[1]],
+    capture_output=True,
+    text=True
+)
+
+if result.returncode != 0:
+    print("Commit message validation failed:")
+    print(result.stdout)
+    if result.stderr:
+        print("Error output:")
+        print(result.stderr)
+    sys.exit(1)
+
+print("✅ Commit message is valid!")
+"""
+
+    hook_file.write_text(hook_content)
+    hook_file.chmod(0o755)
+
+    print(f"✅ Installed commit-msg hook at {hook_file}")
+    print("The hook will automatically run when you commit.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_citation_builder.py b/tests/test_citation_builder.py
index 08045fd..b664bf7 100644
--- a/tests/test_citation_builder.py
+++ b/tests/test_citation_builder.py
@@ -23,7 +23,7 @@ def test_pi():
         given_name="Author",
         orcid="0000-0000-0000-1234",
         email="test.author@example.org",
-        affiliation="Test University"
+        affiliation="Test University",
     )
 
 
@@ -115,7 +115,9 @@ def test_build_authors_with_ror(openalex_data, pi_finder):
         pytest.skip("Test data doesn't contain any ROR identifiers")
 
     # Create builder with ror=True to enable ROR identifiers
-    builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder, ror=True)
+    builder = CitationBuilder(
+        data=openalex_data, doi=doi, pi_finder=pi_finder, ror=True
+    )
 
     # Get authors
     authors, _ = builder.build_authors()
@@ -129,11 +131,11 @@ def test_build_authors_with_ror(openalex_data, pi_finder):
 
     for author in authors:
         # Check if author has affiliation
-        if not hasattr(author, 'affiliation') or not author.affiliation:
+        if not hasattr(author, "affiliation") or not author.affiliation:
             continue
 
         # Check if affiliation is an Institution with a ROR ID
-        if not hasattr(author.affiliation, 'ror'):
+        if not hasattr(author.affiliation, "ror"):
             continue
 
         # Check if ROR ID is present and contains "ror.org"
@@ -154,7 +156,7 @@ def test_build_authors_with_ror(openalex_data, pi_finder):
         assert affiliation_field.value == institution_with_ror.ror
 
         # Verify the expanded_value dictionary has the expected structure
-        assert hasattr(affiliation_field, 'expanded_value')
+        assert hasattr(affiliation_field, "expanded_value")
         assert isinstance(affiliation_field.expanded_value, dict)
 
         # Check specific fields in the expanded_value
diff --git a/tests/test_doi2dataset.py b/tests/test_doi2dataset.py
index e5515d8..4f4ec15 100644
--- a/tests/test_doi2dataset.py
+++ b/tests/test_doi2dataset.py
@@ -13,6 +13,7 @@ def test_sanitize_filename():
     result = sanitize_filename(doi)
     assert result == expected
 
+
 def test_split_name_with_comma():
     """Test splitting a full name that contains a comma."""
     full_name = "Doe, John"
@@ -20,6 +21,7 @@ def test_split_name_with_comma():
     assert given == "John"
     assert family == "Doe"
 
+
 def test_split_name_without_comma():
     """Test splitting a full name that does not contain a comma."""
     full_name = "John Doe"
@@ -27,11 +29,13 @@ def test_split_name_without_comma():
     assert given == "John"
     assert family == "Doe"
 
+
 def test_validate_email_address_valid():
     """Test that a valid email address is correctly recognized."""
     valid_email = "john.doe@iana.org"
     assert validate_email_address(valid_email) is True
 
+
 def test_validate_email_address_invalid():
     """Test that an invalid email address is correctly rejected."""
     invalid_email = "john.doe@invalid_domain"
diff --git a/tests/test_fetch_doi_mock.py b/tests/test_fetch_doi_mock.py
index ecdd7b5..3ed99b0 100644
--- a/tests/test_fetch_doi_mock.py
+++ b/tests/test_fetch_doi_mock.py
@@ -20,6 +20,7 @@ class FakeResponse:
     """
     A fake response object to simulate an API response.
     """
+
     def __init__(self, json_data, status_code=200):
         self._json = json_data
         self.status_code = status_code
@@ -30,6 +31,7 @@ class FakeResponse:
     def raise_for_status(self):
         pass
 
+
 @pytest.fixture(autouse=True)
 def load_config_test():
     """
@@ -39,6 +41,7 @@ def load_config_test():
     config_path = os.path.join(os.path.dirname(__file__), "config_test.yaml")
     Config.load_config(config_path=config_path)
 
+
 @pytest.fixture
 def fake_openalex_response():
     """
@@ -50,6 +53,7 @@ def fake_openalex_response():
         data = json.load(f)
     return data
 
+
 def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
     """
     Test fetching DOI metadata by simulating the API call with a locally saved JSON response.
@@ -88,7 +92,7 @@ def test_openalex_abstract_extraction(mocker, fake_openalex_response):
     assert abstract_text is not None
 
     # If abstract exists in the response, it should be properly extracted
-    if 'abstract_inverted_index' in fake_openalex_response:
+    if "abstract_inverted_index" in fake_openalex_response:
         assert len(abstract_text) > 0
 
 
@@ -152,7 +156,7 @@ def test_pi_finder_find_by_orcid():
         given_name="Jon",
         orcid="0000-0000-0000-0000",
         email="jon.doe@iana.org",
-        affiliation="Institute of Science, Some University"
+        affiliation="Institute of Science, Some University",
     )
 
     # Create PIFinder with our test PI
@@ -181,8 +185,10 @@ def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
     doi = "10.1038/srep45389"
 
     # Mock API response
-    mocker.patch("doi2dataset.APIClient.make_request",
-                 return_value=FakeResponse(fake_openalex_response, 200))
+    mocker.patch(
+        "doi2dataset.APIClient.make_request",
+        return_value=FakeResponse(fake_openalex_response, 200),
+    )
 
     # Create processor with upload disabled and progress disabled
     processor = MetadataProcessor(doi=doi, upload=False, progress=False)
diff --git a/tests/test_license_processor.py b/tests/test_license_processor.py
index 560fe5a..f9eff58 100644
--- a/tests/test_license_processor.py
+++ b/tests/test_license_processor.py
@@ -3,37 +3,27 @@ from doi2dataset import License, LicenseProcessor
 
 def test_license_processor_cc_by():
     """Test processing a CC BY license"""
-    data = {
-        "primary_location": {
-            "license": "cc-by"
-        }
-    }
+    data = {"primary_location": {"license": "cc-by"}}
     license_obj = LicenseProcessor.process_license(data)
     assert isinstance(license_obj, License)
     assert license_obj.short == "cc-by"
     assert license_obj.name == "CC BY 4.0"
     assert license_obj.uri == "https://creativecommons.org/licenses/by/4.0/"
 
+
 def test_license_processor_cc0():
     """Test processing a CC0 license"""
-    data = {
-        "primary_location": {
-            "license": "cc0"
-        }
-    }
+    data = {"primary_location": {"license": "cc0"}}
     license_obj = LicenseProcessor.process_license(data)
     assert isinstance(license_obj, License)
     assert license_obj.short == "cc0"
     assert license_obj.name == "CC0 1.0"
     assert license_obj.uri == "https://creativecommons.org/publicdomain/zero/1.0/"
 
+
 def test_license_processor_unknown_license():
     """Test processing an unknown license"""
-    data = {
-        "primary_location": {
-            "license": "unknown-license"
-        }
-    }
+    data = {"primary_location": {"license": "unknown-license"}}
     license_obj = LicenseProcessor.process_license(data)
     assert isinstance(license_obj, License)
     assert license_obj.short == "unknown-license"
@@ -41,17 +31,17 @@ def test_license_processor_unknown_license():
     assert license_obj.name == "unknown-license" or license_obj.name == ""
     assert hasattr(license_obj, "uri")
 
+
 def test_license_processor_no_license():
     """Test processing with no license information"""
-    data = {
-        "primary_location": {}
-    }
+    data = {"primary_location": {}}
     license_obj = LicenseProcessor.process_license(data)
     assert isinstance(license_obj, License)
     assert license_obj.short == "unknown"
     assert license_obj.name == ""
     assert license_obj.uri == ""
 
+
 def test_license_processor_no_primary_location():
     """Test processing with no primary location"""
     data = {}
diff --git a/tests/test_metadata_processor.py b/tests/test_metadata_processor.py
index ffacf4e..e489150 100644
--- a/tests/test_metadata_processor.py
+++ b/tests/test_metadata_processor.py
@@ -33,7 +33,10 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
     abstract_mock = MagicMock()
     abstract_mock.text = "This is a sample abstract"
     abstract_mock.source = "openalex"
-    monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
+    monkeypatch.setattr(
+        "doi2dataset.AbstractProcessor.get_abstract",
+        lambda *args, **kwargs: abstract_mock,
+    )
 
     # Mock the _fetch_data method to return our test data
     metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
@@ -47,21 +50,23 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
 
     # Verify the basic metadata fields were extracted correctly
     assert metadata is not None
-    assert 'datasetVersion' in metadata
+    assert "datasetVersion" in metadata
 
     # Examine the fields inside datasetVersion.metadataBlocks
-    assert 'metadataBlocks' in metadata['datasetVersion']
-    citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
+    assert "metadataBlocks" in metadata["datasetVersion"]
+    citation = metadata["datasetVersion"]["metadataBlocks"].get("citation", {})
 
     # Check fields in citation section
-    assert 'fields' in citation
-    fields = citation['fields']
+    assert "fields" in citation
+    fields = citation["fields"]
 
     # Check for basic metadata fields in a more flexible way
-    field_names = [field.get('typeName') for field in fields]
-    assert 'title' in field_names
-    assert 'subject' in field_names
-    assert 'dsDescription' in field_names  # Description is named 'dsDescription' in the schema
+    field_names = [field.get("typeName") for field in fields]
+    assert "title" in field_names
+    assert "subject" in field_names
+    assert (
+        "dsDescription" in field_names
+    )  # Description is named 'dsDescription' in the schema
 
 
 def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
@@ -73,7 +78,10 @@ def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
     abstract_mock = MagicMock()
     abstract_mock.text = "This is a sample abstract"
     abstract_mock.source = "openalex"
-    monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
+    monkeypatch.setattr(
+        "doi2dataset.AbstractProcessor.get_abstract",
+        lambda *args, **kwargs: abstract_mock,
+    )
 
     # Mock the _fetch_data method to return our test data
     metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
@@ -86,33 +94,35 @@ def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
     metadata = metadata_processor._build_metadata(openalex_data)
 
     # Examine the fields inside datasetVersion.metadataBlocks
-    assert 'metadataBlocks' in metadata['datasetVersion']
-    citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
+    assert "metadataBlocks" in metadata["datasetVersion"]
+    citation = metadata["datasetVersion"]["metadataBlocks"].get("citation", {})
 
     # Check fields in citation section
-    assert 'fields' in citation
-    fields = citation['fields']
+    assert "fields" in citation
+    fields = citation["fields"]
 
     # Check for author and datasetContact fields
-    field_names = [field.get('typeName') for field in fields]
-    assert 'author' in field_names
-    assert 'datasetContact' in field_names
+    field_names = [field.get("typeName") for field in fields]
+    assert "author" in field_names
+    assert "datasetContact" in field_names
 
     # Verify these are compound fields with actual entries
     for field in fields:
-        if field.get('typeName') == 'author':
-            assert 'value' in field
-            assert isinstance(field['value'], list)
-            assert len(field['value']) > 0
+        if field.get("typeName") == "author":
+            assert "value" in field
+            assert isinstance(field["value"], list)
+            assert len(field["value"]) > 0
 
-        if field.get('typeName') == 'datasetContact':
-            assert 'value' in field
-            assert isinstance(field['value'], list)
+        if field.get("typeName") == "datasetContact":
+            assert "value" in field
+            assert isinstance(field["value"], list)
             # The datasetContact might be empty in test environment
             # Just check it exists rather than asserting length
 
 
-def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, monkeypatch):
+def test_build_metadata_keywords_and_topics(
+    metadata_processor, openalex_data, monkeypatch
+):
     """Test that _build_metadata correctly extracts keywords and topics"""
     # Mock the console to avoid print errors
     metadata_processor.console = MagicMock()
@@ -121,7 +131,10 @@ def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, m
     abstract_mock = MagicMock()
     abstract_mock.text = "This is a sample abstract"
     abstract_mock.source = "openalex"
-    monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
+    monkeypatch.setattr(
+        "doi2dataset.AbstractProcessor.get_abstract",
+        lambda *args, **kwargs: abstract_mock,
+    )
 
     # Mock the _fetch_data method to return our test data
     metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
@@ -134,27 +147,27 @@ def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, m
     metadata = metadata_processor._build_metadata(openalex_data)
 
     # Examine the fields inside datasetVersion.metadataBlocks
-    assert 'metadataBlocks' in metadata['datasetVersion']
-    citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
+    assert "metadataBlocks" in metadata["datasetVersion"]
+    citation = metadata["datasetVersion"]["metadataBlocks"].get("citation", {})
 
     # Check fields in citation section
-    assert 'fields' in citation
-    fields = citation['fields']
+    assert "fields" in citation
+    fields = citation["fields"]
 
     # Check for keyword and subject fields
-    field_names = [field.get('typeName') for field in fields]
+    field_names = [field.get("typeName") for field in fields]
 
     # If keywords exist, verify structure
-    if 'keyword' in field_names:
+    if "keyword" in field_names:
         for field in fields:
-            if field.get('typeName') == 'keyword':
-                assert 'value' in field
-                assert isinstance(field['value'], list)
+            if field.get("typeName") == "keyword":
+                assert "value" in field
+                assert isinstance(field["value"], list)
 
     # Check for subject field which should definitely exist
-    assert 'subject' in field_names
+    assert "subject" in field_names
     for field in fields:
-        if field.get('typeName') == 'subject':
-            assert 'value' in field
-            assert isinstance(field['value'], list)
-            assert len(field['value']) > 0
+        if field.get("typeName") == "subject":
+            assert "value" in field
+            assert isinstance(field["value"], list)
+            assert len(field["value"]) > 0
diff --git a/tests/test_person.py b/tests/test_person.py
index 2e1e030..61e081d 100644
--- a/tests/test_person.py
+++ b/tests/test_person.py
@@ -8,7 +8,7 @@ def test_person_to_dict_with_string_affiliation():
         given_name="John",
         orcid="0000-0001-2345-6789",
         email="john.doe@example.org",
-        affiliation="Test University"
+        affiliation="Test University",
     )
 
     result = person.to_dict()
@@ -29,7 +29,7 @@ def test_person_to_dict_with_institution_ror():
         given_name="John",
         orcid="0000-0001-2345-6789",
         email="john.doe@example.org",
-        affiliation=inst
+        affiliation=inst,
     )
 
     result = person.to_dict()
@@ -48,7 +48,7 @@ def test_person_to_dict_with_institution_display_name_only():
         family_name="Smith",
         given_name="Jane",
         orcid="0000-0001-9876-5432",
-        affiliation=inst
+        affiliation=inst,
     )
 
     result = person.to_dict()
@@ -63,11 +63,7 @@ def test_person_to_dict_with_empty_institution():
     # Create an Institution with empty values
     inst = Institution("")
 
-    person = Person(
-        family_name="Brown",
-        given_name="Robert",
-        affiliation=inst
-    )
+    person = Person(family_name="Brown", given_name="Robert", affiliation=inst)
 
     result = person.to_dict()
 
@@ -79,9 +75,7 @@ def test_person_to_dict_with_empty_institution():
 def test_person_to_dict_with_no_affiliation():
     """Test Person.to_dict() with no affiliation."""
     person = Person(
-        family_name="Green",
-        given_name="Alice",
-        orcid="0000-0002-1111-2222"
+        family_name="Green", given_name="Alice", orcid="0000-0002-1111-2222"
     )
 
     result = person.to_dict()
diff --git a/tests/test_publication_utils.py b/tests/test_publication_utils.py
index d9dc978..40b506e 100644
--- a/tests/test_publication_utils.py
+++ b/tests/test_publication_utils.py
@@ -14,44 +14,44 @@ def metadata_processor():
     processor.console = MagicMock()
     return processor
 
+
 def test_get_publication_year_with_publication_year(metadata_processor):
     """Test that _get_publication_year extracts year from publication_year field"""
     data = {"publication_year": 2020}
     year = metadata_processor._get_publication_year(data)
     assert year == 2020
 
+
 def test_get_publication_year_with_date(metadata_processor):
     """Test that _get_publication_year returns empty string when publication_year is missing"""
     data = {"publication_date": "2019-05-15"}
     year = metadata_processor._get_publication_year(data)
     assert year == ""
 
+
 def test_get_publication_year_with_both_fields(metadata_processor):
     """Test that _get_publication_year prioritizes publication_year over date"""
-    data = {
-        "publication_year": 2020,
-        "publication_date": "2019-05-15"
-    }
+    data = {"publication_year": 2020, "publication_date": "2019-05-15"}
     year = metadata_processor._get_publication_year(data)
     assert year == 2020
 
+
 def test_get_publication_year_with_partial_date(metadata_processor):
     """Test that _get_publication_year returns empty string when only publication_date is present"""
     data = {"publication_date": "2018"}
     year = metadata_processor._get_publication_year(data)
     assert year == ""
 
+
 def test_get_publication_year_with_missing_data(metadata_processor):
     """Test that _get_publication_year handles missing data"""
     data = {"other_field": "value"}
     year = metadata_processor._get_publication_year(data)
     assert year == ""
 
+
 def test_get_publication_year_with_invalid_data(metadata_processor):
     """Test that _get_publication_year returns whatever is in publication_year field"""
-    data = {
-        "publication_year": "not-a-year",
-        "publication_date": "invalid-date"
-    }
+    data = {"publication_year": "not-a-year", "publication_date": "invalid-date"}
     year = metadata_processor._get_publication_year(data)
     assert year == "not-a-year"
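+
+
+# NOTE: illustrative addition by the editor, not part of the original patch.
+# A minimal sketch of one more edge case, assuming _get_publication_year keeps
+# its current contract of returning data.get("publication_year", "") verbatim:
+# an explicit None value is passed through unchanged rather than normalized
+# to the empty-string default used for a missing key.
+def test_get_publication_year_with_none_value(metadata_processor):
+    """Test that _get_publication_year passes an explicit None through unchanged"""
+    data = {"publication_year": None}
+    year = metadata_processor._get_publication_year(data)
+    assert year is None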