From 67b46d5140691cb154924171b823c78c9246fea1 Mon Sep 17 00:00:00 2001
From: Alexander Minges <alexander.minges@uni-due.de>
Date: Mon, 7 Jul 2025 14:41:39 +0200
Subject: [PATCH] feat!: generalize script by removing organizational metadata

Remove Phase class, organizational metadata blocks, and unused project fields. Update configuration
to use 'default_grants' and simplify PI usage to fallback corresponding author determination only.

BREAKING CHANGES:
- Remove 'phase' and 'project' fields from configuration
- Use 'default_grants' instead of 'default_grant'
- Generate only standard Dataverse citation metadata
---
 .gitignore                       |   1 +
 README.md                        |  80 +++++++++++++++++---
 __init__.py                      |   5 +-
 config_example.yaml              |  22 +++---
 doi2dataset.py                   | 123 ++++---------------------------
 tests/config_test.yaml           |   9 +--
 tests/test_citation_builder.py   |  65 ++++++++--------
 tests/test_doi2dataset.py        |  14 +---
 tests/test_fetch_doi_mock.py     |  62 ++++++++--------
 tests/test_metadata_processor.py |  56 +++++++-------
 tests/test_person.py             |  39 +++++-----
 11 files changed, 207 insertions(+), 269 deletions(-)

diff --git a/.gitignore b/.gitignore
index 6ff6e4c..73052a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Config file
 config.yaml
+.config.yaml
 
 # Processed DOIs
 *.json
diff --git a/README.md b/README.md
index 8b66b2a..6470fb4 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,14 @@
 
 - **DOI Validation and Normalization:** Validates DOIs and converts them into a standardized format.
 - **Metadata Retrieval:** Fetches metadata such as title, abstract, license, and author information from external sources.
-- **Metadata Mapping:** Automatically maps and generates metadata fields (e.g., title, description, keywords) including support for controlled vocabularies and compound fields.
+- **Standard Dataverse Metadata:** Generates standard Dataverse citation metadata including:
+  - Title, publication date, and alternative URL
+  - Author information with affiliations and ORCID identifiers
+  - Dataset contact information (corresponding authors)
+  - Abstract and description
+  - Keywords and subject classification
+  - Grant/funding information
+  - License information when available
 - **Optional Upload:** Allows uploading of metadata directly to a Dataverse.org server.
 - **Progress Tracking:** Uses the Rich library for user-friendly progress tracking and error handling.
 
@@ -23,14 +30,41 @@ cd doi2dataset
 
 ## Configuration
 
-Configuration
-
 Before running the tool, configure the necessary settings in the `config.yaml` file located in the project root. This file contains configuration details such as:
 
-- Connection details (URL, API token, authentication credentials)
-- Mapping of project phases
-- Principal Investigator (PI) information
-- Default grant configurations
+- **Connection details**: URL, API token, authentication credentials for Dataverse server
+- **Principal Investigator (PI) information**: Optional; used as a fallback to determine corresponding authors when the publication metadata does not specify them
+- **Default grant configurations**: Funding information to be included in the metadata (supports multiple grants)
+
+### Configuration File Structure
+
+The configuration file should follow this structure:
+
+```yaml
+# Dataverse server connection details
+dataverse:
+  url: "https://your-dataverse-instance.org"
+  api_token: "your-api-token"
+
+# Default grant information (supports multiple grants)
+default_grants:
+  - funder: "Your Funding Agency"
+    id: "GRANT123456"
+  - funder: "Another Funding Agency"
+    id: "GRANT789012"
+
+# Principal investigators for fallback corresponding author determination (optional)
+pis:
+  - family_name: "Doe"
+    given_name: "John"
+    orcid: "0000-0000-0000-0000"
+    email: "john.doe@university.edu"
+    affiliation: "Department of Science, University"
+```
+
+See `config_example.yaml` for a complete example configuration.
+
+**Note**: The PI section is optional. If no corresponding authors are found in the publication metadata and no PIs are configured, the tool will still generate metadata but may issue a warning about missing corresponding author information.
 
 ## Usage
 
@@ -102,11 +136,13 @@ pytest --cov=. --cov-report=html
 This creates a `htmlcov` directory. Open `htmlcov/index.html` in a browser to view the detailed coverage report.
 
 A `.coveragerc` configuration file is provided that:
+
 - Excludes test files, documentation, and boilerplate code from coverage analysis
 - Configures reporting to ignore common non-testable lines (like defensive imports)
 - Sets the output directory for HTML reports
 
 Recent improvements have increased coverage from 48% to 61% by adding focused tests for:
+
 - Citation building functionality
 - License processing and validation
 - Metadata field extraction
@@ -114,6 +150,7 @@ Recent improvements have increased coverage from 48% to 61% by adding focused te
 - Publication data parsing and validation
 
 Areas that could benefit from additional testing:
+
 - More edge cases in the MetadataProcessor class workflow
 - Additional CitationBuilder scenarios with diverse inputs
 - Complex network interactions and error handling
@@ -122,7 +159,7 @@ Areas that could benefit from additional testing:
 
 The test suite is organized into six main files:
 
-1. **test_doi2dataset.py**: Basic tests for core functions like phase checking, name splitting and DOI validation.
+1. **test_doi2dataset.py**: Basic tests for core functions like name splitting, DOI validation, and filename sanitization.
 2. **test_fetch_doi_mock.py**: Tests API interactions using a mock OpenAlex response stored in `srep45389.json`.
 3. **test_citation_builder.py**: Tests for building citation metadata from API responses.
 4. **test_metadata_processor.py**: Tests for the metadata processing workflow.
@@ -136,7 +173,6 @@ The test suite covers the following categories of functionality:
 #### Core Functionality Tests
 
 - **DOI Validation and Processing**: Parameterized tests for DOI normalization, validation, and filename sanitization with various inputs.
-- **Phase Management**: Tests for checking publication year against defined project phases, including boundary cases.
 - **Name Processing**: Extensive tests for parsing and splitting author names in different formats (with/without commas, middle initials, etc.).
 - **Email Validation**: Tests for proper validation of email addresses with various domain configurations.
 
@@ -151,12 +187,36 @@ The test suite covers the following categories of functionality:
 
 - **Citation Building**: Tests for properly building citation metadata from API responses.
 - **License Processing**: Tests for correctly identifying and formatting license information from various license IDs.
-- **Principal Investigator Matching**: Tests for finding project PIs based on ORCID identifiers.
+- **Principal Investigator Matching**: Tests for finding configured PIs based on ORCID identifiers (used for fallback corresponding author determination).
 - **Configuration Loading**: Tests for properly loading and validating configuration from files.
 - **Metadata Workflow**: Tests for the complete metadata processing workflow.
 
 These tests ensure that all components work correctly in isolation and together as a system, with special attention to edge cases and error handling.
 
+## Changelog
+
+### Version 0.2.0 - Generalization Update
+
+This release makes the tool more general and suitable for a broader range of use cases:
+
+**Breaking Changes:**
+
+- Removed organizational-specific metadata blocks (project phases, organizational fields)
+- Removed `Phase` class and phase-related configuration
+- Simplified configuration structure
+
+**What's New:**
+
+- Streamlined metadata generation focusing on standard Dataverse citation metadata
+- Reduced configuration requirements for easier adoption
+- Maintained PI information support for corresponding author fallback functionality
+
+**Migration Guide:**
+
+- Remove the `phase` section and any `project` entries under `pis` from your configuration file, and rename `default_grant` to `default_grants`
+- The tool will now generate only standard citation metadata blocks
+- PI information is still supported and used for fallback corresponding author determination
+
 ## Contributing
 
 Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
diff --git a/__init__.py b/__init__.py
index 0db05d7..f1bd259 100644
--- a/__init__.py
+++ b/__init__.py
@@ -8,10 +8,9 @@ from .doi2dataset import (
     LicenseProcessor,
     MetadataProcessor,
     NameProcessor,
+    Person,
     PIFinder,
-    Person, 
-    Phase,
     SubjectMapper,
     sanitize_filename,
     validate_email_address,
-)
\ No newline at end of file
+)
diff --git a/config_example.yaml b/config_example.yaml
index f14cdad..d00d523 100644
--- a/config_example.yaml
+++ b/config_example.yaml
@@ -1,23 +1,25 @@
-default_grant:
+dataverse:
+  url: "https://your-dataverse-instance.org"
+  api_token: "your-api-token-here"
+  dataverse: "your-dataverse-alias"
+  auth_user: "your-username"
+  auth_password: "your-password"
+
+default_grants:
   - funder: "Awesome Funding Agency"
     id: "ABC12345"
-
-phase:
-  "Phase 1 (2021/2025)":
-    start: 2021
-    end: 2025
+  - funder: "Another Funding Agency"
+    id: "DEF67890"
 
 pis:
   - family_name: "Doe"
     given_name: "Jon"
     orcid: "0000-0000-0000-0000"
-    email: "jon.doe@some-university.edu"
+    email: "jon.doe@iana.org"
     affiliation: "Institute of Science, Some University"
-    project: ["Project A01"]
 
   - family_name: "Doe"
     given_name: "Jane"
     orcid: "0000-0000-0000-0001"
-    email: "jane.doe@some-university.edu"
+    email: "jane.doe@iana.org"
     affiliation: "Institute of Science, Some University"
-    project: ["Project A02"]
diff --git a/doi2dataset.py b/doi2dataset.py
index 162a53a..438e3a2 100755
--- a/doi2dataset.py
+++ b/doi2dataset.py
@@ -109,36 +109,6 @@ class FieldType(Enum):
     COMPOUND = "compound"
     VOCABULARY = "controlledVocabulary"
 
-@dataclass
-class Phase:
-    """
-    Represents a project phase with a defined time span.
-
-    Attributes:
-        name (str): The name of the project phase.
-        start (int): The start year of the project phase.
-        end (int): The end year of the project phase.
-    """
-
-    name: str
-    start: int
-    end: int
-
-    def check_year(self, year: int) -> bool:
-        """
-        Checks whether a given year falls within the project's phase boundaries.
-
-        Args:
-            year (int): The year to check.
-
-        Returns:
-            bool: True if the year is within the phase boundaries, otherwise False.
-        """
-
-        if self.start <= year <= self.end:
-            return True
-        return False
-
 @dataclass
 class BaseMetadataField[T]:
     """
@@ -301,7 +271,7 @@ class Institution:
                 "termName": self.display_name,
                 "@type": "https://schema.org/Organization"
             }
-            return PrimitiveMetadataField("authorAffiliation", False, self.ror, expanded_value)
+            return PrimitiveMetadataField("authorAffiliation", False, self.ror, expanded_value=expanded_value)
         else:
             return PrimitiveMetadataField("authorAffiliation", False, self.display_name)
 
@@ -316,14 +286,12 @@ class Person:
         orcid (str): ORCID identifier (optional).
         email (str): Email address (optional).
         affiliation (Institution): Affiliation of the person (optional).
-        project (list[str]): List of associated projects.
     """
     family_name: str
     given_name: str
     orcid: str = ""
     email: str = ""
     affiliation: Institution | str = ""
-    project: list[str] = field(default_factory=list)
 
     def to_dict(self) -> dict[str, str | list[str] | dict[str, str]]:
             """
@@ -340,8 +308,7 @@ class Person:
                 "family_name": self.family_name,
                 "given_name": self.given_name,
                 "orcid": self.orcid,
-                "email": self.email,
-                "project": self.project
+                "email": self.email
             }
 
             if isinstance(self.affiliation, Institution):
@@ -464,12 +431,10 @@ class ConfigData:
 
     Attributes:
         dataverse (dict[str, str]): Dataverse-related configuration.
-        phase (dict[str, dict[str, int]]): Mapping of project phases.
         pis (list[dict[str, Any]]): List of principal investigator configurations.
         default_grants (list[dict[str, str]]): Default grant configurations.
     """
     dataverse: dict[str, str]
-    phase: dict[str, dict[str, int]]
     pis: list[dict[str, Any]]
     default_grants: list[dict[str, str]]
 
@@ -523,7 +488,6 @@ class Config:
 
         cls._config_data = ConfigData(
             dataverse=config_data.get('dataverse', {}),
-            phase=config_data.get('phase', {}),
             pis=config_data.get('pis', []),
             default_grants=config_data.get('default_grants', [])
         )
@@ -545,16 +509,6 @@ class Config:
             raise RuntimeError("Failed to load configuration")
         return cls._config_data
 
-    @property
-    def PHASE(self) -> dict[str, dict[str, int]]:
-        """
-        Get phase configuration.
-
-        Returns:
-            dict[str, dict[str, int]]: Mapping of phases.
-        """
-        return self.get_config().phase
-
     @property
     def PIS(self) -> list[dict[str, Any]]:
         """
@@ -833,7 +787,10 @@ class AbstractProcessor:
             else:
                 console.print(f"\n{ICONS['warning']} No abstract found in CrossRef!", style="warning")
         else:
-            console.print(f"\n{ICONS['info']} License {license.name} does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info")
+            if license.name:
+                console.print(f"\n{ICONS['info']} License {license.name} does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info")
+            else:
+                console.print(f"\n{ICONS['info']} Custom license does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info")
 
 
         openalex_abstract = self._get_openalex_abstract(data)
@@ -1406,8 +1363,7 @@ class MetadataProcessor:
                             CompoundMetadataField("grantNumber", True, grants).to_dict()
                         ],
                         "displayName": "Citation Metadata"
-                    },
-                    "crc1430_org_v1": self._build_organization_metadata(data)
+                    }
                 },
                 "files": []
             }
@@ -1473,71 +1429,22 @@ class MetadataProcessor:
         """
         return data.get("publication_year", "")
 
-    def _build_organization_metadata(self, data: dict[str, Any]) -> dict[str, Any]:
-        """
-        Build organization metadata fields (phase, project, PI names).
 
-        Args:
-            data (dict[str, Any]): The metadata.
-
-        Returns:
-            dict[str, Any]: Organization metadata.
-        """
-        publication_year = self._get_publication_year(data)
-        if publication_year:
-            phases = self._get_phases(int(publication_year))
-        else:
-            phases = []
-
-        pis = self._get_involved_pis(data)
-        projects: list[str] = []
-        for pi in pis:
-            for project in pi.project:
-                projects.append(project)
-
-        pi_names: list[str] = []
-        for pi in pis:
-            pi_names.append(pi.format_name())
-
-        # Deduplicate projects and PI names
-        unique_projects = list(set(projects))
-        unique_pi_names = list(set(pi_names))
-
-        return {
-            "fields": [
-                ControlledVocabularyMetadataField("crc1430OrgV1Phase", True, phases).to_dict(),
-                ControlledVocabularyMetadataField("crc1430OrgV1Project", True, unique_projects).to_dict(),
-                ControlledVocabularyMetadataField("crc1430OrgV1PI", True, unique_pi_names).to_dict()
-            ]
-        }
-
-    def _get_phases(self, year: int) -> list[str]:
-        """
-        Determine the project phases matching a given publication year.
-
-        Args:
-            year (int): The publication year.
-
-        Returns:
-            list[str]: List of matching phase names.
-        """
-        config = Config()
-        matching_phases: list[str] = []
-        for phase_name, phase_info in config.PHASE.items():
-            phase = Phase(phase_name, phase_info["start"], phase_info["end"])
-            if phase.check_year(year):
-                matching_phases.append(phase.name)
-        return matching_phases
 
     def _get_involved_pis(self, data: dict[str, Any]) -> list[Person]:
         """
-        Identify involved principal investigators from the metadata.
+        Identify involved principal investigators from the metadata for use as fallback
+        corresponding authors.
+
+        This method matches authors in the publication metadata against the configured
+        PIs and returns matching PIs. It is used as a fallback when no corresponding
+        authors are explicitly declared in the publication metadata.
 
         Args:
-            data (dict[str, Any]): The metadata.
+            data (dict[str, Any]): The metadata from OpenAlex.
 
         Returns:
-            list[Person]: List of PIs.
+            list[Person]: List of matching PIs for use as corresponding authors.
         """
         involved_pis: list[Person] = []
         for authorship in data.get("authorships", []):
diff --git a/tests/config_test.yaml b/tests/config_test.yaml
index e17f88d..9130659 100644
--- a/tests/config_test.yaml
+++ b/tests/config_test.yaml
@@ -1,23 +1,16 @@
-default_grant:
+default_grants:
   - funder: "Awesome Funding Agency"
     id: "ABC12345"
 
-phase:
-  "Phase 1 (2021/2025)":
-    start: 2021
-    end: 2025
-
 pis:
   - family_name: "Doe"
     given_name: "Jon"
     orcid: "0000-0000-0000-0000"
     email: "jon.doe@iana.org"
     affiliation: "Institute of Science, Some University"
-    project: ["Project A01"]
 
   - family_name: "Doe"
     given_name: "Jane"
     orcid: "0000-0000-0000-0001"
     email: "jane.doe@iana.org"
     affiliation: "Institute of Science, Some University"
-    project: ["Project A02"]
diff --git a/tests/test_citation_builder.py b/tests/test_citation_builder.py
index f66aa1a..055e93e 100644
--- a/tests/test_citation_builder.py
+++ b/tests/test_citation_builder.py
@@ -1,13 +1,9 @@
 import json
 import os
-import pytest
-from unittest.mock import MagicMock
 
-from doi2dataset import (
-    CitationBuilder,
-    PIFinder,
-    Person
-)
+import pytest
+
+from doi2dataset import CitationBuilder, Person, PIFinder
 
 
 @pytest.fixture
@@ -27,8 +23,7 @@ def test_pi():
         given_name="Author",
         orcid="0000-0000-0000-1234",
         email="test.author@example.org",
-        affiliation="Test University",
-        project=["Test Project"]
+        affiliation="Test University"
     )
 
 
@@ -43,15 +38,15 @@ def test_build_authors(openalex_data, pi_finder):
     """Test that CitationBuilder.build_authors correctly processes author information"""
     doi = "10.1038/srep45389"
     builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
-    
+
     # Call the build_authors method - returns tuple of (authors, corresponding_authors)
     authors, corresponding_authors = builder.build_authors()
-    
+
     # Verify that authors were created
     assert authors is not None
     assert isinstance(authors, list)
     assert len(authors) > 0
-    
+
     # Check the structure of the authors
     for author in authors:
         assert hasattr(author, "given_name")
@@ -64,17 +59,17 @@ def test_build_authors_with_affiliations(openalex_data, pi_finder):
     """Test that author affiliations are correctly processed"""
     doi = "10.1038/srep45389"
     builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
-    
+
     # Call the build_authors method
     authors, _ = builder.build_authors()
-    
+
     # Check if any authors have affiliation
     affiliation_found = False
     for author in authors:
         if hasattr(author, "affiliation") and author.affiliation:
             affiliation_found = True
             break
-    
+
     # We may not have affiliations in the test data, so only assert if we found any
     if affiliation_found:
         assert affiliation_found, "No author with affiliation found"
@@ -84,14 +79,14 @@ def test_build_authors_with_corresponding_author(openalex_data, pi_finder):
     """Test that corresponding authors are correctly identified"""
     doi = "10.1038/srep45389"
     builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
-    
+
     # Process authors
     authors, corresponding_authors = builder.build_authors()
-    
+
     # Verify that corresponding authors were identified
     if len(corresponding_authors) > 0:
         assert len(corresponding_authors) > 0, "No corresponding authors identified"
-        
+
         # Check structure of corresponding authors
         for author in corresponding_authors:
             assert hasattr(author, "given_name")
@@ -103,7 +98,7 @@ def test_build_authors_with_corresponding_author(openalex_data, pi_finder):
 def test_build_authors_with_ror(openalex_data, pi_finder):
     """Test that ROR (Research Organization Registry) identifiers are correctly used when ror=True"""
     doi = "10.1038/srep45389"
-    
+
     # First confirm the sample data contains at least one institution with a ROR identifier
     has_ror_institution = False
     for authorship in openalex_data.get("authorships", []):
@@ -114,61 +109,61 @@ def test_build_authors_with_ror(openalex_data, pi_finder):
                 break
         if has_ror_institution:
             break
-    
+
     # Skip test if no ROR identifiers in sample data
     if not has_ror_institution:
         pytest.skip("Test data doesn't contain any ROR identifiers")
-    
+
     # Create builder with ror=True to enable ROR identifiers
     builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder, ror=True)
-    
+
     # Get authors
     authors, _ = builder.build_authors()
-    
+
     # Verify we got authors back
     assert len(authors) > 0, "No authors were extracted from the test data"
-    
+
     # Check for at least one Institution with a ROR ID
     ror_found = False
     institution_with_ror = None
-    
+
     for author in authors:
         # Check if author has affiliation
         if not hasattr(author, 'affiliation') or not author.affiliation:
             continue
-        
+
         # Check if affiliation is an Institution with a ROR ID
         if not hasattr(author.affiliation, 'ror'):
             continue
-            
+
         # Check if ROR ID is present and contains "ror.org"
         if author.affiliation.ror and "ror.org" in author.affiliation.ror:
             ror_found = True
             institution_with_ror = author.affiliation
             break
-    
+
     # Verify ROR IDs are used when ror=True
     assert ror_found, "Expected at least one author with a ROR ID when ror=True"
-    
+
     # Check expanded_value in the affiliation field when ROR is used
     if institution_with_ror:
         # Get the affiliation field
         affiliation_field = institution_with_ror.affiliation_field()
-        
+
         # Verify it's set up correctly with the ROR ID as the value
         assert affiliation_field.value == institution_with_ror.ror
-        
+
         # Verify the expanded_value dictionary has the expected structure
         assert hasattr(affiliation_field, 'expanded_value')
         assert isinstance(affiliation_field.expanded_value, dict)
-        
+
         # Check specific fields in the expanded_value
         expanded_value = affiliation_field.expanded_value
         assert "scheme" in expanded_value
         assert expanded_value["scheme"] == "http://www.grid.ac/ontology/"
-        
+
         assert "termName" in expanded_value
         assert expanded_value["termName"] == institution_with_ror.display_name
-        
+
         assert "@type" in expanded_value
-        assert expanded_value["@type"] == "https://schema.org/Organization"
\ No newline at end of file
+        assert expanded_value["@type"] == "https://schema.org/Organization"
diff --git a/tests/test_doi2dataset.py b/tests/test_doi2dataset.py
index 65ceecb..e5515d8 100644
--- a/tests/test_doi2dataset.py
+++ b/tests/test_doi2dataset.py
@@ -3,21 +3,9 @@ import sys
 
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
 
-from doi2dataset import NameProcessor, Phase, sanitize_filename, validate_email_address
+from doi2dataset import NameProcessor, sanitize_filename, validate_email_address
 
 
-def test_phase_check_year():
-    """Test that check_year correctly determines if a year is within the phase boundaries."""
-    phase = Phase("TestPhase", 2000, 2010)
-    # Within boundaries
-    assert phase.check_year(2005) is True
-    # Outside boundaries
-    assert phase.check_year(1999) is False
-    assert phase.check_year(2011) is False
-    # Boundary cases
-    assert phase.check_year(2000) is True
-    assert phase.check_year(2010) is True
-
 def test_sanitize_filename():
     """Test the sanitize_filename function to convert DOI to a valid filename."""
     doi = "10.1234/abc.def"
diff --git a/tests/test_fetch_doi_mock.py b/tests/test_fetch_doi_mock.py
index e9f1f44..6e2745c 100644
--- a/tests/test_fetch_doi_mock.py
+++ b/tests/test_fetch_doi_mock.py
@@ -4,16 +4,15 @@ import os
 import pytest
 
 from doi2dataset import (
-    AbstractProcessor, 
+    AbstractProcessor,
     APIClient,
-    CitationBuilder, 
-    Config, 
-    License,
-    LicenseProcessor, 
+    CitationBuilder,
+    Config,
+    LicenseProcessor,
     MetadataProcessor,
     Person,
     PIFinder,
-    SubjectMapper
+    SubjectMapper,
 )
 
 
@@ -78,16 +77,16 @@ def test_openalex_abstract_extraction(mocker, fake_openalex_response):
     """Test the extraction of abstracts from OpenAlex inverted index data."""
     # Create API client for AbstractProcessor
     api_client = APIClient()
-    
+
     # Create processor
     processor = AbstractProcessor(api_client=api_client)
-    
+
     # Call the protected method directly with the fake response
     abstract_text = processor._get_openalex_abstract(fake_openalex_response)
-    
+
     # Verify abstract was extracted
     assert abstract_text is not None
-    
+
     # If abstract exists in the response, it should be properly extracted
     if 'abstract_inverted_index' in fake_openalex_response:
         assert len(abstract_text) > 0
@@ -97,15 +96,15 @@ def test_subject_mapper(fake_openalex_response):
     """Test that the SubjectMapper correctly maps OpenAlex topics to subjects."""
     # Extract topics from the OpenAlex response
     topics = fake_openalex_response.get("topics", [])
-    
+
     # Convert topics to strings - we'll use display_name
     topic_names = []
     if topics:
         topic_names = [topic.get("display_name") for topic in topics if topic.get("display_name")]
-    
+
     # Get subjects using the class method
     subjects = SubjectMapper.get_subjects({"topics": topics})
-    
+
     # Verify subjects were returned
     assert subjects is not None
     assert isinstance(subjects, list)
@@ -114,21 +113,21 @@ def test_subject_mapper(fake_openalex_response):
 def test_citation_builder(fake_openalex_response):
     """Test that the CitationBuilder correctly builds author information."""
     doi = "10.1038/srep45389"
-    
+
     # Mock PIFinder with an empty list of PIs
     pi_finder = PIFinder(pis=[])
-    
+
     # Create builder with required arguments
     builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder)
-    
+
     # Test building other IDs
     other_ids = builder.build_other_ids()
     assert isinstance(other_ids, list)
-    
+
     # Test building grants
     grants = builder.build_grants()
     assert isinstance(grants, list)
-    
+
     # Test building topics
     topics = builder.build_topics()
     assert isinstance(topics, list)
@@ -140,10 +139,10 @@ def test_license_processor(fake_openalex_response):
     license_data = {
         "primary_location": fake_openalex_response.get("primary_location", {})
     }
-    
+
     # Process the license
     license_obj = LicenseProcessor.process_license(license_data)
-    
+
     # Verify license processing
     assert license_obj is not None
     assert hasattr(license_obj, "name")
@@ -158,16 +157,15 @@ def test_pi_finder_find_by_orcid():
         given_name="Jon",
         orcid="0000-0000-0000-0000",
         email="jon.doe@iana.org",
-        affiliation="Institute of Science, Some University",
-        project=["Project A01"]
+        affiliation="Institute of Science, Some University"
     )
-    
+
     # Create PIFinder with our test PI
     finder = PIFinder(pis=[test_pi])
-    
+
     # Find PI by ORCID
     pi = finder._find_by_orcid("0000-0000-0000-0000")
-    
+
     # Verify the PI was found
     assert pi is not None
     assert pi.family_name == "Doe"
@@ -177,7 +175,7 @@ def test_pi_finder_find_by_orcid():
 def test_config_load_invalid_path():
     """Test that Config.load_config raises an error when an invalid path is provided."""
     invalid_path = "non_existent_config.yaml"
-    
+
     # Verify that attempting to load a non-existent config raises an error
     with pytest.raises(FileNotFoundError):
         Config.load_config(config_path=invalid_path)
@@ -186,20 +184,20 @@ def test_config_load_invalid_path():
 def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
     """Test the _fetch_data method of the MetadataProcessor class with mocked responses."""
     doi = "10.1038/srep45389"
-    
+
     # Mock API response
-    mocker.patch("doi2dataset.APIClient.make_request", 
+    mocker.patch("doi2dataset.APIClient.make_request",
                  return_value=FakeResponse(fake_openalex_response, 200))
-    
+
     # Create processor with upload disabled and progress disabled
     processor = MetadataProcessor(doi=doi, upload=False, progress=False)
-    
+
     # Test the _fetch_data method directly
     data = processor._fetch_data()
-    
+
     # Verify that data was fetched correctly
     assert data is not None
     assert data == fake_openalex_response
-    
+
     # Verify the DOI is correctly stored
     assert processor.doi == doi
diff --git a/tests/test_metadata_processor.py b/tests/test_metadata_processor.py
index fcca30d..b8a3c62 100644
--- a/tests/test_metadata_processor.py
+++ b/tests/test_metadata_processor.py
@@ -1,7 +1,8 @@
 import json
 import os
+from unittest.mock import MagicMock
+
 import pytest
-from unittest.mock import MagicMock, patch
 
 from doi2dataset import MetadataProcessor
 
@@ -27,36 +28,35 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
     """Test that _build_metadata correctly extracts basic metadata fields"""
     # Mock the console to avoid print errors
     metadata_processor.console = MagicMock()
-    
+
     # Mock the Abstract related methods and objects to avoid console errors
     abstract_mock = MagicMock()
     abstract_mock.text = "This is a sample abstract"
     abstract_mock.source = "openalex"
     monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
-    
+
     # Mock the _fetch_data method to return our test data
     metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
-    
+
     # Mock methods that might cause issues in isolation
     metadata_processor._build_description = MagicMock(return_value="Test description")
     metadata_processor._get_involved_pis = MagicMock(return_value=[])
-    metadata_processor._build_organization_metadata = MagicMock(return_value={})
-    
+
     # Call the method we're testing
     metadata = metadata_processor._build_metadata(openalex_data)
 
     # Verify the basic metadata fields were extracted correctly
     assert metadata is not None
     assert 'datasetVersion' in metadata
-    
+
     # Examine the fields inside datasetVersion.metadataBlocks
     assert 'metadataBlocks' in metadata['datasetVersion']
     citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
-    
+
     # Check fields in citation section
     assert 'fields' in citation
     fields = citation['fields']
-    
+
     # Check for basic metadata fields in a more flexible way
     field_names = [field.get('typeName') for field in fields]
     assert 'title' in field_names
@@ -68,44 +68,43 @@ def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
     """Test that _build_metadata correctly processes author information"""
     # Mock the console to avoid print errors
     metadata_processor.console = MagicMock()
-    
+
     # Mock the Abstract related methods and objects to avoid console errors
     abstract_mock = MagicMock()
     abstract_mock.text = "This is a sample abstract"
     abstract_mock.source = "openalex"
     monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
-    
+
     # Mock the _fetch_data method to return our test data
     metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
-    
+
     # Mock methods that might cause issues in isolation
     metadata_processor._build_description = MagicMock(return_value="Test description")
     metadata_processor._get_involved_pis = MagicMock(return_value=[])
-    metadata_processor._build_organization_metadata = MagicMock(return_value={})
-    
+
     # Call the method we're testing
     metadata = metadata_processor._build_metadata(openalex_data)
 
     # Examine the fields inside datasetVersion.metadataBlocks
     assert 'metadataBlocks' in metadata['datasetVersion']
     citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
-    
+
     # Check fields in citation section
     assert 'fields' in citation
     fields = citation['fields']
-    
+
     # Check for author and datasetContact fields
     field_names = [field.get('typeName') for field in fields]
     assert 'author' in field_names
     assert 'datasetContact' in field_names
-    
+
     # Verify these are compound fields with actual entries
     for field in fields:
         if field.get('typeName') == 'author':
             assert 'value' in field
             assert isinstance(field['value'], list)
             assert len(field['value']) > 0
-        
+
         if field.get('typeName') == 'datasetContact':
             assert 'value' in field
             assert isinstance(field['value'], list)
@@ -117,46 +116,45 @@ def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, m
     """Test that _build_metadata correctly extracts keywords and topics"""
     # Mock the console to avoid print errors
     metadata_processor.console = MagicMock()
-    
+
     # Mock the Abstract related methods and objects to avoid console errors
     abstract_mock = MagicMock()
     abstract_mock.text = "This is a sample abstract"
     abstract_mock.source = "openalex"
     monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
-    
+
     # Mock the _fetch_data method to return our test data
     metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
-    
+
     # Mock methods that might cause issues in isolation
     metadata_processor._build_description = MagicMock(return_value="Test description")
     metadata_processor._get_involved_pis = MagicMock(return_value=[])
-    metadata_processor._build_organization_metadata = MagicMock(return_value={})
-    
+
     # Call the method we're testing
     metadata = metadata_processor._build_metadata(openalex_data)
-        
+
     # Examine the fields inside datasetVersion.metadataBlocks
     assert 'metadataBlocks' in metadata['datasetVersion']
     citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
-    
+
     # Check fields in citation section
     assert 'fields' in citation
     fields = citation['fields']
-    
+
     # Check for keyword and subject fields
     field_names = [field.get('typeName') for field in fields]
-    
+
     # If keywords exist, verify structure
     if 'keyword' in field_names:
         for field in fields:
             if field.get('typeName') == 'keyword':
                 assert 'value' in field
                 assert isinstance(field['value'], list)
-    
+
     # Check for subject field which should definitely exist
     assert 'subject' in field_names
     for field in fields:
         if field.get('typeName') == 'subject':
             assert 'value' in field
             assert isinstance(field['value'], list)
-            assert len(field['value']) > 0
\ No newline at end of file
+            assert len(field['value']) > 0
diff --git a/tests/test_person.py b/tests/test_person.py
index 3086088..2e1e030 100644
--- a/tests/test_person.py
+++ b/tests/test_person.py
@@ -1,5 +1,5 @@
-import pytest
-from doi2dataset import Person, Institution
+from doi2dataset import Institution, Person
+
 
 def test_person_to_dict_with_string_affiliation():
     """Test Person.to_dict() with a string affiliation."""
@@ -8,35 +8,32 @@ def test_person_to_dict_with_string_affiliation():
         given_name="John",
         orcid="0000-0001-2345-6789",
         email="john.doe@example.org",
-        affiliation="Test University",
-        project=["Project A"]
+        affiliation="Test University"
     )
-    
+
     result = person.to_dict()
-    
+
     assert result["family_name"] == "Doe"
     assert result["given_name"] == "John"
     assert result["orcid"] == "0000-0001-2345-6789"
     assert result["email"] == "john.doe@example.org"
-    assert result["project"] == ["Project A"]
     assert result["affiliation"] == "Test University"
 
 
 def test_person_to_dict_with_institution_ror():
     """Test Person.to_dict() with an Institution that has a ROR ID."""
     inst = Institution("Test University", "https://ror.org/12345")
-    
+
     person = Person(
         family_name="Doe",
         given_name="John",
         orcid="0000-0001-2345-6789",
         email="john.doe@example.org",
-        affiliation=inst,
-        project=["Project A"]
+        affiliation=inst
     )
-    
+
     result = person.to_dict()
-    
+
     assert result["affiliation"] == "https://ror.org/12345"
     # Check other fields too
     assert result["family_name"] == "Doe"
@@ -46,16 +43,16 @@ def test_person_to_dict_with_institution_ror():
 def test_person_to_dict_with_institution_display_name_only():
     """Test Person.to_dict() with an Institution that has only a display_name."""
     inst = Institution("Test University")  # No ROR ID
-    
+
     person = Person(
         family_name="Smith",
         given_name="Jane",
         orcid="0000-0001-9876-5432",
         affiliation=inst
     )
-    
+
     result = person.to_dict()
-    
+
     assert result["affiliation"] == "Test University"
     assert result["family_name"] == "Smith"
     assert result["given_name"] == "Jane"
@@ -65,15 +62,15 @@ def test_person_to_dict_with_empty_institution():
     """Test Person.to_dict() with an Institution that has neither ROR nor display_name."""
     # Create an Institution with empty values
     inst = Institution("")
-    
+
     person = Person(
         family_name="Brown",
         given_name="Robert",
         affiliation=inst
     )
-    
+
     result = person.to_dict()
-    
+
     assert result["affiliation"] == ""
     assert result["family_name"] == "Brown"
     assert result["given_name"] == "Robert"
@@ -86,10 +83,10 @@ def test_person_to_dict_with_no_affiliation():
         given_name="Alice",
         orcid="0000-0002-1111-2222"
     )
-    
+
     result = person.to_dict()
-    
+
     assert result["affiliation"] == ""
     assert result["family_name"] == "Green"
     assert result["given_name"] == "Alice"
-    assert result["orcid"] == "0000-0002-1111-2222"
\ No newline at end of file
+    assert result["orcid"] == "0000-0002-1111-2222"