Compare commits

No commits in common. "eb270cba9b4a0807b098fb3799dc3c0d89883f5b" and "2c88a76f4e9200b673514f476a034d42aeaa8491" have entirely different histories.

11 changed files with 3 additions and 825 deletions

@@ -1,23 +0,0 @@
[run]
source = doi2dataset
omit =
    */tests/*
    */docs/*
    setup.py
    conf.py
    __init__.py

[report]
exclude_lines =
    pragma: no cover
    def __repr__
    if self.debug:
    raise NotImplementedError
    if __name__ == .__main__.:
    pass
    raise ImportError
    except ImportError
    def __str__

[html]
directory = htmlcov

@@ -69,94 +69,12 @@ Documentation is generated using Sphinx. See the `docs/` directory for detailed
## Testing
Tests are implemented with pytest. The test suite provides comprehensive coverage of core functionalities. To run the tests, execute:
Tests are implemented with pytest. To run the tests, execute:
```bash
pytest
```
Or using the Python module syntax:
```bash
python -m pytest
```
### Code Coverage
The project includes code coverage analysis using pytest-cov. Current coverage is approximately 61% of the codebase, with key utilities and test infrastructure at 99-100% coverage.
To run tests with code coverage analysis:
```bash
pytest --cov=.
```
Generate a detailed HTML coverage report:
```bash
pytest --cov=. --cov-report=html
```
This creates an `htmlcov` directory. Open `htmlcov/index.html` in a browser to view the detailed coverage report.
A `.coveragerc` configuration file is provided that:
- Excludes test files, documentation, and boilerplate code from coverage analysis
- Configures reporting to ignore common non-testable lines (like defensive imports)
- Sets the output directory for HTML reports
Recent improvements have increased coverage from 48% to 61% by adding focused tests for:
- Citation building functionality
- License processing and validation
- Metadata field extraction
- OpenAlex integration
- Publication data parsing and validation
Areas that could benefit from additional testing:
- More edge cases in the MetadataProcessor class workflow
- Additional CitationBuilder scenarios with diverse inputs
- Complex network interactions and error handling
### Test Structure
The test suite is organized into six main files; an example of running them individually follows this list:
1. **test_doi2dataset.py**: Basic tests for core functions like phase checking, name splitting and DOI validation.
2. **test_fetch_doi_mock.py**: Tests API interactions using a mock OpenAlex response stored in `srep45389.json`.
3. **test_citation_builder.py**: Tests for building citation metadata from API responses.
4. **test_metadata_processor.py**: Tests for the metadata processing workflow.
5. **test_license_processor.py**: Tests for license processing and validation.
6. **test_publication_utils.py**: Tests for publication year extraction and date handling.
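For example, pytest's standard selection options can run one of these modules on its own or filter tests by keyword (paths assume the `tests/` layout listed above):
```bash
# Run a single test module
pytest tests/test_citation_builder.py

# Run only tests whose names match a keyword expression, with verbose output
pytest -k "license" -v
```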
### Test Categories
The test suite covers the following categories of functionality:
#### Core Functionality Tests
- **DOI Validation and Processing**: Parameterized tests for DOI normalization, validation, and filename sanitization with various inputs (a sketch of this parameterized style follows this list).
- **Phase Management**: Tests for checking publication year against defined project phases, including boundary cases.
- **Name Processing**: Extensive tests for parsing and splitting author names in different formats (with/without commas, middle initials, etc.).
- **Email Validation**: Tests for proper validation of email addresses with various domain configurations.
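As a rough illustration of this parameterized style, the sketch below exercises two of the package's exported helpers. It is a minimal sketch, not taken from the real test suite: the exact sanitization rules, the validator's return type, and the second DOI are assumptions.
```python
import pytest

from doi2dataset import sanitize_filename, validate_email_address


@pytest.mark.parametrize("doi", [
    "10.1038/srep45389",
    "10.1000/example.2020.001",  # hypothetical DOI, for illustration only
])
def test_sanitize_filename_strips_path_separators(doi):
    result = sanitize_filename(doi)
    assert result  # sanitized name should not be empty
    assert "/" not in result  # assumption: path separators never survive sanitization


@pytest.mark.parametrize("email", [
    "test.author@example.org",
    "jon.doe@iana.org",
])
def test_validate_email_address_accepts_plausible_addresses(email):
    # assumption: the validator returns a truthy result for valid addresses
    assert validate_email_address(email)
```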
#### API Integration Tests
- **Mock API Responses**: Tests that use a saved OpenAlex API response (`srep45389.json`) to simulate API interactions without making actual network requests.
- **Data Fetching**: Tests for retrieving and parsing data from the OpenAlex API.
- **Abstract Extraction**: Tests for extracting and cleaning abstracts from OpenAlex's inverted index format, including handling of empty or malformed abstracts (a minimal reconstruction sketch follows this list).
- **Subject Mapping**: Tests for mapping OpenAlex topics to controlled vocabulary subject terms.
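To make the inverted index format concrete, here is a minimal reconstruction sketch, independent of the project's `AbstractProcessor`: OpenAlex's `abstract_inverted_index` maps each token to the word positions at which it occurs, so sorting by position recovers the abstract text. The sample mapping below is invented.
```python
def reconstruct_abstract(inverted_index: dict[str, list[int]]) -> str:
    """Rebuild plain text from an OpenAlex abstract_inverted_index mapping."""
    positioned = [
        (position, token)
        for token, positions in inverted_index.items()
        for position in positions
    ]
    return " ".join(token for _, token in sorted(positioned))


sample = {"Carbon": [0], "fluxes": [1], "respond": [2], "to": [3], "warming.": [4]}
assert reconstruct_abstract(sample) == "Carbon fluxes respond to warming."
```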
#### Metadata Processing Tests
- **Citation Building**: Tests for properly building citation metadata from API responses.
- **License Processing**: Tests for correctly identifying and formatting license information from various license IDs.
- **Principal Investigator Matching**: Tests for finding project PIs based on ORCID identifiers.
- **Configuration Loading**: Tests for properly loading and validating configuration from files.
- **Metadata Workflow**: Tests for the complete metadata processing workflow (the nested structure these tests inspect is sketched after this list).
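The metadata workflow tests inspect a nested structure that follows Dataverse's dataset JSON layout. Schematically, with field names taken from those tests and all values abbreviated or omitted:
```python
# Compound fields ("author", "dsDescription", "datasetContact") carry lists of
# sub-field dicts; simple fields carry a string or a list of strings.
metadata = {
    "datasetVersion": {
        "metadataBlocks": {
            "citation": {
                "fields": [
                    {"typeName": "title", "value": "..."},
                    {"typeName": "subject", "value": ["..."]},
                    {"typeName": "dsDescription", "value": [{}]},  # sub-fields omitted
                    {"typeName": "author", "value": [{}]},         # sub-fields omitted
                    {"typeName": "datasetContact", "value": []},   # may be empty in tests
                ]
            }
        }
    }
}
```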
These tests ensure that all components work correctly in isolation and together as a system, with special attention to edge cases and error handling.
## Contributing
Contributions are welcome! Please fork the repository and submit a pull request with your improvements.

@@ -1,17 +0,0 @@
# Import all classes and functions needed for testing
from .doi2dataset import (
AbstractProcessor,
APIClient,
CitationBuilder,
Config,
License,
LicenseProcessor,
MetadataProcessor,
NameProcessor,
PIFinder,
Person,
Phase,
SubjectMapper,
sanitize_filename,
validate_email_address,
)

@@ -1,8 +0,0 @@
import os
import sys
# Get the path to the parent directory of tests
parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# Add the parent directory to sys.path
sys.path.insert(0, parent_dir)

File diff suppressed because one or more lines are too long

@@ -1,174 +0,0 @@
import json
import os
import pytest
from unittest.mock import MagicMock
from doi2dataset import (
CitationBuilder,
PIFinder,
Person
)
@pytest.fixture
def openalex_data():
"""Load the saved JSON response from the file 'srep45389.json'"""
json_path = os.path.join(os.path.dirname(__file__), "srep45389.json")
with open(json_path, "r", encoding="utf-8") as f:
data = json.load(f)
return data
@pytest.fixture
def test_pi():
"""Create a test PI for matching in tests"""
return Person(
family_name="Test",
given_name="Author",
orcid="0000-0000-0000-1234",
email="test.author@example.org",
affiliation="Test University",
project=["Test Project"]
)
@pytest.fixture
def pi_finder(test_pi):
"""Create a PIFinder with a test PI"""
finder = PIFinder(pis=[test_pi])
return finder
def test_build_authors(openalex_data, pi_finder):
"""Test that CitationBuilder.build_authors correctly processes author information"""
doi = "10.1038/srep45389"
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
# Call the build_authors method - returns tuple of (authors, corresponding_authors)
authors, corresponding_authors = builder.build_authors()
# Verify that authors were created
assert authors is not None
assert isinstance(authors, list)
assert len(authors) > 0
# Check the structure of the authors
for author in authors:
assert hasattr(author, "given_name")
assert hasattr(author, "family_name")
assert isinstance(author.given_name, str)
assert isinstance(author.family_name, str)
def test_build_authors_with_affiliations(openalex_data, pi_finder):
"""Test that author affiliations are correctly processed"""
doi = "10.1038/srep45389"
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
# Call the build_authors method
authors, _ = builder.build_authors()
# Check if any authors have affiliation
affiliation_found = False
for author in authors:
if hasattr(author, "affiliation") and author.affiliation:
affiliation_found = True
break
# We may not have affiliations in the test data, so only assert if we found any
if affiliation_found:
assert affiliation_found, "No author with affiliation found"
def test_build_authors_with_corresponding_author(openalex_data, pi_finder):
"""Test that corresponding authors are correctly identified"""
doi = "10.1038/srep45389"
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
# Process authors
authors, corresponding_authors = builder.build_authors()
# Verify that corresponding authors were identified
if len(corresponding_authors) > 0:
assert len(corresponding_authors) > 0, "No corresponding authors identified"
# Check structure of corresponding authors
for author in corresponding_authors:
assert hasattr(author, "given_name")
assert hasattr(author, "family_name")
assert isinstance(author.given_name, str)
assert isinstance(author.family_name, str)
def test_build_authors_with_ror(openalex_data, pi_finder):
"""Test that ROR (Research Organization Registry) identifiers are correctly used when ror=True"""
doi = "10.1038/srep45389"
# First confirm the sample data contains at least one institution with a ROR identifier
has_ror_institution = False
for authorship in openalex_data.get("authorships", []):
for institution in authorship.get("institutions", []):
ror_id = institution.get("ror")
if ror_id and "ror.org" in ror_id:
has_ror_institution = True
break
if has_ror_institution:
break
# Skip test if no ROR identifiers in sample data
if not has_ror_institution:
pytest.skip("Test data doesn't contain any ROR identifiers")
# Create builder with ror=True to enable ROR identifiers
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder, ror=True)
# Get authors
authors, _ = builder.build_authors()
# Verify we got authors back
assert len(authors) > 0, "No authors were extracted from the test data"
# Check for at least one Institution with a ROR ID
ror_found = False
institution_with_ror = None
for author in authors:
# Check if author has affiliation
if not hasattr(author, 'affiliation') or not author.affiliation:
continue
# Check if affiliation is an Institution with a ROR ID
if not hasattr(author.affiliation, 'ror'):
continue
# Check if ROR ID is present and contains "ror.org"
if author.affiliation.ror and "ror.org" in author.affiliation.ror:
ror_found = True
institution_with_ror = author.affiliation
break
# Verify ROR IDs are used when ror=True
assert ror_found, "Expected at least one author with a ROR ID when ror=True"
# Check expanded_value in the affiliation field when ROR is used
if institution_with_ror:
# Get the affiliation field
affiliation_field = institution_with_ror.affiliation_field()
# Verify it's set up correctly with the ROR ID as the value
assert affiliation_field.value == institution_with_ror.ror
# Verify the expanded_value dictionary has the expected structure
assert hasattr(affiliation_field, 'expanded_value')
assert isinstance(affiliation_field.expanded_value, dict)
# Check specific fields in the expanded_value
expanded_value = affiliation_field.expanded_value
assert "scheme" in expanded_value
assert expanded_value["scheme"] == "http://www.grid.ac/ontology/"
assert "termName" in expanded_value
assert expanded_value["termName"] == institution_with_ror.display_name
assert "@type" in expanded_value
assert expanded_value["@type"] == "https://schema.org/Organization"

@@ -3,18 +3,7 @@ import os
import pytest
from doi2dataset import (
AbstractProcessor,
APIClient,
CitationBuilder,
Config,
License,
LicenseProcessor,
MetadataProcessor,
Person,
PIFinder,
SubjectMapper
)
from doi2dataset import Config, MetadataProcessor
class FakeResponse:
@@ -72,134 +61,3 @@ def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
# Verify that the fetched data matches the fake JSON data.
assert data == fake_openalex_response
def test_openalex_abstract_extraction(mocker, fake_openalex_response):
"""Test the extraction of abstracts from OpenAlex inverted index data."""
# Create API client for AbstractProcessor
api_client = APIClient()
# Create processor
processor = AbstractProcessor(api_client=api_client)
# Call the protected method directly with the fake response
abstract_text = processor._get_openalex_abstract(fake_openalex_response)
# Verify abstract was extracted
assert abstract_text is not None
# If abstract exists in the response, it should be properly extracted
if 'abstract_inverted_index' in fake_openalex_response:
assert len(abstract_text) > 0
def test_subject_mapper(fake_openalex_response):
"""Test that the SubjectMapper correctly maps OpenAlex topics to subjects."""
# Extract topics from the OpenAlex response
topics = fake_openalex_response.get("topics", [])
# Convert topics to strings - we'll use display_name
topic_names = []
if topics:
topic_names = [topic.get("display_name") for topic in topics if topic.get("display_name")]
# Get subjects using the class method
subjects = SubjectMapper.get_subjects({"topics": topics})
# Verify subjects were returned
assert subjects is not None
assert isinstance(subjects, list)
def test_citation_builder(fake_openalex_response):
"""Test that the CitationBuilder correctly builds author information."""
doi = "10.1038/srep45389"
# Mock PIFinder with an empty list of PIs
pi_finder = PIFinder(pis=[])
# Create builder with required arguments
builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder)
# Test building other IDs
other_ids = builder.build_other_ids()
assert isinstance(other_ids, list)
# Test building grants
grants = builder.build_grants()
assert isinstance(grants, list)
# Test building topics
topics = builder.build_topics()
assert isinstance(topics, list)
def test_license_processor(fake_openalex_response):
"""Test that the LicenseProcessor correctly identifies and processes licenses."""
# Create a simplified data structure that contains license info
license_data = {
"primary_location": fake_openalex_response.get("primary_location", {})
}
# Process the license
license_obj = LicenseProcessor.process_license(license_data)
# Verify license processing
assert license_obj is not None
assert hasattr(license_obj, "name")
assert hasattr(license_obj, "uri")
def test_pi_finder_find_by_orcid():
"""Test that PIFinder can find a PI by ORCID."""
# Create a Person object that matches the test config
test_pi = Person(
family_name="Doe",
given_name="Jon",
orcid="0000-0000-0000-0000",
email="jon.doe@iana.org",
affiliation="Institute of Science, Some University",
project=["Project A01"]
)
# Create PIFinder with our test PI
finder = PIFinder(pis=[test_pi])
# Find PI by ORCID
pi = finder._find_by_orcid("0000-0000-0000-0000")
# Verify the PI was found
assert pi is not None
assert pi.family_name == "Doe"
assert pi.given_name == "Jon"
def test_config_load_invalid_path():
"""Test that Config.load_config raises an error when an invalid path is provided."""
invalid_path = "non_existent_config.yaml"
# Verify that attempting to load a non-existent config raises an error
with pytest.raises(FileNotFoundError):
Config.load_config(config_path=invalid_path)
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
"""Test the _fetch_data method of the MetadataProcessor class with mocked responses."""
doi = "10.1038/srep45389"
# Mock API response
mocker.patch("doi2dataset.APIClient.make_request",
return_value=FakeResponse(fake_openalex_response, 200))
# Create processor with upload disabled and progress disabled
processor = MetadataProcessor(doi=doi, upload=False, progress=False)
# Test the _fetch_data method directly
data = processor._fetch_data()
# Verify that data was fetched correctly
assert data is not None
assert data == fake_openalex_response
# Verify the DOI is correctly stored
assert processor.doi == doi

@@ -1,62 +0,0 @@
import pytest
from doi2dataset import LicenseProcessor, License
def test_license_processor_cc_by():
"""Test processing a CC BY license"""
data = {
"primary_location": {
"license": "cc-by"
}
}
license_obj = LicenseProcessor.process_license(data)
assert isinstance(license_obj, License)
assert license_obj.short == "cc-by"
assert license_obj.name == "CC BY 4.0"
assert license_obj.uri == "https://creativecommons.org/licenses/by/4.0/"
def test_license_processor_cc0():
"""Test processing a CC0 license"""
data = {
"primary_location": {
"license": "cc0"
}
}
license_obj = LicenseProcessor.process_license(data)
assert isinstance(license_obj, License)
assert license_obj.short == "cc0"
assert license_obj.name == "CC0 1.0"
assert license_obj.uri == "https://creativecommons.org/publicdomain/zero/1.0/"
def test_license_processor_unknown_license():
"""Test processing an unknown license"""
data = {
"primary_location": {
"license": "unknown-license"
}
}
license_obj = LicenseProcessor.process_license(data)
assert isinstance(license_obj, License)
assert license_obj.short == "unknown-license"
# Verify properties exist and have expected values based on implementation
assert license_obj.name == "unknown-license" or license_obj.name == ""
assert hasattr(license_obj, "uri")
def test_license_processor_no_license():
"""Test processing with no license information"""
data = {
"primary_location": {}
}
license_obj = LicenseProcessor.process_license(data)
assert isinstance(license_obj, License)
assert license_obj.short == "unknown"
assert license_obj.name == ""
assert license_obj.uri == ""
def test_license_processor_no_primary_location():
"""Test processing with no primary location"""
data = {}
license_obj = LicenseProcessor.process_license(data)
assert isinstance(license_obj, License)
assert license_obj.short == "unknown"
assert license_obj.name == ""
assert license_obj.uri == ""

@@ -1,162 +0,0 @@
import json
import os
import pytest
from unittest.mock import MagicMock, patch
from doi2dataset import MetadataProcessor
@pytest.fixture
def openalex_data():
"""Load the saved JSON response from the file 'srep45389.json'"""
json_path = os.path.join(os.path.dirname(__file__), "srep45389.json")
with open(json_path, "r", encoding="utf-8") as f:
data = json.load(f)
return data
@pytest.fixture
def metadata_processor():
"""Create a MetadataProcessor instance with mocked dependencies"""
doi = "10.1038/srep45389"
processor = MetadataProcessor(doi=doi, upload=False, progress=False)
return processor
def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypatch):
"""Test that _build_metadata correctly extracts basic metadata fields"""
# Mock the console to avoid print errors
metadata_processor.console = MagicMock()
# Mock the Abstract related methods and objects to avoid console errors
abstract_mock = MagicMock()
abstract_mock.text = "This is a sample abstract"
abstract_mock.source = "openalex"
monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
# Mock the _fetch_data method to return our test data
metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
# Mock methods that might cause issues in isolation
metadata_processor._build_description = MagicMock(return_value="Test description")
metadata_processor._get_involved_pis = MagicMock(return_value=[])
metadata_processor._build_organization_metadata = MagicMock(return_value={})
# Call the method we're testing
metadata = metadata_processor._build_metadata(openalex_data)
# Verify the basic metadata fields were extracted correctly
assert metadata is not None
assert 'datasetVersion' in metadata
# Examine the fields inside datasetVersion.metadataBlocks
assert 'metadataBlocks' in metadata['datasetVersion']
citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
# Check fields in citation section
assert 'fields' in citation
fields = citation['fields']
# Check for basic metadata fields in a more flexible way
field_names = [field.get('typeName') for field in fields]
assert 'title' in field_names
assert 'subject' in field_names
assert 'dsDescription' in field_names # Description is named 'dsDescription' in the schema
def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
"""Test that _build_metadata correctly processes author information"""
# Mock the console to avoid print errors
metadata_processor.console = MagicMock()
# Mock the Abstract related methods and objects to avoid console errors
abstract_mock = MagicMock()
abstract_mock.text = "This is a sample abstract"
abstract_mock.source = "openalex"
monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
# Mock the _fetch_data method to return our test data
metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
# Mock methods that might cause issues in isolation
metadata_processor._build_description = MagicMock(return_value="Test description")
metadata_processor._get_involved_pis = MagicMock(return_value=[])
metadata_processor._build_organization_metadata = MagicMock(return_value={})
# Call the method we're testing
metadata = metadata_processor._build_metadata(openalex_data)
# Examine the fields inside datasetVersion.metadataBlocks
assert 'metadataBlocks' in metadata['datasetVersion']
citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
# Check fields in citation section
assert 'fields' in citation
fields = citation['fields']
# Check for author and datasetContact fields
field_names = [field.get('typeName') for field in fields]
assert 'author' in field_names
assert 'datasetContact' in field_names
# Verify these are compound fields with actual entries
for field in fields:
if field.get('typeName') == 'author':
assert 'value' in field
assert isinstance(field['value'], list)
assert len(field['value']) > 0
if field.get('typeName') == 'datasetContact':
assert 'value' in field
assert isinstance(field['value'], list)
# The datasetContact might be empty in test environment
# Just check it exists rather than asserting length
def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, monkeypatch):
"""Test that _build_metadata correctly extracts keywords and topics"""
# Mock the console to avoid print errors
metadata_processor.console = MagicMock()
# Mock the Abstract related methods and objects to avoid console errors
abstract_mock = MagicMock()
abstract_mock.text = "This is a sample abstract"
abstract_mock.source = "openalex"
monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
# Mock the _fetch_data method to return our test data
metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
# Mock methods that might cause issues in isolation
metadata_processor._build_description = MagicMock(return_value="Test description")
metadata_processor._get_involved_pis = MagicMock(return_value=[])
metadata_processor._build_organization_metadata = MagicMock(return_value={})
# Call the method we're testing
metadata = metadata_processor._build_metadata(openalex_data)
# Examine the fields inside datasetVersion.metadataBlocks
assert 'metadataBlocks' in metadata['datasetVersion']
citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
# Check fields in citation section
assert 'fields' in citation
fields = citation['fields']
# Check for keyword and subject fields
field_names = [field.get('typeName') for field in fields]
# If keywords exist, verify structure
if 'keyword' in field_names:
for field in fields:
if field.get('typeName') == 'keyword':
assert 'value' in field
assert isinstance(field['value'], list)
# Check for subject field which should definitely exist
assert 'subject' in field_names
for field in fields:
if field.get('typeName') == 'subject':
assert 'value' in field
assert isinstance(field['value'], list)
assert len(field['value']) > 0

@@ -1,95 +0,0 @@
import pytest
from doi2dataset import Person, Institution
def test_person_to_dict_with_string_affiliation():
"""Test Person.to_dict() with a string affiliation."""
person = Person(
family_name="Doe",
given_name="John",
orcid="0000-0001-2345-6789",
email="john.doe@example.org",
affiliation="Test University",
project=["Project A"]
)
result = person.to_dict()
assert result["family_name"] == "Doe"
assert result["given_name"] == "John"
assert result["orcid"] == "0000-0001-2345-6789"
assert result["email"] == "john.doe@example.org"
assert result["project"] == ["Project A"]
assert result["affiliation"] == "Test University"
def test_person_to_dict_with_institution_ror():
"""Test Person.to_dict() with an Institution that has a ROR ID."""
inst = Institution("Test University", "https://ror.org/12345")
person = Person(
family_name="Doe",
given_name="John",
orcid="0000-0001-2345-6789",
email="john.doe@example.org",
affiliation=inst,
project=["Project A"]
)
result = person.to_dict()
assert result["affiliation"] == "https://ror.org/12345"
# Check other fields too
assert result["family_name"] == "Doe"
assert result["given_name"] == "John"
def test_person_to_dict_with_institution_display_name_only():
"""Test Person.to_dict() with an Institution that has only a display_name."""
inst = Institution("Test University") # No ROR ID
person = Person(
family_name="Smith",
given_name="Jane",
orcid="0000-0001-9876-5432",
affiliation=inst
)
result = person.to_dict()
assert result["affiliation"] == "Test University"
assert result["family_name"] == "Smith"
assert result["given_name"] == "Jane"
def test_person_to_dict_with_empty_institution():
"""Test Person.to_dict() with an Institution that has neither ROR nor display_name."""
# Create an Institution with empty values
inst = Institution("")
person = Person(
family_name="Brown",
given_name="Robert",
affiliation=inst
)
result = person.to_dict()
assert result["affiliation"] == ""
assert result["family_name"] == "Brown"
assert result["given_name"] == "Robert"
def test_person_to_dict_with_no_affiliation():
"""Test Person.to_dict() with no affiliation."""
person = Person(
family_name="Green",
given_name="Alice",
orcid="0000-0002-1111-2222"
)
result = person.to_dict()
assert result["affiliation"] == ""
assert result["family_name"] == "Green"
assert result["given_name"] == "Alice"
assert result["orcid"] == "0000-0002-1111-2222"

@@ -1,57 +0,0 @@
import json
import os
import pytest
from unittest.mock import MagicMock
from doi2dataset import MetadataProcessor
@pytest.fixture
def metadata_processor():
"""Create a MetadataProcessor instance with mocked dependencies"""
doi = "10.1038/srep45389"
processor = MetadataProcessor(doi=doi, upload=False, progress=False)
# Mock the console to avoid print errors
processor.console = MagicMock()
return processor
def test_get_publication_year_with_publication_year(metadata_processor):
"""Test that _get_publication_year extracts year from publication_year field"""
data = {"publication_year": 2020}
year = metadata_processor._get_publication_year(data)
assert year == 2020
def test_get_publication_year_with_date(metadata_processor):
"""Test that _get_publication_year returns empty string when publication_year is missing"""
data = {"publication_date": "2019-05-15"}
year = metadata_processor._get_publication_year(data)
assert year == ""
def test_get_publication_year_with_both_fields(metadata_processor):
"""Test that _get_publication_year prioritizes publication_year over date"""
data = {
"publication_year": 2020,
"publication_date": "2019-05-15"
}
year = metadata_processor._get_publication_year(data)
assert year == 2020
def test_get_publication_year_with_partial_date(metadata_processor):
"""Test that _get_publication_year returns empty string when only publication_date is present"""
data = {"publication_date": "2018"}
year = metadata_processor._get_publication_year(data)
assert year == ""
def test_get_publication_year_with_missing_data(metadata_processor):
"""Test that _get_publication_year handles missing data"""
data = {"other_field": "value"}
year = metadata_processor._get_publication_year(data)
assert year == ""
def test_get_publication_year_with_invalid_data(metadata_processor):
"""Test that _get_publication_year returns whatever is in publication_year field"""
data = {
"publication_year": "not-a-year",
"publication_date": "invalid-date"
}
year = metadata_processor._get_publication_year(data)
assert year == "not-a-year"