feat!: generalize script by removing organizational metadata
All checks were successful
Test pipeline / test (push) Successful in 14s

Remove Phase class, organizational metadata blocks, and unused project fields. Update configuration
to use 'default_grants' and simplify PI usage to fallback corresponding author determination only.

BREAKING CHANGES: - Remove 'phase' and 'project' fields from configuration - Use 'default_grants'
instead of 'default_grant' - Generate only standard Dataverse citation metadata
This commit is contained in:
Alexander Minges 2025-07-07 14:41:39 +02:00
parent 01bc537bd8
commit 67b46d5140
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4
11 changed files with 207 additions and 269 deletions

View file

@ -4,16 +4,15 @@ import os
import pytest
from doi2dataset import (
AbstractProcessor,
AbstractProcessor,
APIClient,
CitationBuilder,
Config,
License,
LicenseProcessor,
CitationBuilder,
Config,
LicenseProcessor,
MetadataProcessor,
Person,
PIFinder,
SubjectMapper
SubjectMapper,
)
@ -78,16 +77,16 @@ def test_openalex_abstract_extraction(mocker, fake_openalex_response):
"""Test the extraction of abstracts from OpenAlex inverted index data."""
# Create API client for AbstractProcessor
api_client = APIClient()
# Create processor
processor = AbstractProcessor(api_client=api_client)
# Call the protected method directly with the fake response
abstract_text = processor._get_openalex_abstract(fake_openalex_response)
# Verify abstract was extracted
assert abstract_text is not None
# If abstract exists in the response, it should be properly extracted
if 'abstract_inverted_index' in fake_openalex_response:
assert len(abstract_text) > 0
@ -97,15 +96,15 @@ def test_subject_mapper(fake_openalex_response):
"""Test that the SubjectMapper correctly maps OpenAlex topics to subjects."""
# Extract topics from the OpenAlex response
topics = fake_openalex_response.get("topics", [])
# Convert topics to strings - we'll use display_name
topic_names = []
if topics:
topic_names = [topic.get("display_name") for topic in topics if topic.get("display_name")]
# Get subjects using the class method
subjects = SubjectMapper.get_subjects({"topics": topics})
# Verify subjects were returned
assert subjects is not None
assert isinstance(subjects, list)
@ -114,21 +113,21 @@ def test_subject_mapper(fake_openalex_response):
def test_citation_builder(fake_openalex_response):
"""Test that the CitationBuilder correctly builds author information."""
doi = "10.1038/srep45389"
# Mock PIFinder with an empty list of PIs
pi_finder = PIFinder(pis=[])
# Create builder with required arguments
builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder)
# Test building other IDs
other_ids = builder.build_other_ids()
assert isinstance(other_ids, list)
# Test building grants
grants = builder.build_grants()
assert isinstance(grants, list)
# Test building topics
topics = builder.build_topics()
assert isinstance(topics, list)
@ -140,10 +139,10 @@ def test_license_processor(fake_openalex_response):
license_data = {
"primary_location": fake_openalex_response.get("primary_location", {})
}
# Process the license
license_obj = LicenseProcessor.process_license(license_data)
# Verify license processing
assert license_obj is not None
assert hasattr(license_obj, "name")
@ -158,16 +157,15 @@ def test_pi_finder_find_by_orcid():
given_name="Jon",
orcid="0000-0000-0000-0000",
email="jon.doe@iana.org",
affiliation="Institute of Science, Some University",
project=["Project A01"]
affiliation="Institute of Science, Some University"
)
# Create PIFinder with our test PI
finder = PIFinder(pis=[test_pi])
# Find PI by ORCID
pi = finder._find_by_orcid("0000-0000-0000-0000")
# Verify the PI was found
assert pi is not None
assert pi.family_name == "Doe"
@ -177,7 +175,7 @@ def test_pi_finder_find_by_orcid():
def test_config_load_invalid_path():
"""Test that Config.load_config raises an error when an invalid path is provided."""
invalid_path = "non_existent_config.yaml"
# Verify that attempting to load a non-existent config raises an error
with pytest.raises(FileNotFoundError):
Config.load_config(config_path=invalid_path)
@ -186,20 +184,20 @@ def test_config_load_invalid_path():
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
"""Test the _fetch_data method of the MetadataProcessor class with mocked responses."""
doi = "10.1038/srep45389"
# Mock API response
mocker.patch("doi2dataset.APIClient.make_request",
mocker.patch("doi2dataset.APIClient.make_request",
return_value=FakeResponse(fake_openalex_response, 200))
# Create processor with upload disabled and progress disabled
processor = MetadataProcessor(doi=doi, upload=False, progress=False)
# Test the _fetch_data method directly
data = processor._fetch_data()
# Verify that data was fetched correctly
assert data is not None
assert data == fake_openalex_response
# Verify the DOI is correctly stored
assert processor.doi == doi