feat!: generalize script by removing organizational metadata
All checks were successful
Test pipeline / test (push) Successful in 14s
All checks were successful
Test pipeline / test (push) Successful in 14s
Remove the Phase class, organizational metadata blocks, and unused project fields. Update the configuration to use 'default_grants', and limit PI usage to fallback determination of the corresponding author only.

BREAKING CHANGES:
- Remove 'phase' and 'project' fields from configuration
- Use 'default_grants' instead of 'default_grant'
- Generate only standard Dataverse citation metadata
This commit is contained in:
parent
01bc537bd8
commit
67b46d5140
11 changed files with 207 additions and 269 deletions
|
@ -4,16 +4,15 @@ import os
|
|||
import pytest
|
||||
|
||||
from doi2dataset import (
|
||||
AbstractProcessor,
|
||||
AbstractProcessor,
|
||||
APIClient,
|
||||
CitationBuilder,
|
||||
Config,
|
||||
License,
|
||||
LicenseProcessor,
|
||||
CitationBuilder,
|
||||
Config,
|
||||
LicenseProcessor,
|
||||
MetadataProcessor,
|
||||
Person,
|
||||
PIFinder,
|
||||
SubjectMapper
|
||||
SubjectMapper,
|
||||
)
|
||||
|
||||
|
||||
|
@ -78,16 +77,16 @@ def test_openalex_abstract_extraction(mocker, fake_openalex_response):
|
|||
"""Test the extraction of abstracts from OpenAlex inverted index data."""
|
||||
# Create API client for AbstractProcessor
|
||||
api_client = APIClient()
|
||||
|
||||
|
||||
# Create processor
|
||||
processor = AbstractProcessor(api_client=api_client)
|
||||
|
||||
|
||||
# Call the protected method directly with the fake response
|
||||
abstract_text = processor._get_openalex_abstract(fake_openalex_response)
|
||||
|
||||
|
||||
# Verify abstract was extracted
|
||||
assert abstract_text is not None
|
||||
|
||||
|
||||
# If abstract exists in the response, it should be properly extracted
|
||||
if 'abstract_inverted_index' in fake_openalex_response:
|
||||
assert len(abstract_text) > 0
|
||||
|
@ -97,15 +96,15 @@ def test_subject_mapper(fake_openalex_response):
|
|||
"""Test that the SubjectMapper correctly maps OpenAlex topics to subjects."""
|
||||
# Extract topics from the OpenAlex response
|
||||
topics = fake_openalex_response.get("topics", [])
|
||||
|
||||
|
||||
# Convert topics to strings - we'll use display_name
|
||||
topic_names = []
|
||||
if topics:
|
||||
topic_names = [topic.get("display_name") for topic in topics if topic.get("display_name")]
|
||||
|
||||
|
||||
# Get subjects using the class method
|
||||
subjects = SubjectMapper.get_subjects({"topics": topics})
|
||||
|
||||
|
||||
# Verify subjects were returned
|
||||
assert subjects is not None
|
||||
assert isinstance(subjects, list)
|
||||
|
@ -114,21 +113,21 @@ def test_subject_mapper(fake_openalex_response):
|
|||
def test_citation_builder(fake_openalex_response):
|
||||
"""Test that the CitationBuilder correctly builds author information."""
|
||||
doi = "10.1038/srep45389"
|
||||
|
||||
|
||||
# Mock PIFinder with an empty list of PIs
|
||||
pi_finder = PIFinder(pis=[])
|
||||
|
||||
|
||||
# Create builder with required arguments
|
||||
builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder)
|
||||
|
||||
|
||||
# Test building other IDs
|
||||
other_ids = builder.build_other_ids()
|
||||
assert isinstance(other_ids, list)
|
||||
|
||||
|
||||
# Test building grants
|
||||
grants = builder.build_grants()
|
||||
assert isinstance(grants, list)
|
||||
|
||||
|
||||
# Test building topics
|
||||
topics = builder.build_topics()
|
||||
assert isinstance(topics, list)
|
||||
|
@ -140,10 +139,10 @@ def test_license_processor(fake_openalex_response):
|
|||
license_data = {
|
||||
"primary_location": fake_openalex_response.get("primary_location", {})
|
||||
}
|
||||
|
||||
|
||||
# Process the license
|
||||
license_obj = LicenseProcessor.process_license(license_data)
|
||||
|
||||
|
||||
# Verify license processing
|
||||
assert license_obj is not None
|
||||
assert hasattr(license_obj, "name")
|
||||
|
@ -158,16 +157,15 @@ def test_pi_finder_find_by_orcid():
|
|||
given_name="Jon",
|
||||
orcid="0000-0000-0000-0000",
|
||||
email="jon.doe@iana.org",
|
||||
affiliation="Institute of Science, Some University",
|
||||
project=["Project A01"]
|
||||
affiliation="Institute of Science, Some University"
|
||||
)
|
||||
|
||||
|
||||
# Create PIFinder with our test PI
|
||||
finder = PIFinder(pis=[test_pi])
|
||||
|
||||
|
||||
# Find PI by ORCID
|
||||
pi = finder._find_by_orcid("0000-0000-0000-0000")
|
||||
|
||||
|
||||
# Verify the PI was found
|
||||
assert pi is not None
|
||||
assert pi.family_name == "Doe"
|
||||
|
@ -177,7 +175,7 @@ def test_pi_finder_find_by_orcid():
|
|||
def test_config_load_invalid_path():
|
||||
"""Test that Config.load_config raises an error when an invalid path is provided."""
|
||||
invalid_path = "non_existent_config.yaml"
|
||||
|
||||
|
||||
# Verify that attempting to load a non-existent config raises an error
|
||||
with pytest.raises(FileNotFoundError):
|
||||
Config.load_config(config_path=invalid_path)
|
||||
|
@ -186,20 +184,20 @@ def test_config_load_invalid_path():
|
|||
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
|
||||
"""Test the _fetch_data method of the MetadataProcessor class with mocked responses."""
|
||||
doi = "10.1038/srep45389"
|
||||
|
||||
|
||||
# Mock API response
|
||||
mocker.patch("doi2dataset.APIClient.make_request",
|
||||
mocker.patch("doi2dataset.APIClient.make_request",
|
||||
return_value=FakeResponse(fake_openalex_response, 200))
|
||||
|
||||
|
||||
# Create processor with upload disabled and progress disabled
|
||||
processor = MetadataProcessor(doi=doi, upload=False, progress=False)
|
||||
|
||||
|
||||
# Test the _fetch_data method directly
|
||||
data = processor._fetch_data()
|
||||
|
||||
|
||||
# Verify that data was fetched correctly
|
||||
assert data is not None
|
||||
assert data == fake_openalex_response
|
||||
|
||||
|
||||
# Verify the DOI is correctly stored
|
||||
assert processor.doi == doi
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue