doi2dataset/tests/test_fetch_doi_mock.py
Alexander Minges 67b46d5140
All checks were successful
Test pipeline / test (push) Successful in 14s
feat!: generalize script by removing organizational metadata
Remove Phase class, organizational metadata blocks, and unused project fields. Update configuration
to use 'default_grants' and simplify PI usage to fallback corresponding author determination only.

BREAKING CHANGES: - Remove 'phase' and 'project' fields from configuration - Use 'default_grants'
instead of 'default_grant' - Generate only standard Dataverse citation metadata
2025-07-07 14:41:39 +02:00

203 lines
6.3 KiB
Python

import json
import os
import pytest
from doi2dataset import (
AbstractProcessor,
APIClient,
CitationBuilder,
Config,
LicenseProcessor,
MetadataProcessor,
Person,
PIFinder,
SubjectMapper,
)
class FakeResponse:
    """
    Minimal stand-in for an HTTP response, returned by the patched API client.

    Only what these tests touch is provided: the ``status_code`` attribute,
    ``json()`` and ``raise_for_status()``.
    """

    def __init__(self, json_data, status_code=200):
        """Remember the payload handed back by ``json()`` and the status code."""
        self.status_code = status_code
        self._json = json_data

    def json(self):
        """Return the payload supplied at construction time, unchanged."""
        return self._json

    def raise_for_status(self):
        """Do nothing: this fake never raises, whatever ``status_code`` is."""
        return None
@pytest.fixture(autouse=True)
def load_config_test():
    """
    Load the test configuration for every test in this module.

    'config_test.yaml' is looked up in the directory containing this test
    file and handed to ``Config.load_config``.
    """
    test_dir = os.path.dirname(__file__)
    Config.load_config(config_path=os.path.join(test_dir, "config_test.yaml"))
@pytest.fixture
def fake_openalex_response():
    """
    Return the parsed JSON stored in 'srep45389.json' next to this test file.
    """
    fixture_file = os.path.join(os.path.dirname(__file__), "srep45389.json")
    # Text read mode is the default for open(); the encoding is pinned to UTF-8.
    with open(fixture_file, encoding="utf-8") as handle:
        return json.load(handle)
def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
    """
    Check that ``_fetch_data`` hands back the payload delivered by the API client.

    ``APIClient.make_request`` is patched to return a ``FakeResponse`` wrapping
    the JSON loaded from 'srep45389.json'; the configuration comes from the
    autouse fixture that loads 'config_test.yaml'.
    """
    canned_response = FakeResponse(fake_openalex_response, 200)
    # Every call to APIClient.make_request now yields the canned response.
    mocker.patch(
        "doi2dataset.APIClient.make_request",
        return_value=canned_response,
    )

    # Processor is created with upload disabled.
    processor = MetadataProcessor(doi="10.1038/srep45389", upload=False)

    # The fetched data must equal the canned JSON payload.
    assert processor._fetch_data() == fake_openalex_response
def test_openalex_abstract_extraction(mocker, fake_openalex_response):
    """Check that an abstract can be extracted from the saved OpenAlex record."""
    extractor = AbstractProcessor(api_client=APIClient())

    # Exercise the protected helper directly on the canned payload.
    extracted = extractor._get_openalex_abstract(fake_openalex_response)

    assert extracted is not None
    # Non-emptiness is only required when the record carries an inverted index.
    has_index = "abstract_inverted_index" in fake_openalex_response
    assert not has_index or len(extracted) > 0
def test_subject_mapper(fake_openalex_response):
    """Test that SubjectMapper.get_subjects returns a list for the record's topics."""
    # A record without a "topics" key falls back to an empty list.
    topics = fake_openalex_response.get("topics", [])

    # get_subjects is called on the class with a dict holding only the topics.
    subjects = SubjectMapper.get_subjects({"topics": topics})

    # Verify subjects were returned as a list
    assert subjects is not None
    assert isinstance(subjects, list)
def test_citation_builder(fake_openalex_response):
    """Check that CitationBuilder returns lists for other IDs, grants and topics."""
    builder = CitationBuilder(
        data=fake_openalex_response,
        doi="10.1038/srep45389",
        pi_finder=PIFinder(pis=[]),  # a real PIFinder with no PIs registered
    )

    # Each builder method is only checked for its return type.
    assert isinstance(builder.build_other_ids(), list)
    assert isinstance(builder.build_grants(), list)
    assert isinstance(builder.build_topics(), list)
def test_license_processor(fake_openalex_response):
    """Check that LicenseProcessor yields an object exposing ``name`` and ``uri``."""
    # Only the primary location is passed on; a missing key becomes an empty dict.
    primary_location = fake_openalex_response.get("primary_location", {})
    result = LicenseProcessor.process_license({"primary_location": primary_location})

    assert result is not None
    for attribute in ("name", "uri"):
        assert hasattr(result, attribute)
def test_pi_finder_find_by_orcid():
    """Check that PIFinder._find_by_orcid returns the PI with a matching ORCID."""
    orcid = "0000-0000-0000-0000"
    jon_doe = Person(
        family_name="Doe",
        given_name="Jon",
        orcid=orcid,
        email="jon.doe@iana.org",
        affiliation="Institute of Science, Some University",
    )

    # Look up the only registered PI by the ORCID it was created with.
    match = PIFinder(pis=[jon_doe])._find_by_orcid(orcid)

    assert match is not None
    assert (match.family_name, match.given_name) == ("Doe", "Jon")
def test_config_load_invalid_path():
    """Check that loading a configuration file that does not exist fails."""
    # The test passes only if load_config raises FileNotFoundError for this path.
    with pytest.raises(FileNotFoundError):
        Config.load_config(config_path="non_existent_config.yaml")
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
    """Check ``MetadataProcessor._fetch_data`` and the stored DOI with a patched client."""
    expected_doi = "10.1038/srep45389"

    # APIClient.make_request is replaced so it returns the canned payload.
    canned = FakeResponse(fake_openalex_response, 200)
    mocker.patch("doi2dataset.APIClient.make_request", return_value=canned)

    # Both upload and progress are switched off for this processor.
    processor = MetadataProcessor(doi=expected_doi, upload=False, progress=False)
    fetched = processor._fetch_data()

    assert fetched is not None
    assert fetched == fake_openalex_response
    # The processor must expose the DOI it was created with.
    assert processor.doi == expected_doi