All checks were successful
Test pipeline / test (push) Successful in 14s
Remove Phase class, organizational metadata blocks, and unused project fields. Update configuration to use 'default_grants' and simplify PI usage to fallback corresponding author determination only. BREAKING CHANGES: - Remove 'phase' and 'project' fields from configuration - Use 'default_grants' instead of 'default_grant' - Generate only standard Dataverse citation metadata
203 lines
6.3 KiB
Python
203 lines
6.3 KiB
Python
import json
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from doi2dataset import (
|
|
AbstractProcessor,
|
|
APIClient,
|
|
CitationBuilder,
|
|
Config,
|
|
LicenseProcessor,
|
|
MetadataProcessor,
|
|
Person,
|
|
PIFinder,
|
|
SubjectMapper,
|
|
)
|
|
|
|
|
|
class FakeResponse:
    """
    Minimal stand-in for an HTTP response object, used to simulate API replies.

    Only the pieces exercised by these tests are provided: a ``status_code``
    attribute, a ``json()`` accessor and a no-op ``raise_for_status()``.
    """

    def __init__(self, json_data, status_code=200):
        # The payload is kept private; callers read it back through json().
        self.status_code = status_code
        self._json = json_data

    def json(self):
        """Return the payload handed to the constructor, unchanged."""
        return self._json

    def raise_for_status(self):
        """Do nothing: this fake never raises, whatever ``status_code`` is."""
        return None
|
|
|
|
@pytest.fixture(autouse=True)
def load_config_test():
    """
    Load 'config_test.yaml' from the directory containing this test file.

    Declared with ``autouse=True``, so tests get it without requesting it.
    """
    test_dir = os.path.dirname(__file__)
    Config.load_config(config_path=os.path.join(test_dir, "config_test.yaml"))
|
|
|
|
@pytest.fixture
def fake_openalex_response():
    """
    Provide the parsed contents of the saved response 'srep45389.json'.

    The file is read (as UTF-8) from the directory containing this test file.
    """
    test_dir = os.path.dirname(__file__)
    with open(os.path.join(test_dir, "srep45389.json"), "r", encoding="utf-8") as fh:
        return json.load(fh)
|
|
|
|
def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
    """
    Test fetching DOI metadata by simulating the API call with a saved JSON response.

    ``APIClient.make_request`` is patched to return a fake response built from
    the ``fake_openalex_response`` fixture (the contents of 'srep45389.json'),
    so no real request is made.
    """
    doi = "10.1038/srep45389"
    fake_response = FakeResponse(fake_openalex_response, 200)

    # Patch the make_request method of APIClient to return our fake_response.
    mocker.patch("doi2dataset.APIClient.make_request", return_value=fake_response)

    # Instantiate MetadataProcessor without upload and progress. Passing
    # progress=False explicitly makes the code match this stated intent and
    # the other _fetch_data test in this module.
    processor = MetadataProcessor(doi=doi, upload=False, progress=False)

    # _fetch_data() should now hand back the fake JSON payload.
    data = processor._fetch_data()

    # Verify that the fetched data matches the fake JSON data.
    assert data == fake_openalex_response
|
|
|
|
|
|
def test_openalex_abstract_extraction(mocker, fake_openalex_response):
    """Check that AbstractProcessor extracts an abstract from the saved OpenAlex response."""
    # AbstractProcessor needs an API client; the method under test is called
    # directly with the saved response data.
    extractor = AbstractProcessor(api_client=APIClient())

    # Exercise the protected helper with the fake response.
    extracted = extractor._get_openalex_abstract(fake_openalex_response)

    # Something must always come back.
    assert extracted is not None

    # When the response carries an inverted index, the result must be non-empty.
    if "abstract_inverted_index" in fake_openalex_response:
        assert len(extracted) > 0
|
|
|
|
|
|
def test_subject_mapper(fake_openalex_response):
    """
    Test that SubjectMapper returns a list of subjects for OpenAlex topics.

    Only the return type is checked here; the concrete subject values are
    not asserted.
    """
    # Extract topics from the OpenAlex response (empty list when absent).
    topics = fake_openalex_response.get("topics", [])

    # Get subjects using the class method, which takes a dict with a
    # "topics" key.
    subjects = SubjectMapper.get_subjects({"topics": topics})

    # Verify subjects were returned.
    assert subjects is not None
    assert isinstance(subjects, list)
|
|
|
|
|
|
def test_citation_builder(fake_openalex_response):
    """Check that CitationBuilder returns lists for other IDs, grants and topics."""
    # A PIFinder with an empty list of PIs is enough for these checks.
    builder = CitationBuilder(
        data=fake_openalex_response,
        doi="10.1038/srep45389",
        pi_finder=PIFinder(pis=[]),
    )

    # Other IDs
    assert isinstance(builder.build_other_ids(), list)

    # Grants
    assert isinstance(builder.build_grants(), list)

    # Topics
    assert isinstance(builder.build_topics(), list)
|
|
|
|
|
|
def test_license_processor(fake_openalex_response):
    """Check that LicenseProcessor yields an object exposing ``name`` and ``uri``."""
    # Reduce the response to the one key passed on to the processor.
    primary_location = fake_openalex_response.get("primary_location", {})

    # Process the license.
    license_obj = LicenseProcessor.process_license(
        {"primary_location": primary_location}
    )

    # Verify license processing.
    assert license_obj is not None
    for attribute in ("name", "uri"):
        assert hasattr(license_obj, attribute)
|
|
|
|
|
|
def test_pi_finder_find_by_orcid():
    """Check that PIFinder can look up a PI by ORCID."""
    orcid = "0000-0000-0000-0000"

    # Person intended to mirror the PI in the test config (not verified here).
    known_pi = Person(
        family_name="Doe",
        given_name="Jon",
        orcid=orcid,
        email="jon.doe@iana.org",
        affiliation="Institute of Science, Some University",
    )

    # Build a finder that knows only this PI and look it up by ORCID.
    match = PIFinder(pis=[known_pi])._find_by_orcid(orcid)

    # The lookup must succeed and return the expected names.
    assert match is not None
    assert match.family_name == "Doe"
    assert match.given_name == "Jon"
|
|
|
|
|
|
def test_config_load_invalid_path():
    """Check that Config.load_config raises FileNotFoundError for a missing file."""
    # Loading a config file that does not exist must fail loudly.
    with pytest.raises(FileNotFoundError):
        Config.load_config(config_path="non_existent_config.yaml")
|
|
|
|
|
|
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
    """Exercise MetadataProcessor._fetch_data against a mocked API response."""
    doi = "10.1038/srep45389"

    # Every APIClient.make_request call now returns the canned response.
    canned = FakeResponse(fake_openalex_response, 200)
    mocker.patch("doi2dataset.APIClient.make_request", return_value=canned)

    # Upload and progress are both switched off for this processor.
    processor = MetadataProcessor(doi=doi, upload=False, progress=False)

    # Call the method under test directly.
    fetched = processor._fetch_data()

    # The payload must come back unchanged.
    assert fetched is not None
    assert fetched == fake_openalex_response

    # The processor must keep the DOI it was constructed with.
    assert processor.doi == doi
|