Adds .coveragerc configuration file to control coverage analysis settings. Expands test suite with additional unit tests for AbstractProcessor, SubjectMapper, CitationBuilder, LicenseProcessor, PIFinder, and MetadataProcessor classes. Updates README with comprehensive testing documentation, including information about current code coverage (53%) and instructions for running tests with coverage analysis.
205 lines
6.5 KiB
Python
205 lines
6.5 KiB
Python
import json
|
|
import os
|
|
|
|
import pytest
|
|
|
|
from doi2dataset import (
|
|
AbstractProcessor,
|
|
APIClient,
|
|
CitationBuilder,
|
|
Config,
|
|
License,
|
|
LicenseProcessor,
|
|
MetadataProcessor,
|
|
Person,
|
|
PIFinder,
|
|
SubjectMapper
|
|
)
|
|
|
|
|
|
class FakeResponse:
    """
    Minimal stand-in for an HTTP response object used by the tests.

    It stores a payload and a status code and exposes the ``json()`` and
    ``raise_for_status()`` methods.
    """

    def __init__(self, json_data, status_code=200):
        """Store the payload returned by ``json()`` and the status code."""
        self.status_code = status_code
        self._payload = json_data

    def json(self):
        """Return the payload given at construction time, unchanged."""
        return self._payload

    def raise_for_status(self):
        """Do nothing: this fake never raises, whatever the status code is."""
        return None
|
|
|
|
@pytest.fixture(autouse=True)
def load_config_test():
    """
    Load the test configuration automatically for every test.

    The settings are read from 'config_test.yaml', looked up in the
    directory that contains this test file.
    """
    tests_dir = os.path.dirname(__file__)
    Config.load_config(config_path=os.path.join(tests_dir, "config_test.yaml"))
|
|
|
|
@pytest.fixture
def fake_openalex_response():
    """
    Provide the parsed contents of the saved JSON response 'srep45389.json'.

    The file is looked up in the directory that contains this test file.
    """
    fixture_file = os.path.join(os.path.dirname(__file__), "srep45389.json")
    with open(fixture_file, encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
    """
    Check that _fetch_data() returns the JSON carried by the API response.

    APIClient.make_request is patched to hand back a FakeResponse wrapping the
    contents of 'srep45389.json'; the configuration comes from the autouse
    fixture that loads 'config_test.yaml'.
    """
    # Every call to make_request now yields this canned 200 response.
    mocker.patch(
        "doi2dataset.APIClient.make_request",
        return_value=FakeResponse(fake_openalex_response, 200),
    )

    # The processor is created with upload switched off.
    processor = MetadataProcessor(doi="10.1038/srep45389", upload=False)

    # The fetched data must be exactly the saved JSON document.
    assert processor._fetch_data() == fake_openalex_response
|
|
|
|
|
|
def test_openalex_abstract_extraction(mocker, fake_openalex_response):
    """Check that AbstractProcessor yields an abstract for the saved OpenAlex response."""
    processor = AbstractProcessor(api_client=APIClient())

    # The protected helper is exercised directly with the saved response.
    extracted = processor._get_openalex_abstract(fake_openalex_response)

    assert extracted is not None

    # Non-empty text is only required when the response has an inverted index.
    has_inverted_index = 'abstract_inverted_index' in fake_openalex_response
    if has_inverted_index:
        assert len(extracted) > 0
|
|
|
|
|
|
def test_subject_mapper(fake_openalex_response):
    """
    Check that SubjectMapper.get_subjects returns a list for the saved topics.

    The topics are taken from the "topics" key of the saved OpenAlex response
    (an empty list when the key is absent) and passed to the class method
    wrapped in a dict under the same key.
    """
    topics = fake_openalex_response.get("topics", [])

    # NOTE: the original also built a list of topic display names that was
    # never used; that dead code has been removed.
    subjects = SubjectMapper.get_subjects({"topics": topics})

    # Only the shape of the result is checked, not its contents.
    assert subjects is not None
    assert isinstance(subjects, list)
|
|
|
|
|
|
def test_citation_builder(fake_openalex_response):
    """Check that CitationBuilder returns lists for other IDs, grants and topics."""
    builder = CitationBuilder(
        data=fake_openalex_response,
        doi="10.1038/srep45389",
        pi_finder=PIFinder(pis=[]),  # finder created with an empty PI list
    )

    # Each builder method is called in turn (other IDs, grants, topics) and
    # must produce a list.
    for build in (builder.build_other_ids, builder.build_grants, builder.build_topics):
        assert isinstance(build(), list)
|
|
|
|
|
|
def test_license_processor(fake_openalex_response):
    """Check that LicenseProcessor.process_license returns an object with name and uri."""
    # Only the "primary_location" part of the saved response is handed over.
    primary_location = fake_openalex_response.get("primary_location", {})

    processed = LicenseProcessor.process_license({"primary_location": primary_location})

    assert processed is not None
    for attribute in ("name", "uri"):
        assert hasattr(processed, attribute)
|
|
|
|
|
|
def test_pi_finder_find_by_orcid():
    """Check that PIFinder._find_by_orcid returns the PI registered under an ORCID."""
    orcid = "0000-0000-0000-0000"

    # NOTE(review): these values presumably mirror the PI in config_test.yaml.
    known_pi = Person(
        family_name="Doe",
        given_name="Jon",
        orcid=orcid,
        email="jon.doe@iana.org",
        affiliation="Institute of Science, Some University",
        project=["Project A01"],
    )

    # Look the PI up in a finder that knows only this one person.
    match = PIFinder(pis=[known_pi])._find_by_orcid(orcid)

    assert match is not None
    assert match.family_name == "Doe"
    assert match.given_name == "Jon"
|
|
|
|
|
|
def test_config_load_invalid_path():
    """Check that Config.load_config raises FileNotFoundError for a missing file."""
    with pytest.raises(FileNotFoundError):
        Config.load_config(config_path="non_existent_config.yaml")
|
|
|
|
|
|
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
    """Check MetadataProcessor._fetch_data against a patched APIClient.make_request."""
    doi = "10.1038/srep45389"

    # make_request is patched to return a canned 200 response with the saved JSON.
    canned = FakeResponse(fake_openalex_response, 200)
    mocker.patch("doi2dataset.APIClient.make_request", return_value=canned)

    # Both upload and progress are passed as False.
    processor = MetadataProcessor(doi=doi, upload=False, progress=False)

    fetched = processor._fetch_data()

    # The fetched data must exist and equal the saved JSON document.
    assert fetched is not None
    assert fetched == fake_openalex_response

    # The processor keeps the DOI it was constructed with.
    assert processor.doi == doi
|