Add code coverage config and expand test suite

Adds .coveragerc configuration file to control coverage analysis settings.
Expands test suite with additional unit tests for AbstractProcessor,
SubjectMapper, CitationBuilder, LicenseProcessor, PIFinder, and
MetadataProcessor classes.

Updates README with comprehensive testing documentation, including
information about current code coverage (53%) and instructions for
running tests with coverage analysis.
This commit is contained in:
Alexander Minges 2025-05-20 14:02:30 +02:00
parent 2c88a76f4e
commit 1c84cae93b
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4
3 changed files with 227 additions and 2 deletions

View file

@ -3,7 +3,18 @@ import os
import pytest
from doi2dataset import Config, MetadataProcessor
from doi2dataset import (
AbstractProcessor,
APIClient,
CitationBuilder,
Config,
License,
LicenseProcessor,
MetadataProcessor,
Person,
PIFinder,
SubjectMapper
)
class FakeResponse:
@ -61,3 +72,134 @@ def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
# Verify that the fetched data matches the fake JSON data.
assert data == fake_openalex_response
def test_openalex_abstract_extraction(mocker, fake_openalex_response):
    """Check that AbstractProcessor pulls an abstract out of the OpenAlex fixture.

    The protected ``_get_openalex_abstract`` helper is invoked directly on the
    fake response, so the test does not depend on the public entry points.
    """
    abstract_processor = AbstractProcessor(api_client=APIClient())

    extracted = abstract_processor._get_openalex_abstract(fake_openalex_response)

    # A result must always come back, whatever the fixture contains.
    assert extracted is not None

    # Non-emptiness is only required when the fixture carries an inverted index.
    if 'abstract_inverted_index' not in fake_openalex_response:
        return
    assert len(extracted) > 0
def test_subject_mapper(fake_openalex_response):
    """Smoke-test SubjectMapper.get_subjects against the OpenAlex fixture topics.

    Only the shape of the result is checked: the call must return a list.
    The concrete subject values are not asserted here.
    """
    # Fall back to an empty list so the call is exercised even if the
    # fixture carries no "topics" entry.
    topics = fake_openalex_response.get("topics", [])

    # get_subjects is called on the class itself, with the topics wrapped in
    # a dict under the same "topics" key.
    subjects = SubjectMapper.get_subjects({"topics": topics})

    assert subjects is not None
    assert isinstance(subjects, list)
def test_citation_builder(fake_openalex_response):
    """Check that each CitationBuilder ``build_*`` helper used here returns a list.

    The builder is constructed from the OpenAlex fixture with a PIFinder
    that knows no principal investigators.
    """
    citation_builder = CitationBuilder(
        data=fake_openalex_response,
        doi="10.1038/srep45389",
        pi_finder=PIFinder(pis=[]),
    )

    # Other identifiers, grants and topics are checked in that order.
    assert isinstance(citation_builder.build_other_ids(), list)
    assert isinstance(citation_builder.build_grants(), list)
    assert isinstance(citation_builder.build_topics(), list)
def test_license_processor(fake_openalex_response):
    """Check that LicenseProcessor.process_license yields an object with name and uri.

    Only the fixture's ``primary_location`` entry is handed to the processor
    (an empty dict when the fixture has none).
    """
    primary_location = fake_openalex_response.get("primary_location", {})

    processed = LicenseProcessor.process_license(
        {"primary_location": primary_location}
    )

    assert processed is not None
    # The result must expose both attributes; their values are not checked.
    for attribute in ("name", "uri"):
        assert hasattr(processed, attribute)
def test_pi_finder_find_by_orcid():
    """Check that PIFinder returns the registered PI when looked up by ORCID."""
    orcid = "0000-0000-0000-0000"

    # The only PI known to the finder; the lookup below uses the same ORCID.
    known_pi = Person(
        family_name="Doe",
        given_name="Jon",
        orcid=orcid,
        email="jon.doe@iana.org",
        affiliation="Institute of Science, Some University",
        project=["Project A01"],
    )

    match = PIFinder(pis=[known_pi])._find_by_orcid(orcid)

    assert match is not None
    assert match.family_name == "Doe"
    assert match.given_name == "Jon"
def test_config_load_invalid_path():
    """Check that Config.load_config raises FileNotFoundError for a missing file."""
    with pytest.raises(FileNotFoundError):
        # This path is expected not to exist in the test environment.
        Config.load_config(config_path="non_existent_config.yaml")
def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
    """Check MetadataProcessor._fetch_data against a patched APIClient.make_request.

    ``make_request`` is replaced so that it hands back a FakeResponse built
    from the OpenAlex fixture with status code 200.
    """
    target_doi = "10.1038/srep45389"

    canned_response = FakeResponse(fake_openalex_response, 200)
    mocker.patch(
        "doi2dataset.APIClient.make_request",
        return_value=canned_response,
    )

    # Upload and progress reporting are both switched off for this test.
    metadata_processor = MetadataProcessor(
        doi=target_doi, upload=False, progress=False
    )

    fetched = metadata_processor._fetch_data()

    # The fetched payload must be exactly the fixture data.
    assert fetched is not None
    assert fetched == fake_openalex_response

    # The processor must keep the DOI it was constructed with.
    assert metadata_processor.doi == target_doi