# doi2dataset/tests/test_fetch_doi_mock.py

import json
import os

import pytest

from doi2dataset import (
    AbstractProcessor,
    APIClient,
    CitationBuilder,
    Config,
    License,
    LicenseProcessor,
    MetadataProcessor,
    Person,
    PIFinder,
    SubjectMapper
)


class FakeResponse:
    """
    Minimal stand-in for an API response object, used by the tests below.

    It keeps a ready-made JSON payload together with a status code and offers
    the ``json()`` and ``raise_for_status()`` methods.
    """
    def __init__(self, json_data, status_code=200):
        self.status_code = status_code
        self._json = json_data

    def raise_for_status(self):
        """Do nothing: this fake never raises, whatever ``status_code`` holds."""
        return None

    def json(self):
        """Return the payload object that was supplied at construction time."""
        return self._json

@pytest.fixture(autouse=True)
def load_config_test():
    """
    Autouse fixture that loads 'config_test.yaml' into Config.

    The file is looked up in the directory that contains this test module.
    """
    tests_dir = os.path.dirname(__file__)
    Config.load_config(config_path=os.path.join(tests_dir, "config_test.yaml"))

@pytest.fixture
def fake_openalex_response():
    """
    Provide the parsed contents of 'srep45389.json', a saved JSON response
    stored in the same directory as this test file.
    """
    fixture_file = os.path.join(os.path.dirname(__file__), "srep45389.json")
    with open(fixture_file, encoding="utf-8") as handle:
        return json.load(handle)

def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
    """
    Check that MetadataProcessor._fetch_data() hands back the mocked API payload.

    APIClient.make_request is patched to return a FakeResponse wrapping the
    contents of 'srep45389.json', so the data comes from the local file rather
    than from that method's real implementation.
    """
    # Swap out APIClient.make_request for a stub that yields the canned response.
    mocker.patch(
        "doi2dataset.APIClient.make_request",
        return_value=FakeResponse(fake_openalex_response, 200),
    )

    # Upload is switched off; every other constructor option keeps its default.
    processor = MetadataProcessor(doi="10.1038/srep45389", upload=False)

    # The processor must surface exactly the JSON carried by the fake response.
    assert processor._fetch_data() == fake_openalex_response


def test_openalex_abstract_extraction(mocker, fake_openalex_response):
    """Exercise AbstractProcessor._get_openalex_abstract on the saved OpenAlex response."""
    # The processor needs an API client; a default-constructed one is used here.
    processor = AbstractProcessor(api_client=APIClient())

    # Feed the saved response straight into the protected helper.
    abstract_text = processor._get_openalex_abstract(fake_openalex_response)

    # Something must come back at all ...
    assert abstract_text is not None

    # ... and it must be non-empty whenever the response carries an inverted index.
    has_inverted_index = 'abstract_inverted_index' in fake_openalex_response
    if has_inverted_index:
        assert len(abstract_text) > 0


def test_subject_mapper(fake_openalex_response):
    """
    Check that SubjectMapper.get_subjects returns a list for the saved response's topics.

    The topic entries are passed through unchanged under the "topics" key.
    """
    # A response without a "topics" key is treated as having no topics.
    topics = fake_openalex_response.get("topics", [])

    # get_subjects is invoked on the class itself; no SubjectMapper instance is created.
    subjects = SubjectMapper.get_subjects({"topics": topics})

    # Verify subjects were returned as a list.
    assert subjects is not None
    assert isinstance(subjects, list)


def test_citation_builder(fake_openalex_response):
    """Check that CitationBuilder's other-ID, grant and topic builders each return a list."""
    builder = CitationBuilder(
        data=fake_openalex_response,
        doi="10.1038/srep45389",
        pi_finder=PIFinder(pis=[]),  # a finder constructed with no PIs
    )

    # Each builder method is called in turn and must produce a list.
    for method_name in ("build_other_ids", "build_grants", "build_topics"):
        built = getattr(builder, method_name)()
        assert isinstance(built, list)


def test_license_processor(fake_openalex_response):
    """Check that LicenseProcessor.process_license yields an object exposing name and uri."""
    # Only the primary location is forwarded; it falls back to {} when the key is missing.
    primary_location = fake_openalex_response.get("primary_location", {})
    license_obj = LicenseProcessor.process_license({"primary_location": primary_location})

    # A result must exist and carry both expected attributes.
    assert license_obj is not None
    for attribute in ("name", "uri"):
        assert hasattr(license_obj, attribute)


def test_pi_finder_find_by_orcid():
    """Check that PIFinder._find_by_orcid returns the PI registered under a given ORCID."""
    orcid = "0000-0000-0000-0000"

    # The single PI known to the finder; its ORCID is the one looked up below.
    known_pi = Person(
        family_name="Doe",
        given_name="Jon",
        orcid=orcid,
        email="jon.doe@iana.org",
        affiliation="Institute of Science, Some University",
        project=["Project A01"],
    )

    match = PIFinder(pis=[known_pi])._find_by_orcid(orcid)

    # The lookup must succeed and return the expected person's names.
    assert match is not None
    assert match.family_name == "Doe"
    assert match.given_name == "Jon"


def test_config_load_invalid_path():
    """Check that Config.load_config raises FileNotFoundError for a missing config file."""
    # "non_existent_config.yaml" is a relative path that is expected not to exist.
    with pytest.raises(FileNotFoundError):
        Config.load_config(config_path="non_existent_config.yaml")


def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
    """Check MetadataProcessor._fetch_data against a mocked APIClient.make_request."""
    doi = "10.1038/srep45389"

    # Every call to APIClient.make_request now returns this canned response.
    canned_response = FakeResponse(fake_openalex_response, 200)
    mocker.patch("doi2dataset.APIClient.make_request", return_value=canned_response)

    # Both upload and progress are explicitly switched off for this processor.
    processor = MetadataProcessor(doi=doi, upload=False, progress=False)
    fetched = processor._fetch_data()

    # The fetched payload must exist and equal the saved JSON.
    assert fetched is not None
    assert fetched == fake_openalex_response

    # The processor must also retain the DOI it was constructed with.
    assert processor.doi == doi