import json
import os

import pytest

from doi2dataset import (
    AbstractProcessor,
    APIClient,
    CitationBuilder,
    Config,
    License,
    LicenseProcessor,
    MetadataProcessor,
    Person,
    PIFinder,
    SubjectMapper,
)


class FakeResponse:
    """
    A fake response object to simulate an API response.

    It stores a JSON payload and a status code and exposes the two methods
    the tests rely on: ``json()`` and ``raise_for_status()``.
    """

    def __init__(self, json_data, status_code=200):
        self._json = json_data
        self.status_code = status_code

    def json(self):
        """Return the JSON payload given at construction time."""
        return self._json

    def raise_for_status(self):
        """Do nothing: this fake never raises, whatever the status code is."""
        pass


@pytest.fixture(autouse=True)
def load_config_test():
    """
    Automatically load the configuration from 'config_test.yaml' located in
    the same directory as this test file.

    The fixture is ``autouse``, so it runs before every test in this module.
    """
    config_path = os.path.join(os.path.dirname(__file__), "config_test.yaml")
    Config.load_config(config_path=config_path)


@pytest.fixture
def fake_openalex_response():
    """
    Load the saved JSON response from the file 'srep45389.json' located in
    the same directory as this test file.

    Returns the parsed JSON document.
    """
    json_path = os.path.join(os.path.dirname(__file__), "srep45389.json")
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)


def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
    """
    Test fetching DOI metadata by simulating the API call with a locally
    saved JSON response.

    The APIClient.make_request method is patched to return a fake response
    built from the contents of 'srep45389.json', so no real request is made.
    The configuration comes from the autouse ``load_config_test`` fixture.
    """
    doi = "10.1038/srep45389"
    fake_response = FakeResponse(fake_openalex_response, 200)

    # Patch the make_request method of APIClient to return our fake_response.
    mocker.patch("doi2dataset.APIClient.make_request", return_value=fake_response)

    # Instantiate MetadataProcessor with upload and progress disabled,
    # mirroring test_metadata_processor_fetch_data.
    processor = MetadataProcessor(doi=doi, upload=False, progress=False)

    # Call _fetch_data(), which should now return our fake JSON data.
    data = processor._fetch_data()

    # Verify that the fetched data matches the fake JSON data.
    assert data == fake_openalex_response


def test_openalex_abstract_extraction(mocker, fake_openalex_response):
    """Test the extraction of abstracts from OpenAlex inverted index data."""
    # Create API client for AbstractProcessor
    api_client = APIClient()

    # Create processor
    processor = AbstractProcessor(api_client=api_client)

    # Call the protected method directly with the fake response
    abstract_text = processor._get_openalex_abstract(fake_openalex_response)

    # Verify abstract was extracted
    assert abstract_text is not None

    # If abstract exists in the response, it should be properly extracted
    if "abstract_inverted_index" in fake_openalex_response:
        assert len(abstract_text) > 0


def test_subject_mapper(fake_openalex_response):
    """Test that SubjectMapper.get_subjects returns a list for OpenAlex topics."""
    # Extract topics from the OpenAlex response
    topics = fake_openalex_response.get("topics", [])

    # Get subjects using the class method
    subjects = SubjectMapper.get_subjects({"topics": topics})

    # Verify subjects were returned
    assert subjects is not None
    assert isinstance(subjects, list)


def test_citation_builder(fake_openalex_response):
    """Test that the CitationBuilder builds other IDs, grants and topics as lists."""
    doi = "10.1038/srep45389"

    # Use a PIFinder with an empty list of PIs
    pi_finder = PIFinder(pis=[])

    # Create builder with required arguments
    builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder)

    # Test building other IDs
    other_ids = builder.build_other_ids()
    assert isinstance(other_ids, list)

    # Test building grants
    grants = builder.build_grants()
    assert isinstance(grants, list)

    # Test building topics
    topics = builder.build_topics()
    assert isinstance(topics, list)


def test_license_processor(fake_openalex_response):
    """Test that LicenseProcessor.process_license returns an object with name and uri."""
    # Create a simplified data structure that contains license info
    license_data = {
        "primary_location": fake_openalex_response.get("primary_location", {})
    }

    # Process the license
    license_obj = LicenseProcessor.process_license(license_data)

    # Verify license processing
    assert license_obj is not None
    assert hasattr(license_obj, "name")
    assert hasattr(license_obj, "uri")


def test_pi_finder_find_by_orcid():
    """Test that PIFinder can find a PI by ORCID."""
    # Create the Person object the finder is expected to return
    test_pi = Person(
        family_name="Doe",
        given_name="Jon",
        orcid="0000-0000-0000-0000",
        email="jon.doe@iana.org",
        affiliation="Institute of Science, Some University",
        project=["Project A01"],
    )

    # Create PIFinder with our test PI
    finder = PIFinder(pis=[test_pi])

    # Find PI by ORCID
    pi = finder._find_by_orcid("0000-0000-0000-0000")

    # Verify the PI was found
    assert pi is not None
    assert pi.family_name == "Doe"
    assert pi.given_name == "Jon"


def test_config_load_invalid_path():
    """Test that Config.load_config raises an error when an invalid path is provided."""
    invalid_path = "non_existent_config.yaml"

    # Verify that attempting to load a non-existent config raises an error
    with pytest.raises(FileNotFoundError):
        Config.load_config(config_path=invalid_path)


def test_metadata_processor_fetch_data(mocker, fake_openalex_response):
    """Test the _fetch_data method of the MetadataProcessor class with mocked responses."""
    doi = "10.1038/srep45389"

    # Mock API response
    mocker.patch(
        "doi2dataset.APIClient.make_request",
        return_value=FakeResponse(fake_openalex_response, 200),
    )

    # Create processor with upload disabled and progress disabled
    processor = MetadataProcessor(doi=doi, upload=False, progress=False)

    # Test the _fetch_data method directly
    data = processor._fetch_data()

    # Verify that data was fetched correctly
    assert data is not None
    assert data == fake_openalex_response

    # Verify the DOI is correctly stored
    assert processor.doi == doi