"""Tests for ``CitationBuilder.build_authors`` against a saved OpenAlex response."""

import json
import os
from unittest.mock import MagicMock

import pytest

from doi2dataset import CitationBuilder, Person, PIFinder

# DOI of the work whose saved OpenAlex response ('srep45389.json') backs these tests.
SAMPLE_DOI = "10.1038/srep45389"


def _assert_person_names(person):
    """Assert that *person* exposes string ``given_name`` and ``family_name`` attributes."""
    assert hasattr(person, "given_name")
    assert hasattr(person, "family_name")
    assert isinstance(person.given_name, str)
    assert isinstance(person.family_name, str)


def _is_ror_url(value):
    """Return True when *value* is non-empty and contains ``"ror.org"``."""
    return bool(value) and "ror.org" in value


@pytest.fixture
def openalex_data():
    """Load the saved JSON response from the file 'srep45389.json'.

    The file is looked up next to this test module.
    """
    json_path = os.path.join(os.path.dirname(__file__), "srep45389.json")
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)


@pytest.fixture
def test_pi():
    """Create a test PI for matching in tests."""
    return Person(
        family_name="Test",
        given_name="Author",
        orcid="0000-0000-0000-1234",
        email="test.author@example.org",
        affiliation="Test University",
        project=["Test Project"],
    )


@pytest.fixture
def pi_finder(test_pi):
    """Create a PIFinder whose only known PI is the ``test_pi`` fixture."""
    return PIFinder(pis=[test_pi])


def test_build_authors(openalex_data, pi_finder):
    """Test that CitationBuilder.build_authors correctly processes author information."""
    builder = CitationBuilder(data=openalex_data, doi=SAMPLE_DOI, pi_finder=pi_finder)

    # build_authors returns a tuple of (authors, corresponding_authors);
    # only the full author list is inspected here.
    authors, _ = builder.build_authors()

    # Verify that authors were created
    assert authors is not None
    assert isinstance(authors, list)
    assert len(authors) > 0

    # Check the structure of the authors
    for author in authors:
        _assert_person_names(author)


def test_build_authors_with_affiliations(openalex_data, pi_finder):
    """Test that author affiliations are correctly processed.

    Skips when no built author carries an affiliation, so that an
    unexercised check is reported as skipped rather than as a pass.
    """
    builder = CitationBuilder(data=openalex_data, doi=SAMPLE_DOI, pi_finder=pi_finder)

    authors, _ = builder.build_authors()

    # getattr with a default covers authors without an 'affiliation' attribute.
    affiliation_found = any(getattr(author, "affiliation", None) for author in authors)

    # We may not have affiliations in the test data; make that visible
    # in the test report instead of passing without checking anything.
    if not affiliation_found:
        pytest.skip("No author with an affiliation was produced from the test data")


def test_build_authors_with_corresponding_author(openalex_data, pi_finder):
    """Test that corresponding authors are correctly identified.

    Skips when no corresponding author was identified, because there is
    then nothing to validate.
    """
    builder = CitationBuilder(data=openalex_data, doi=SAMPLE_DOI, pi_finder=pi_finder)

    _, corresponding_authors = builder.build_authors()

    if len(corresponding_authors) == 0:
        pytest.skip("No corresponding authors were identified in the test data")

    # Check structure of corresponding authors
    for author in corresponding_authors:
        _assert_person_names(author)


def test_build_authors_with_ror(openalex_data, pi_finder):
    """Test that ROR (Research Organization Registry) identifiers are correctly used when ror=True."""
    # First confirm the sample data contains at least one institution with a ROR identifier
    has_ror_institution = any(
        _is_ror_url(institution.get("ror"))
        for authorship in openalex_data.get("authorships", [])
        for institution in authorship.get("institutions", [])
    )

    # Skip test if no ROR identifiers in sample data
    if not has_ror_institution:
        pytest.skip("Test data doesn't contain any ROR identifiers")

    # Create builder with ror=True to enable ROR identifiers
    builder = CitationBuilder(
        data=openalex_data, doi=SAMPLE_DOI, pi_finder=pi_finder, ror=True
    )

    authors, _ = builder.build_authors()

    # Verify we got authors back
    assert len(authors) > 0, "No authors were extracted from the test data"

    # Find the first affiliation that carries a ROR ID. Affiliations without
    # a 'ror' attribute and authors without an affiliation are ignored.
    institution_with_ror = None
    for author in authors:
        affiliation = getattr(author, "affiliation", None)
        if not affiliation:
            continue
        if _is_ror_url(getattr(affiliation, "ror", None)):
            institution_with_ror = affiliation
            break

    # Verify ROR IDs are used when ror=True
    assert (
        institution_with_ror is not None
    ), "Expected at least one author with a ROR ID when ror=True"

    # The affiliation field must use the ROR ID as its value
    affiliation_field = institution_with_ror.affiliation_field()
    assert affiliation_field.value == institution_with_ror.ror

    # Verify the expanded_value dictionary has the expected structure
    assert hasattr(affiliation_field, "expanded_value")
    assert isinstance(affiliation_field.expanded_value, dict)

    # Check specific fields in the expanded_value.
    # NOTE(review): the expected scheme is a grid.ac URL although the value is
    # a ROR ID - confirm this matches the target metadata schema.
    expanded_value = affiliation_field.expanded_value
    assert "scheme" in expanded_value
    assert expanded_value["scheme"] == "http://www.grid.ac/ontology/"

    assert "termName" in expanded_value
    assert expanded_value["termName"] == institution_with_ror.display_name

    assert "@type" in expanded_value
    assert expanded_value["@type"] == "https://schema.org/Organization"