feat!: generalize script by removing organizational metadata
All checks were successful
Test pipeline / test (push) Successful in 14s

Remove the Phase class, organizational metadata blocks, and unused project fields. Update the configuration
to use 'default_grants', and limit PI usage to fallback determination of the corresponding author.

BREAKING CHANGES:
- Remove 'phase' and 'project' fields from configuration
- Use 'default_grants' instead of 'default_grant'
- Generate only standard Dataverse citation metadata
This commit is contained in:
Alexander Minges 2025-07-07 14:41:39 +02:00
parent 01bc537bd8
commit 67b46d5140
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4
11 changed files with 207 additions and 269 deletions

View file

@ -1,13 +1,9 @@
import json
import os
import pytest
from unittest.mock import MagicMock
from doi2dataset import (
CitationBuilder,
PIFinder,
Person
)
import pytest
from doi2dataset import CitationBuilder, Person, PIFinder
@pytest.fixture
@ -27,8 +23,7 @@ def test_pi():
given_name="Author",
orcid="0000-0000-0000-1234",
email="test.author@example.org",
affiliation="Test University",
project=["Test Project"]
affiliation="Test University"
)
@ -43,15 +38,15 @@ def test_build_authors(openalex_data, pi_finder):
"""Test that CitationBuilder.build_authors correctly processes author information"""
doi = "10.1038/srep45389"
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
# Call the build_authors method - returns tuple of (authors, corresponding_authors)
authors, corresponding_authors = builder.build_authors()
# Verify that authors were created
assert authors is not None
assert isinstance(authors, list)
assert len(authors) > 0
# Check the structure of the authors
for author in authors:
assert hasattr(author, "given_name")
@ -64,17 +59,17 @@ def test_build_authors_with_affiliations(openalex_data, pi_finder):
"""Test that author affiliations are correctly processed"""
doi = "10.1038/srep45389"
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
# Call the build_authors method
authors, _ = builder.build_authors()
# Check if any authors have affiliation
affiliation_found = False
for author in authors:
if hasattr(author, "affiliation") and author.affiliation:
affiliation_found = True
break
# We may not have affiliations in the test data, so only assert if we found any
if affiliation_found:
assert affiliation_found, "No author with affiliation found"
@ -84,14 +79,14 @@ def test_build_authors_with_corresponding_author(openalex_data, pi_finder):
"""Test that corresponding authors are correctly identified"""
doi = "10.1038/srep45389"
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder)
# Process authors
authors, corresponding_authors = builder.build_authors()
# Verify that corresponding authors were identified
if len(corresponding_authors) > 0:
assert len(corresponding_authors) > 0, "No corresponding authors identified"
# Check structure of corresponding authors
for author in corresponding_authors:
assert hasattr(author, "given_name")
@ -103,7 +98,7 @@ def test_build_authors_with_corresponding_author(openalex_data, pi_finder):
def test_build_authors_with_ror(openalex_data, pi_finder):
"""Test that ROR (Research Organization Registry) identifiers are correctly used when ror=True"""
doi = "10.1038/srep45389"
# First confirm the sample data contains at least one institution with a ROR identifier
has_ror_institution = False
for authorship in openalex_data.get("authorships", []):
@ -114,61 +109,61 @@ def test_build_authors_with_ror(openalex_data, pi_finder):
break
if has_ror_institution:
break
# Skip test if no ROR identifiers in sample data
if not has_ror_institution:
pytest.skip("Test data doesn't contain any ROR identifiers")
# Create builder with ror=True to enable ROR identifiers
builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder, ror=True)
# Get authors
authors, _ = builder.build_authors()
# Verify we got authors back
assert len(authors) > 0, "No authors were extracted from the test data"
# Check for at least one Institution with a ROR ID
ror_found = False
institution_with_ror = None
for author in authors:
# Check if author has affiliation
if not hasattr(author, 'affiliation') or not author.affiliation:
continue
# Check if affiliation is an Institution with a ROR ID
if not hasattr(author.affiliation, 'ror'):
continue
# Check if ROR ID is present and contains "ror.org"
if author.affiliation.ror and "ror.org" in author.affiliation.ror:
ror_found = True
institution_with_ror = author.affiliation
break
# Verify ROR IDs are used when ror=True
assert ror_found, "Expected at least one author with a ROR ID when ror=True"
# Check expanded_value in the affiliation field when ROR is used
if institution_with_ror:
# Get the affiliation field
affiliation_field = institution_with_ror.affiliation_field()
# Verify it's set up correctly with the ROR ID as the value
assert affiliation_field.value == institution_with_ror.ror
# Verify the expanded_value dictionary has the expected structure
assert hasattr(affiliation_field, 'expanded_value')
assert isinstance(affiliation_field.expanded_value, dict)
# Check specific fields in the expanded_value
expanded_value = affiliation_field.expanded_value
assert "scheme" in expanded_value
assert expanded_value["scheme"] == "http://www.grid.ac/ontology/"
assert "termName" in expanded_value
assert expanded_value["termName"] == institution_with_ror.display_name
assert "@type" in expanded_value
assert expanded_value["@type"] == "https://schema.org/Organization"
assert expanded_value["@type"] == "https://schema.org/Organization"