test(metadata): add tests

- missing fields
- unknown license handling
This commit is contained in:
Alexander Minges 2025-07-22 14:58:29 +02:00
parent e556d1be00
commit f585cf436b
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4

View file

@ -63,6 +63,78 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
# Check for basic metadata fields in a more flexible way
field_names = [field.get("typeName") for field in fields]
assert "title" in field_names
def test_build_metadata_missing_critical_fields(
    metadata_processor, openalex_data, monkeypatch
):
    """Test _build_metadata behavior when critical fields are missing.

    The "title" and "publicationDate" keys are removed from a copy of the
    OpenAlex fixture, the abstract lookup and the processor's helper methods
    are replaced with mocks, and ``_build_metadata`` is then expected to
    return a result that still contains the "datasetVersion" key.
    """
    metadata_processor.console = MagicMock()

    # Shallow copy is sufficient: only top-level keys are removed, so the
    # shared fixture dict itself is not modified.
    data = openalex_data.copy()

    # Remove title and publicationDate to simulate missing fields.
    # pop() with a default is a no-op for absent keys, so no membership
    # check is needed. (The previous guard tested `"title" in data["title"]`,
    # i.e. a substring match on the title value, which could skip the removal
    # or raise when the key was missing.)
    # NOTE(review): confirm "publicationDate" is the key the fixture actually
    # uses for the publication date; if not, this pop is a silent no-op.
    data.pop("title", None)
    data.pop("publicationDate", None)

    # Mock abstract retrieval
    abstract_mock = MagicMock()
    abstract_mock.text = "Abstract text"
    abstract_mock.source = "crossref"
    monkeypatch.setattr(
        "doi2dataset.AbstractProcessor.get_abstract",
        lambda *args, **kwargs: abstract_mock,
    )

    # Stub out the processor's collaborators so only _build_metadata runs.
    metadata_processor._fetch_data = MagicMock(return_value=data)
    metadata_processor._build_description = MagicMock(return_value="Description text")
    metadata_processor._get_involved_pis = MagicMock(return_value=[])

    metadata = metadata_processor._build_metadata(data)

    assert metadata is not None
    # It should still produce datasetVersion even with missing fields
    assert "datasetVersion" in metadata
def test_license_processing_with_unknown_license(
metadata_processor, openalex_data, monkeypatch
):
"""Test license processing when license info is missing or unknown"""
metadata_processor.console = MagicMock()
data = openalex_data.copy()
# Modify license processing to simulate unknown license
def fake_process_license(_):
from doi2dataset.core.models import License
return License(name="", uri="", short="unknown")
monkeypatch.setattr(
"doi2dataset.LicenseProcessor.process_license", fake_process_license
)
monkeypatch.setattr(
"doi2dataset.AbstractProcessor.get_abstract",
lambda *args, **kwargs: MagicMock(text="Sample abstract", source="openalex"),
)
metadata_processor._fetch_data = MagicMock(return_value=data)
metadata_processor._build_description = MagicMock(return_value="Description text")
monkeypatch.setattr(metadata_processor, "_get_involved_pis", lambda _: [])
metadata = metadata_processor._build_metadata(data)
# It should return a metadata dict without errors even if license is unknown
assert metadata is not None
citation = (
metadata.get("datasetVersion", {}).get("metadataBlocks", {}).get("citation", {})
)
fields = citation.get("fields", [])
field_names = [field.get("typeName") for field in fields]
assert "subject" in field_names
assert (
"dsDescription" in field_names