From f585cf436bcd7f15dab4f4d606e4815c6a55e33e Mon Sep 17 00:00:00 2001
From: Alexander Minges
Date: Tue, 22 Jul 2025 14:58:29 +0200
Subject: [PATCH] test(metadata): add tests

- missing fields
- unknown license handling
---
 tests/test_metadata_processor.py | 72 ++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/tests/test_metadata_processor.py b/tests/test_metadata_processor.py
index e489150..adee531 100644
--- a/tests/test_metadata_processor.py
+++ b/tests/test_metadata_processor.py
@@ -63,6 +63,78 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
     # Check for basic metadata fields in a more flexible way
     field_names = [field.get("typeName") for field in fields]
     assert "title" in field_names
+
+
+def test_build_metadata_missing_critical_fields(
+    metadata_processor, openalex_data, monkeypatch
+):
+    """Test _build_metadata behavior when critical fields are missing"""
+
+    metadata_processor.console = MagicMock()
+    data = openalex_data.copy()
+    # Remove title and publicationDate (top-level keys) to simulate missing fields
+    if "title" in data:
+        data.pop("title", None)
+    if "publicationDate" in data:
+        data.pop("publicationDate", None)
+
+    # Mock abstract retrieval
+    abstract_mock = MagicMock()
+    abstract_mock.text = "Abstract text"
+    abstract_mock.source = "crossref"
+    monkeypatch.setattr(
+        "doi2dataset.AbstractProcessor.get_abstract",
+        lambda *args, **kwargs: abstract_mock,
+    )
+
+    metadata_processor._fetch_data = MagicMock(return_value=data)
+    metadata_processor._build_description = MagicMock(return_value="Description text")
+    metadata_processor._get_involved_pis = MagicMock(return_value=[])
+
+    metadata = metadata_processor._build_metadata(data)
+
+    assert metadata is not None
+    # It should still produce datasetVersion even with missing fields
+    assert "datasetVersion" in metadata
+
+
+def test_license_processing_with_unknown_license(
+    metadata_processor, openalex_data, monkeypatch
+):
+    """Test license processing when license info is missing or unknown"""
+
+    metadata_processor.console = MagicMock()
+    data = openalex_data.copy()
+
+    # Modify license processing to simulate unknown license
+    def fake_process_license(_):
+        from doi2dataset.core.models import License
+
+        return License(name="", uri="", short="unknown")
+
+    monkeypatch.setattr(
+        "doi2dataset.LicenseProcessor.process_license", fake_process_license
+    )
+
+    monkeypatch.setattr(
+        "doi2dataset.AbstractProcessor.get_abstract",
+        lambda *args, **kwargs: MagicMock(text="Sample abstract", source="openalex"),
+    )
+    metadata_processor._fetch_data = MagicMock(return_value=data)
+    metadata_processor._build_description = MagicMock(return_value="Description text")
+    monkeypatch.setattr(metadata_processor, "_get_involved_pis", lambda _: [])
+
+    metadata = metadata_processor._build_metadata(data)
+
+    # It should return a metadata dict without errors even if license is unknown
+    assert metadata is not None
+
+    citation = (
+        metadata.get("datasetVersion", {}).get("metadataBlocks", {}).get("citation", {})
+    )
+    fields = citation.get("fields", [])
+    field_names = [field.get("typeName") for field in fields]
+    assert "subject" in field_names
     assert (
         "dsDescription" in field_names