test(metadata): add tests
- missing fields - unknown license handling
This commit is contained in:
parent
e556d1be00
commit
f585cf436b
1 changed files with 72 additions and 0 deletions
|
@ -63,6 +63,78 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
|
|||
# Check for basic metadata fields in a more flexible way
|
||||
field_names = [field.get("typeName") for field in fields]
|
||||
assert "title" in field_names
|
||||
|
||||
|
||||
def test_build_metadata_missing_critical_fields(
    metadata_processor, openalex_data, monkeypatch
):
    """Test _build_metadata behavior when critical fields are missing.

    Removes the ``title`` and ``publicationDate`` keys from a copy of the
    OpenAlex fixture data, stubs out abstract retrieval and the processor's
    helper methods, and checks that ``_build_metadata`` still returns a
    result containing ``datasetVersion``.
    """
    metadata_processor.console = MagicMock()

    # Work on a shallow copy: only top-level keys are removed below, so the
    # fixture dict itself is not modified.
    data = openalex_data.copy()

    # Remove title and publicationDate to simulate missing fields.
    # pop() with a default is a no-op when the key is absent, so no
    # membership guard is needed (the previous guard indexed data["title"]
    # and therefore never reliably removed the title).
    data.pop("title", None)
    data.pop("publicationDate", None)

    # Mock abstract retrieval
    abstract_mock = MagicMock()
    abstract_mock.text = "Abstract text"
    abstract_mock.source = "crossref"
    monkeypatch.setattr(
        "doi2dataset.AbstractProcessor.get_abstract",
        lambda *args, **kwargs: abstract_mock,
    )

    # Stub the processor's own helpers so only _build_metadata is exercised.
    metadata_processor._fetch_data = MagicMock(return_value=data)
    metadata_processor._build_description = MagicMock(return_value="Description text")
    metadata_processor._get_involved_pis = MagicMock(return_value=[])

    metadata = metadata_processor._build_metadata(data)

    assert metadata is not None
    # It should still produce datasetVersion even with missing fields
    assert "datasetVersion" in metadata
|
||||
|
||||
|
||||
def test_license_processing_with_unknown_license(
|
||||
metadata_processor, openalex_data, monkeypatch
|
||||
):
|
||||
"""Test license processing when license info is missing or unknown"""
|
||||
|
||||
metadata_processor.console = MagicMock()
|
||||
data = openalex_data.copy()
|
||||
|
||||
# Modify license processing to simulate unknown license
|
||||
def fake_process_license(_):
|
||||
from doi2dataset.core.models import License
|
||||
|
||||
return License(name="", uri="", short="unknown")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"doi2dataset.LicenseProcessor.process_license", fake_process_license
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"doi2dataset.AbstractProcessor.get_abstract",
|
||||
lambda *args, **kwargs: MagicMock(text="Sample abstract", source="openalex"),
|
||||
)
|
||||
metadata_processor._fetch_data = MagicMock(return_value=data)
|
||||
metadata_processor._build_description = MagicMock(return_value="Description text")
|
||||
monkeypatch.setattr(metadata_processor, "_get_involved_pis", lambda _: [])
|
||||
|
||||
metadata = metadata_processor._build_metadata(data)
|
||||
|
||||
# It should return a metadata dict without errors even if license is unknown
|
||||
assert metadata is not None
|
||||
|
||||
citation = (
|
||||
metadata.get("datasetVersion", {}).get("metadataBlocks", {}).get("citation", {})
|
||||
)
|
||||
fields = citation.get("fields", [])
|
||||
field_names = [field.get("typeName") for field in fields]
|
||||
|
||||
assert "subject" in field_names
|
||||
assert (
|
||||
"dsDescription" in field_names
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue