import json
import os
from unittest.mock import MagicMock

import pytest

from doi2dataset import MetadataProcessor


@pytest.fixture
def metadata_processor():
    """Provide a MetadataProcessor whose console has been replaced by a mock."""
    instance = MetadataProcessor(
        doi="10.1038/srep45389",
        upload=False,
        progress=False,
    )
    # Swap in a mock console so printing cannot raise during the tests.
    instance.console = MagicMock()
    return instance


def test_get_publication_year_with_publication_year(metadata_processor):
    """The value of the publication_year field is returned as the year."""
    payload = {"publication_year": 2020}
    assert metadata_processor._get_publication_year(payload) == 2020


def test_get_publication_year_with_date(metadata_processor):
    """Without publication_year, a full publication_date yields an empty string."""
    payload = {"publication_date": "2019-05-15"}
    assert metadata_processor._get_publication_year(payload) == ""


def test_get_publication_year_with_both_fields(metadata_processor):
    """publication_year takes precedence when publication_date is also given."""
    payload = {"publication_year": 2020, "publication_date": "2019-05-15"}
    assert metadata_processor._get_publication_year(payload) == 2020


def test_get_publication_year_with_partial_date(metadata_processor):
    """A year-only publication_date on its own still yields an empty string."""
    payload = {"publication_date": "2018"}
    assert metadata_processor._get_publication_year(payload) == ""


def test_get_publication_year_with_missing_data(metadata_processor):
    """Input with neither publication field yields an empty string."""
    payload = {"other_field": "value"}
    assert metadata_processor._get_publication_year(payload) == ""


def test_get_publication_year_with_invalid_data(metadata_processor):
    """The publication_year value is passed through even if it is not a year."""
    payload = {"publication_year": "not-a-year", "publication_date": "invalid-date"}
    assert metadata_processor._get_publication_year(payload) == "not-a-year"