Update testing documentation and improve test structure

2025-05-20 15:17:18 +02:00 · 2025-05-20 15:17:18 +02:00 · eb270cba9b
commit eb270cba9b
parent 1c84cae93b
9 changed files with 617 additions and 20 deletions
--- a/tests/test_metadata_processor.py
+++ b/tests/test_metadata_processor.py
@@ -0,0 +1,162 @@
+import json
+import os
+import pytest
+from unittest.mock import MagicMock, patch
+
+from doi2dataset import MetadataProcessor
+
+
+@pytest.fixture
+def openalex_data():
+    """Load the saved JSON response from the file 'srep45389.json'"""
+    json_path = os.path.join(os.path.dirname(__file__), "srep45389.json")
+    with open(json_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    return data
+
+
+@pytest.fixture
+def metadata_processor():
+    """Create a MetadataProcessor instance with mocked dependencies"""
+    doi = "10.1038/srep45389"
+    processor = MetadataProcessor(doi=doi, upload=False, progress=False)
+    return processor
+
+
+def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypatch):
+    """Test that _build_metadata correctly extracts basic metadata fields"""
+    # Mock the console to avoid print errors
+    metadata_processor.console = MagicMock()
+    
+    # Mock the Abstract related methods and objects to avoid console errors
+    abstract_mock = MagicMock()
+    abstract_mock.text = "This is a sample abstract"
+    abstract_mock.source = "openalex"
+    monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
+    
+    # Mock the _fetch_data method to return our test data
+    metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
+    
+    # Mock methods that might cause issues in isolation
+    metadata_processor._build_description = MagicMock(return_value="Test description")
+    metadata_processor._get_involved_pis = MagicMock(return_value=[])
+    metadata_processor._build_organization_metadata = MagicMock(return_value={})
+    
+    # Call the method we're testing
+    metadata = metadata_processor._build_metadata(openalex_data)
+
+    # Verify the basic metadata fields were extracted correctly
+    assert metadata is not None
+    assert 'datasetVersion' in metadata
+    
+    # Examine the fields inside datasetVersion.metadataBlocks
+    assert 'metadataBlocks' in metadata['datasetVersion']
+    citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
+    
+    # Check fields in citation section
+    assert 'fields' in citation
+    fields = citation['fields']
+    
+    # Check for basic metadata fields in a more flexible way
+    field_names = [field.get('typeName') for field in fields]
+    assert 'title' in field_names
+    assert 'subject' in field_names
+    assert 'dsDescription' in field_names  # Description is named 'dsDescription' in the schema
+
+
+def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
+    """Test that _build_metadata correctly processes author information"""
+    # Mock the console to avoid print errors
+    metadata_processor.console = MagicMock()
+    
+    # Mock the Abstract related methods and objects to avoid console errors
+    abstract_mock = MagicMock()
+    abstract_mock.text = "This is a sample abstract"
+    abstract_mock.source = "openalex"
+    monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
+    
+    # Mock the _fetch_data method to return our test data
+    metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
+    
+    # Mock methods that might cause issues in isolation
+    metadata_processor._build_description = MagicMock(return_value="Test description")
+    metadata_processor._get_involved_pis = MagicMock(return_value=[])
+    metadata_processor._build_organization_metadata = MagicMock(return_value={})
+    
+    # Call the method we're testing
+    metadata = metadata_processor._build_metadata(openalex_data)
+
+    # Examine the fields inside datasetVersion.metadataBlocks
+    assert 'metadataBlocks' in metadata['datasetVersion']
+    citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
+    
+    # Check fields in citation section
+    assert 'fields' in citation
+    fields = citation['fields']
+    
+    # Check for author and datasetContact fields
+    field_names = [field.get('typeName') for field in fields]
+    assert 'author' in field_names
+    assert 'datasetContact' in field_names
+    
+    # Verify these are compound fields with actual entries
+    for field in fields:
+        if field.get('typeName') == 'author':
+            assert 'value' in field
+            assert isinstance(field['value'], list)
+            assert len(field['value']) > 0
+        
+        if field.get('typeName') == 'datasetContact':
+            assert 'value' in field
+            assert isinstance(field['value'], list)
+            # The datasetContact might be empty in test environment
+            # Just check it exists rather than asserting length
+
+
+def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, monkeypatch):
+    """Test that _build_metadata correctly extracts keywords and topics"""
+    # Mock the console to avoid print errors
+    metadata_processor.console = MagicMock()
+    
+    # Mock the Abstract related methods and objects to avoid console errors
+    abstract_mock = MagicMock()
+    abstract_mock.text = "This is a sample abstract"
+    abstract_mock.source = "openalex"
+    monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
+    
+    # Mock the _fetch_data method to return our test data
+    metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
+    
+    # Mock methods that might cause issues in isolation
+    metadata_processor._build_description = MagicMock(return_value="Test description")
+    metadata_processor._get_involved_pis = MagicMock(return_value=[])
+    metadata_processor._build_organization_metadata = MagicMock(return_value={})
+    
+    # Call the method we're testing
+    metadata = metadata_processor._build_metadata(openalex_data)
+        
+    # Examine the fields inside datasetVersion.metadataBlocks
+    assert 'metadataBlocks' in metadata['datasetVersion']
+    citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
+    
+    # Check fields in citation section
+    assert 'fields' in citation
+    fields = citation['fields']
+    
+    # Check for keyword and subject fields
+    field_names = [field.get('typeName') for field in fields]
+    
+    # If keywords exist, verify structure
+    if 'keyword' in field_names:
+        for field in fields:
+            if field.get('typeName') == 'keyword':
+                assert 'value' in field
+                assert isinstance(field['value'], list)
+    
+    # Check for subject field which should definitely exist
+    assert 'subject' in field_names
+    for field in fields:
+        if field.get('typeName') == 'subject':
+            assert 'value' in field
+            assert isinstance(field['value'], list)
+            assert len(field['value']) > 0