feat!: generalize script by removing organizational metadata
All checks were successful
Test pipeline / test (push) Successful in 14s
All checks were successful
Test pipeline / test (push) Successful in 14s
Remove the Phase class, the organizational metadata blocks, and unused project fields. Update the configuration to use 'default_grants', and restrict PI usage to serving only as a fallback when determining the corresponding author.

BREAKING CHANGES:
- Remove 'phase' and 'project' fields from configuration
- Use 'default_grants' instead of 'default_grant'
- Generate only standard Dataverse citation metadata
This commit is contained in:
parent
01bc537bd8
commit
67b46d5140
11 changed files with 207 additions and 269 deletions
|
@ -1,7 +1,8 @@
|
|||
import json
|
||||
import os
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from doi2dataset import MetadataProcessor
|
||||
|
||||
|
@ -27,36 +28,35 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
|
|||
"""Test that _build_metadata correctly extracts basic metadata fields"""
|
||||
# Mock the console to avoid print errors
|
||||
metadata_processor.console = MagicMock()
|
||||
|
||||
|
||||
# Mock the Abstract related methods and objects to avoid console errors
|
||||
abstract_mock = MagicMock()
|
||||
abstract_mock.text = "This is a sample abstract"
|
||||
abstract_mock.source = "openalex"
|
||||
monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
|
||||
|
||||
|
||||
# Mock the _fetch_data method to return our test data
|
||||
metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
|
||||
|
||||
|
||||
# Mock methods that might cause issues in isolation
|
||||
metadata_processor._build_description = MagicMock(return_value="Test description")
|
||||
metadata_processor._get_involved_pis = MagicMock(return_value=[])
|
||||
metadata_processor._build_organization_metadata = MagicMock(return_value={})
|
||||
|
||||
|
||||
# Call the method we're testing
|
||||
metadata = metadata_processor._build_metadata(openalex_data)
|
||||
|
||||
# Verify the basic metadata fields were extracted correctly
|
||||
assert metadata is not None
|
||||
assert 'datasetVersion' in metadata
|
||||
|
||||
|
||||
# Examine the fields inside datasetVersion.metadataBlocks
|
||||
assert 'metadataBlocks' in metadata['datasetVersion']
|
||||
citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
|
||||
|
||||
|
||||
# Check fields in citation section
|
||||
assert 'fields' in citation
|
||||
fields = citation['fields']
|
||||
|
||||
|
||||
# Check for basic metadata fields in a more flexible way
|
||||
field_names = [field.get('typeName') for field in fields]
|
||||
assert 'title' in field_names
|
||||
|
@ -68,44 +68,43 @@ def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
|
|||
"""Test that _build_metadata correctly processes author information"""
|
||||
# Mock the console to avoid print errors
|
||||
metadata_processor.console = MagicMock()
|
||||
|
||||
|
||||
# Mock the Abstract related methods and objects to avoid console errors
|
||||
abstract_mock = MagicMock()
|
||||
abstract_mock.text = "This is a sample abstract"
|
||||
abstract_mock.source = "openalex"
|
||||
monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
|
||||
|
||||
|
||||
# Mock the _fetch_data method to return our test data
|
||||
metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
|
||||
|
||||
|
||||
# Mock methods that might cause issues in isolation
|
||||
metadata_processor._build_description = MagicMock(return_value="Test description")
|
||||
metadata_processor._get_involved_pis = MagicMock(return_value=[])
|
||||
metadata_processor._build_organization_metadata = MagicMock(return_value={})
|
||||
|
||||
|
||||
# Call the method we're testing
|
||||
metadata = metadata_processor._build_metadata(openalex_data)
|
||||
|
||||
# Examine the fields inside datasetVersion.metadataBlocks
|
||||
assert 'metadataBlocks' in metadata['datasetVersion']
|
||||
citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
|
||||
|
||||
|
||||
# Check fields in citation section
|
||||
assert 'fields' in citation
|
||||
fields = citation['fields']
|
||||
|
||||
|
||||
# Check for author and datasetContact fields
|
||||
field_names = [field.get('typeName') for field in fields]
|
||||
assert 'author' in field_names
|
||||
assert 'datasetContact' in field_names
|
||||
|
||||
|
||||
# Verify these are compound fields with actual entries
|
||||
for field in fields:
|
||||
if field.get('typeName') == 'author':
|
||||
assert 'value' in field
|
||||
assert isinstance(field['value'], list)
|
||||
assert len(field['value']) > 0
|
||||
|
||||
|
||||
if field.get('typeName') == 'datasetContact':
|
||||
assert 'value' in field
|
||||
assert isinstance(field['value'], list)
|
||||
|
@ -117,46 +116,45 @@ def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, m
|
|||
"""Test that _build_metadata correctly extracts keywords and topics"""
|
||||
# Mock the console to avoid print errors
|
||||
metadata_processor.console = MagicMock()
|
||||
|
||||
|
||||
# Mock the Abstract related methods and objects to avoid console errors
|
||||
abstract_mock = MagicMock()
|
||||
abstract_mock.text = "This is a sample abstract"
|
||||
abstract_mock.source = "openalex"
|
||||
monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock)
|
||||
|
||||
|
||||
# Mock the _fetch_data method to return our test data
|
||||
metadata_processor._fetch_data = MagicMock(return_value=openalex_data)
|
||||
|
||||
|
||||
# Mock methods that might cause issues in isolation
|
||||
metadata_processor._build_description = MagicMock(return_value="Test description")
|
||||
metadata_processor._get_involved_pis = MagicMock(return_value=[])
|
||||
metadata_processor._build_organization_metadata = MagicMock(return_value={})
|
||||
|
||||
|
||||
# Call the method we're testing
|
||||
metadata = metadata_processor._build_metadata(openalex_data)
|
||||
|
||||
|
||||
# Examine the fields inside datasetVersion.metadataBlocks
|
||||
assert 'metadataBlocks' in metadata['datasetVersion']
|
||||
citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {})
|
||||
|
||||
|
||||
# Check fields in citation section
|
||||
assert 'fields' in citation
|
||||
fields = citation['fields']
|
||||
|
||||
|
||||
# Check for keyword and subject fields
|
||||
field_names = [field.get('typeName') for field in fields]
|
||||
|
||||
|
||||
# If keywords exist, verify structure
|
||||
if 'keyword' in field_names:
|
||||
for field in fields:
|
||||
if field.get('typeName') == 'keyword':
|
||||
assert 'value' in field
|
||||
assert isinstance(field['value'], list)
|
||||
|
||||
|
||||
# Check for subject field which should definitely exist
|
||||
assert 'subject' in field_names
|
||||
for field in fields:
|
||||
if field.get('typeName') == 'subject':
|
||||
assert 'value' in field
|
||||
assert isinstance(field['value'], list)
|
||||
assert len(field['value']) > 0
|
||||
assert len(field['value']) > 0
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue