refactor: centralize constants and improve tests
- Centralize LICENSE_MAP, API_URLS, DERIVATIVE_ALLOWED_LICENSES, and TEMPLATES in core/constants.py - Replace custom HTTP_STATUS dict with Python's standard http.HTTPStatus enum - Add comprehensive tests for derivative license logic in AbstractProcessor - Add unit tests for DERIVATIVE_ALLOWED_LICENSES constant validation - Create helper function create_license_from_map() for consistent test data - Fix formatting inconsistencies in constants.py (remove double empty lines) - Use descriptive _mock_print variable names to satisfy ruff linting - Update CHANGELOG to reflect constants centralization and HTTPStatus usage This eliminates code duplication, improves maintainability, and ensures derivative license detection logic is properly tested.
This commit is contained in:
parent
df007b6076
commit
d660ed457e
7 changed files with 335 additions and 35 deletions
|
@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
### Changed
|
||||
|
||||
- Centralize ICONS definitions in core/constants.py module to eliminate code duplication
|
||||
- Centralize additional constants (LICENSE_MAP, API_URLS, etc.) in core/constants.py module
|
||||
- Remove hardcoded Creative Commons license mappings from processors.py
|
||||
- Remove hardcoded API URLs for OpenAlex and CrossRef from processing modules
|
||||
- Replace custom HTTP status code constants with Python's standard `http.HTTPStatus` enum
|
||||
- Remove hardcoded template strings throughout codebase
|
||||
|
||||
### Fixed
|
||||
|
||||
|
|
|
@ -51,7 +51,13 @@ from .core import (
|
|||
Person,
|
||||
PrimitiveMetadataField,
|
||||
)
|
||||
from .core.constants import ICONS
|
||||
from .core.constants import (
|
||||
API_URLS,
|
||||
DERIVATIVE_ALLOWED_LICENSES,
|
||||
ICONS,
|
||||
LICENSE_MAP,
|
||||
TEMPLATES,
|
||||
)
|
||||
from .processing import (
|
||||
CitationBuilder,
|
||||
MetadataProcessor,
|
||||
|
@ -83,6 +89,10 @@ __all__ = [
|
|||
"Abstract",
|
||||
# Constants
|
||||
"ICONS",
|
||||
"LICENSE_MAP",
|
||||
"API_URLS",
|
||||
"DERIVATIVE_ALLOWED_LICENSES",
|
||||
"TEMPLATES",
|
||||
# Metadata fields
|
||||
"BaseMetadataField",
|
||||
"PrimitiveMetadataField",
|
||||
|
|
|
@ -6,11 +6,17 @@ including license processing and abstract extraction/cleaning.
|
|||
"""
|
||||
|
||||
import re
|
||||
from http import HTTPStatus
|
||||
from typing import Any
|
||||
|
||||
from rich.console import Console
|
||||
|
||||
from ..core.constants import ICONS
|
||||
from ..core.constants import (
|
||||
API_URLS,
|
||||
DERIVATIVE_ALLOWED_LICENSES,
|
||||
ICONS,
|
||||
LICENSE_MAP,
|
||||
)
|
||||
from ..core.models import Abstract, License
|
||||
from .client import APIClient
|
||||
|
||||
|
@ -20,26 +26,6 @@ class LicenseProcessor:
|
|||
Processes license information from metadata.
|
||||
"""
|
||||
|
||||
LICENSE_MAP = {
|
||||
"cc-by": ("https://creativecommons.org/licenses/by/4.0/", "CC BY 4.0"),
|
||||
"cc-by-sa": ("https://creativecommons.org/licenses/by-sa/4.0/", "CC BY-SA 4.0"),
|
||||
"cc-by-nc": ("https://creativecommons.org/licenses/by-nc/4.0/", "CC BY-NC 4.0"),
|
||||
"cc-by-nc-sa": (
|
||||
"https://creativecommons.org/licenses/by-nc-sa/4.0/",
|
||||
"CC BY-NC-SA 4.0",
|
||||
),
|
||||
"cc-by-nc-nd": (
|
||||
"https://creativecommons.org/licenses/by-nc-nd/4.0/",
|
||||
"CC BY-NC-ND 4.0",
|
||||
),
|
||||
"cc-by-nd": ("https://creativecommons.org/licenses/by-nd/4.0/", "CC BY-ND 4.0"),
|
||||
"cc0": ("https://creativecommons.org/publicdomain/zero/1.0/", "CC0 1.0"),
|
||||
"pd": (
|
||||
"https://creativecommons.org/publicdomain/mark/1.0/",
|
||||
"Public Domain Mark 1.0",
|
||||
),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def process_license(cls, data: dict[str, Any]) -> License:
|
||||
"""
|
||||
|
@ -58,7 +44,7 @@ class LicenseProcessor:
|
|||
return License(name="", uri="", short="unknown")
|
||||
|
||||
base_license = license_short.split("/")[0].lower()
|
||||
uri, name = cls.LICENSE_MAP.get(base_license, ("", license_short))
|
||||
uri, name = LICENSE_MAP.get(base_license, ("", license_short))
|
||||
return License(name=name, uri=uri, short=license_short)
|
||||
|
||||
|
||||
|
@ -92,9 +78,7 @@ class AbstractProcessor:
|
|||
Returns:
|
||||
Abstract: The abstract with its source.
|
||||
"""
|
||||
license_ok = {"cc-by", "cc-by-sa", "cc-by-nc", "cc-by-nc-sa", "cc0", "pd"}
|
||||
|
||||
if license.short in license_ok:
|
||||
if license.short in DERIVATIVE_ALLOWED_LICENSES:
|
||||
self.console.print(
|
||||
f"\n{ICONS['info']} License {license.name} allows derivative works. Pulling abstract from CrossRef.",
|
||||
style="info",
|
||||
|
@ -144,10 +128,10 @@ class AbstractProcessor:
|
|||
Returns:
|
||||
str | None: The abstract if found, otherwise None.
|
||||
"""
|
||||
url = f"https://api.crossref.org/works/{doi}"
|
||||
url = f"{API_URLS['crossref_base']}{doi}"
|
||||
response = self.api_client.make_request(url)
|
||||
|
||||
if response and response.status_code == 200:
|
||||
if response and response.status_code == HTTPStatus.OK:
|
||||
abstract_raw = response.json().get("message", {}).get("abstract")
|
||||
return self._clean_jats(abstract_raw)
|
||||
return None
|
||||
|
|
|
@ -41,3 +41,45 @@ EMOJI_ICONS = {
|
|||
|
||||
# Default icon set preference
|
||||
DEFAULT_ICONS = ICONS
|
||||
|
||||
# API endpoint URLs
|
||||
API_URLS = {
|
||||
"openalex_base": "https://api.openalex.org/works/https://doi.org/",
|
||||
"crossref_base": "https://api.crossref.org/works/",
|
||||
}
|
||||
|
||||
# License mapping for Creative Commons and public domain licenses
|
||||
LICENSE_MAP = {
|
||||
"cc-by": ("https://creativecommons.org/licenses/by/4.0/", "CC BY 4.0"),
|
||||
"cc-by-sa": ("https://creativecommons.org/licenses/by-sa/4.0/", "CC BY-SA 4.0"),
|
||||
"cc-by-nc": ("https://creativecommons.org/licenses/by-nc/4.0/", "CC BY-NC 4.0"),
|
||||
"cc-by-nc-sa": (
|
||||
"https://creativecommons.org/licenses/by-nc-sa/4.0/",
|
||||
"CC BY-NC-SA 4.0",
|
||||
),
|
||||
"cc-by-nc-nd": (
|
||||
"https://creativecommons.org/licenses/by-nc-nd/4.0/",
|
||||
"CC BY-NC-ND 4.0",
|
||||
),
|
||||
"cc-by-nd": ("https://creativecommons.org/licenses/by-nd/4.0/", "CC BY-ND 4.0"),
|
||||
"cc0": ("https://creativecommons.org/publicdomain/zero/1.0/", "CC0 1.0"),
|
||||
"pd": (
|
||||
"https://creativecommons.org/publicdomain/mark/1.0/",
|
||||
"Public Domain Mark 1.0",
|
||||
),
|
||||
}
|
||||
|
||||
# Licenses that allow derivative works (for abstract extraction)
|
||||
DERIVATIVE_ALLOWED_LICENSES = {
|
||||
"cc-by",
|
||||
"cc-by-sa",
|
||||
"cc-by-nc",
|
||||
"cc-by-nc-sa",
|
||||
"cc0",
|
||||
"pd",
|
||||
}
|
||||
|
||||
# Template strings
|
||||
TEMPLATES = {
|
||||
"copyright_todo": "All rights reserved. Copyright © {year}, [TODO: Insert copyright holder here!]",
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ of processing DOIs: fetching data, building metadata, and optionally uploading t
|
|||
|
||||
import json
|
||||
import warnings
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
@ -16,7 +17,7 @@ from rich.progress import Progress, TaskID
|
|||
from ..api.client import APIClient
|
||||
from ..api.processors import AbstractProcessor, LicenseProcessor
|
||||
from ..core.config import Config
|
||||
from ..core.constants import ICONS
|
||||
from ..core.constants import API_URLS, ICONS, TEMPLATES
|
||||
from ..core.metadata_fields import (
|
||||
CompoundMetadataField,
|
||||
ControlledVocabularyMetadataField,
|
||||
|
@ -190,10 +191,10 @@ class MetadataProcessor:
|
|||
Raises:
|
||||
ValueError: If data fetching fails.
|
||||
"""
|
||||
url = f"https://api.openalex.org/works/https://doi.org/{self.doi}"
|
||||
url = f"{API_URLS['openalex_base']}{self.doi}"
|
||||
response = self.api_client.make_request(url)
|
||||
|
||||
if response is None or response.status_code != 200:
|
||||
if response is None or response.status_code != HTTPStatus.OK:
|
||||
self.console.print(
|
||||
f"\n{ICONS['error']} Failed to fetch data for DOI: {self.doi}",
|
||||
style="error",
|
||||
|
@ -319,9 +320,9 @@ class MetadataProcessor:
|
|||
"uri": license_info.uri,
|
||||
}
|
||||
else:
|
||||
return_dict["datasetVersion"]["termsOfUse"] = (
|
||||
f"All rights reserved. Copyright © {self._get_publication_year(data)}, [TODO: Insert copyright holder here!]"
|
||||
)
|
||||
return_dict["datasetVersion"]["termsOfUse"] = TEMPLATES[
|
||||
"copyright_todo"
|
||||
].format(year=self._get_publication_year(data))
|
||||
|
||||
return return_dict
|
||||
|
||||
|
|
206
tests/test_abstract_processor.py
Normal file
206
tests/test_abstract_processor.py
Normal file
|
@ -0,0 +1,206 @@
|
|||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from doi2dataset import DERIVATIVE_ALLOWED_LICENSES, LICENSE_MAP, License
|
||||
from doi2dataset.api.client import APIClient
|
||||
from doi2dataset.api.processors import AbstractProcessor
|
||||
|
||||
|
||||
def create_license_from_map(license_short: str) -> License:
|
||||
"""Helper function to create License objects from LICENSE_MAP"""
|
||||
if license_short in LICENSE_MAP:
|
||||
uri, name = LICENSE_MAP[license_short]
|
||||
return License(name=name, uri=uri, short=license_short)
|
||||
else:
|
||||
# For unknown licenses not in the map
|
||||
return License(name="Unknown License", uri="", short=license_short)
|
||||
|
||||
|
||||
class TestAbstractProcessor:
|
||||
"""Test cases for AbstractProcessor derivative license logic"""
|
||||
|
||||
def setup_method(self):
|
||||
"""Setup test fixtures"""
|
||||
self.api_client = APIClient()
|
||||
self.processor = AbstractProcessor(self.api_client)
|
||||
|
||||
def test_derivative_allowed_license_uses_crossref(self):
|
||||
"""Test that licenses allowing derivatives attempt CrossRef first"""
|
||||
# Create a license that allows derivatives using LICENSE_MAP
|
||||
license_obj = create_license_from_map("cc-by")
|
||||
|
||||
# Mock the CrossRef method to return an abstract and console output
|
||||
with patch.object(
|
||||
self.processor,
|
||||
"_get_crossref_abstract",
|
||||
return_value="CrossRef abstract text",
|
||||
) as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor, "_get_openalex_abstract"
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
result = self.processor.get_abstract(
|
||||
"10.1234/test", {}, license_obj
|
||||
)
|
||||
|
||||
# Should call CrossRef and get result
|
||||
mock_crossref.assert_called_once_with("10.1234/test")
|
||||
mock_openalex.assert_not_called()
|
||||
assert result.text == "CrossRef abstract text"
|
||||
assert result.source == "crossref"
|
||||
|
||||
def test_derivative_not_allowed_license_uses_openalex(self):
|
||||
"""Test that licenses not allowing derivatives use OpenAlex reconstruction"""
|
||||
# Create a license that does not allow derivatives using LICENSE_MAP
|
||||
license_obj = create_license_from_map("cc-by-nd")
|
||||
|
||||
# Mock the OpenAlex method to return an abstract
|
||||
with patch.object(self.processor, "_get_crossref_abstract") as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor,
|
||||
"_get_openalex_abstract",
|
||||
return_value="OpenAlex reconstructed text",
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
result = self.processor.get_abstract(
|
||||
"10.1234/test", {}, license_obj
|
||||
)
|
||||
|
||||
# Should skip CrossRef and use OpenAlex
|
||||
mock_crossref.assert_not_called()
|
||||
mock_openalex.assert_called_once_with({})
|
||||
assert result.text == "OpenAlex reconstructed text"
|
||||
assert result.source == "openalex"
|
||||
|
||||
def test_unknown_license_uses_openalex(self):
|
||||
"""Test that unknown licenses default to OpenAlex reconstruction"""
|
||||
# Create an unknown license (not in LICENSE_MAP)
|
||||
license_obj = create_license_from_map("unknown-license")
|
||||
|
||||
# Mock the OpenAlex method to return an abstract
|
||||
with patch.object(self.processor, "_get_crossref_abstract") as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor,
|
||||
"_get_openalex_abstract",
|
||||
return_value="OpenAlex reconstructed text",
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
result = self.processor.get_abstract(
|
||||
"10.1234/test", {}, license_obj
|
||||
)
|
||||
|
||||
# Should skip CrossRef and use OpenAlex
|
||||
mock_crossref.assert_not_called()
|
||||
mock_openalex.assert_called_once_with({})
|
||||
assert result.text == "OpenAlex reconstructed text"
|
||||
assert result.source == "openalex"
|
||||
|
||||
def test_crossref_fallback_to_openalex(self):
|
||||
"""Test fallback to OpenAlex when CrossRef returns no abstract"""
|
||||
# Create a license that allows derivatives using LICENSE_MAP
|
||||
license_obj = create_license_from_map("cc-by")
|
||||
|
||||
# Mock CrossRef to return None (no abstract found)
|
||||
with patch.object(
|
||||
self.processor, "_get_crossref_abstract", return_value=None
|
||||
) as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor,
|
||||
"_get_openalex_abstract",
|
||||
return_value="OpenAlex fallback text",
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
result = self.processor.get_abstract(
|
||||
"10.1234/test", {}, license_obj
|
||||
)
|
||||
|
||||
# Should try CrossRef first, then fall back to OpenAlex
|
||||
mock_crossref.assert_called_once_with("10.1234/test")
|
||||
mock_openalex.assert_called_once_with({})
|
||||
assert result.text == "OpenAlex fallback text"
|
||||
assert result.source == "openalex"
|
||||
|
||||
def test_no_abstract_found_anywhere(self):
|
||||
"""Test when no abstract is found in either source"""
|
||||
# Create a license that allows derivatives using LICENSE_MAP
|
||||
license_obj = create_license_from_map("cc-by")
|
||||
|
||||
# Mock both methods to return None
|
||||
with patch.object(
|
||||
self.processor, "_get_crossref_abstract", return_value=None
|
||||
) as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor, "_get_openalex_abstract", return_value=None
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
result = self.processor.get_abstract(
|
||||
"10.1234/test", {}, license_obj
|
||||
)
|
||||
|
||||
# Should try both sources
|
||||
mock_crossref.assert_called_once_with("10.1234/test")
|
||||
mock_openalex.assert_called_once_with({})
|
||||
assert result.text == ""
|
||||
assert result.source == "none"
|
||||
|
||||
@pytest.mark.parametrize("license_short", DERIVATIVE_ALLOWED_LICENSES)
|
||||
def test_all_derivative_allowed_licenses_use_crossref_first(self, license_short):
|
||||
"""Test that all licenses in DERIVATIVE_ALLOWED_LICENSES use CrossRef first"""
|
||||
# Create license using LICENSE_MAP data
|
||||
license_obj = create_license_from_map(license_short)
|
||||
|
||||
with patch.object(
|
||||
self.processor, "_get_crossref_abstract", return_value="CrossRef text"
|
||||
) as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor, "_get_openalex_abstract"
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
result = self.processor.get_abstract(
|
||||
"10.1234/test", {}, license_obj
|
||||
)
|
||||
|
||||
# Should use CrossRef for all derivative-allowed licenses
|
||||
mock_crossref.assert_called_once()
|
||||
mock_openalex.assert_not_called()
|
||||
assert result.source == "crossref"
|
||||
|
||||
def test_derivative_allowed_licenses_set_matches_usage(self):
|
||||
"""Test that DERIVATIVE_ALLOWED_LICENSES set is correctly used in logic"""
|
||||
# This is a meta-test to ensure the constant is used correctly
|
||||
|
||||
# Test a license that should allow derivatives using LICENSE_MAP
|
||||
allowed_license = create_license_from_map("cc-by")
|
||||
assert allowed_license.short in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
# Test a license that should not allow derivatives using LICENSE_MAP
|
||||
not_allowed_license = create_license_from_map("cc-by-nd")
|
||||
assert not_allowed_license.short not in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
# Test that the processor logic matches the set
|
||||
with patch.object(
|
||||
self.processor, "_get_crossref_abstract", return_value="CrossRef"
|
||||
) as mock_crossref:
|
||||
with patch.object(
|
||||
self.processor, "_get_openalex_abstract", return_value="OpenAlex"
|
||||
) as mock_openalex:
|
||||
with patch.object(self.processor.console, "print") as _mock_print:
|
||||
# Allowed license should use CrossRef
|
||||
result1 = self.processor.get_abstract(
|
||||
"10.1234/test", {}, allowed_license
|
||||
)
|
||||
assert mock_crossref.call_count == 1
|
||||
assert result1.source == "crossref"
|
||||
|
||||
# Reset mocks
|
||||
mock_crossref.reset_mock()
|
||||
mock_openalex.reset_mock()
|
||||
|
||||
# Not allowed license should skip CrossRef
|
||||
result2 = self.processor.get_abstract(
|
||||
"10.1234/test", {}, not_allowed_license
|
||||
)
|
||||
mock_crossref.assert_not_called()
|
||||
mock_openalex.assert_called_once()
|
||||
assert result2.source == "openalex"
|
|
@ -1,4 +1,4 @@
|
|||
from doi2dataset import License, LicenseProcessor
|
||||
from doi2dataset import DERIVATIVE_ALLOWED_LICENSES, License, LicenseProcessor
|
||||
|
||||
|
||||
def test_license_processor_cc_by():
|
||||
|
@ -50,3 +50,55 @@ def test_license_processor_no_primary_location():
|
|||
assert license_obj.short == "unknown"
|
||||
assert license_obj.name == ""
|
||||
assert license_obj.uri == ""
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_cc_by():
|
||||
"""Test that CC BY license allows derivatives"""
|
||||
assert "cc-by" in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_cc_by_sa():
|
||||
"""Test that CC BY-SA license allows derivatives"""
|
||||
assert "cc-by-sa" in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_cc_by_nc():
|
||||
"""Test that CC BY-NC license allows derivatives"""
|
||||
assert "cc-by-nc" in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_cc_by_nc_sa():
|
||||
"""Test that CC BY-NC-SA license allows derivatives"""
|
||||
assert "cc-by-nc-sa" in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_cc0():
|
||||
"""Test that CC0 license allows derivatives"""
|
||||
assert "cc0" in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_public_domain():
|
||||
"""Test that Public Domain license allows derivatives"""
|
||||
assert "pd" in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_not_allowed_licenses_cc_by_nd():
|
||||
"""Test that CC BY-ND license does not allow derivatives"""
|
||||
assert "cc-by-nd" not in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_not_allowed_licenses_cc_by_nc_nd():
|
||||
"""Test that CC BY-NC-ND license does not allow derivatives"""
|
||||
assert "cc-by-nc-nd" not in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_not_allowed_licenses_unknown():
|
||||
"""Test that unknown licenses do not allow derivatives"""
|
||||
assert "unknown-license" not in DERIVATIVE_ALLOWED_LICENSES
|
||||
assert "all-rights-reserved" not in DERIVATIVE_ALLOWED_LICENSES
|
||||
|
||||
|
||||
def test_derivative_allowed_licenses_set_completeness():
|
||||
"""Test that DERIVATIVE_ALLOWED_LICENSES contains expected licenses"""
|
||||
expected_licenses = {"cc-by", "cc-by-sa", "cc-by-nc", "cc-by-nc-sa", "cc0", "pd"}
|
||||
assert DERIVATIVE_ALLOWED_LICENSES == expected_licenses
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue