Initial commit and release of doi2dataset

This commit is contained in:
Alexander Minges 2025-03-21 14:53:23 +01:00
commit 9be53fd2fc
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4
23 changed files with 2482 additions and 0 deletions

50
tests/test_doi2dataset.py Normal file
View file

@ -0,0 +1,50 @@
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from doi2dataset import NameProcessor, Phase, sanitize_filename, validate_email_address
def test_phase_check_year():
    """Verify Phase.check_year for years inside, outside and on the edges of a phase.

    The phase is built as ``Phase("TestPhase", 2000, 2010)``; the test expects
    both 2000 and 2010 to count as inside the phase.
    """
    phase = Phase("TestPhase", 2000, 2010)

    # (year, expected result) pairs, checked in order.
    expectations = [
        (2005, True),   # well within the boundaries
        (1999, False),  # one year before the start
        (2011, False),  # one year after the end
        (2000, True),   # lower boundary
        (2010, True),   # upper boundary
    ]
    for year, expected in expectations:
        # Identity check: the result must be the bool itself, not merely truthy/falsy.
        assert phase.check_year(year) is expected
def test_sanitize_filename():
    """Check that sanitize_filename maps the DOI "10.1234/abc.def" to "10_1234_abc_def".

    The sample covers both the dots and the slash of the DOI being replaced
    by underscores.
    """
    assert sanitize_filename("10.1234/abc.def") == "10_1234_abc_def"
def test_split_name_with_comma():
    """Check NameProcessor.split_name on a "Family, Given" style name.

    The result is unpacked into exactly two parts and is expected to come
    back in (given, family) order.
    """
    given_name, family_name = NameProcessor.split_name("Doe, John")
    assert (given_name, family_name) == ("John", "Doe")
def test_split_name_without_comma():
    """Check NameProcessor.split_name on a "Given Family" style name (no comma).

    The result is unpacked into exactly two parts and is expected to come
    back in (given, family) order.
    """
    name_parts = NameProcessor.split_name("John Doe")
    given_name, family_name = name_parts
    assert given_name == "John"
    assert family_name == "Doe"
def test_validate_email_address_valid():
    """Check that validate_email_address accepts a well-formed address.

    NOTE(review): whether validate_email_address performs a domain lookup is
    not visible from this test file; if it does, this test needs network
    access -- confirm against the implementation.
    """
    outcome = validate_email_address("john.doe@iana.org")
    # The function must return the bool True itself, not just a truthy value.
    assert outcome is True
def test_validate_email_address_invalid():
    """Check that validate_email_address rejects an address with a bad domain part."""
    outcome = validate_email_address("john.doe@invalid_domain")
    # The function must return the bool False itself, not just a falsy value.
    assert outcome is False

View file

@ -0,0 +1,57 @@
import json
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import pytest
from doi2dataset import MetadataProcessor
class FakeResponse:
    """Minimal stand-in for an HTTP response object, used to fake API calls.

    It stores a pre-built JSON payload and a status code, and exposes
    ``json()`` and ``raise_for_status()`` methods so the test can hand it
    out in place of a real response.
    """

    def __init__(self, json_data, status_code: int = 200) -> None:
        """Remember the payload to serve and the status code to report.

        Args:
            json_data: Object handed back unchanged by :meth:`json`.
            status_code: Status code exposed as ``self.status_code``.
        """
        self.status_code = status_code
        self._json = json_data

    def json(self):
        """Return the stored payload (the very same object, not a copy)."""
        return self._json

    def raise_for_status(self) -> None:
        """Do nothing: this fake never raises, whatever ``status_code`` is."""
        return None
@pytest.fixture
def fake_openalex_response():
    """Provide the parsed contents of 'srep45389.json' as a fixture.

    The file is looked up in the directory containing this test module and
    is read as UTF-8 encoded JSON.
    """
    tests_dir = os.path.dirname(__file__)
    with open(os.path.join(tests_dir, "srep45389.json"), "r", encoding="utf-8") as handle:
        return json.load(handle)
def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
    """Check that MetadataProcessor._fetch_data returns the JSON served by the API client.

    ``doi2dataset.APIClient.make_request`` is patched to return a
    ``FakeResponse`` wrapping the contents of 'srep45389.json', so the test
    runs against canned data rather than calling the real ``make_request``.
    """
    # Swap out APIClient.make_request so it yields the canned response.
    mocker.patch(
        "doi2dataset.APIClient.make_request",
        return_value=FakeResponse(fake_openalex_response, 200),
    )

    # Build the processor with uploading switched off.
    processor = MetadataProcessor(doi="10.1038/srep45389", upload=False)

    # The fetched data must equal the canned JSON payload.
    assert processor._fetch_data() == fake_openalex_response