Compare commits
5 commits
Author | SHA1 | Date | |
---|---|---|---|
6f5f9a0bf8 | |||
e7ccf3b3c5 | |||
b4188dbe05 | |||
deca1e1d14 | |||
67b46d5140 |
16 changed files with 278 additions and 275 deletions
15
.coveragerc
15
.coveragerc
|
@ -2,7 +2,11 @@
|
|||
source = doi2dataset
|
||||
omit =
|
||||
*/tests/*
|
||||
*/test_*
|
||||
*/docs/*
|
||||
*/__pycache__/*
|
||||
*/venv/*
|
||||
*/.venv/*
|
||||
setup.py
|
||||
conf.py
|
||||
__init__.py
|
||||
|
@ -11,13 +15,22 @@ omit =
|
|||
exclude_lines =
|
||||
pragma: no cover
|
||||
def __repr__
|
||||
def __str__
|
||||
if self.debug:
|
||||
raise NotImplementedError
|
||||
raise AssertionError
|
||||
if __name__ == .__main__.:
|
||||
if TYPE_CHECKING:
|
||||
@abstractmethod
|
||||
pass
|
||||
raise ImportError
|
||||
except ImportError
|
||||
def __str__
|
||||
|
||||
show_missing = true
|
||||
precision = 2
|
||||
|
||||
[html]
|
||||
directory = htmlcov
|
||||
|
||||
[xml]
|
||||
output = coverage.xml
|
||||
|
|
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,5 +1,6 @@
|
|||
# Config file
|
||||
config.yaml
|
||||
.config.yaml
|
||||
|
||||
# Processed DOIs
|
||||
*.json
|
||||
|
@ -58,6 +59,7 @@ htmlcov/
|
|||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
junit.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
|
|
36
.gitlab-ci.yml
Normal file
36
.gitlab-ci.yml
Normal file
|
@ -0,0 +1,36 @@
|
|||
# GitLab CI/CD pipeline for doi2dataset
|
||||
# Compatible with GitLab v18.1.1
|
||||
|
||||
stages:
|
||||
- test
|
||||
|
||||
variables:
|
||||
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
|
||||
|
||||
cache:
|
||||
paths:
|
||||
- .cache/pip/
|
||||
- .venv/
|
||||
|
||||
test:
|
||||
stage: test
|
||||
image: python:3
|
||||
before_script:
|
||||
- python -m pip install --upgrade pip
|
||||
- pip install -r requirements.txt
|
||||
- pip install -r requirements-dev.txt
|
||||
script:
|
||||
- pytest
|
||||
artifacts:
|
||||
reports:
|
||||
junit: junit.xml
|
||||
coverage_report:
|
||||
coverage_format: cobertura
|
||||
path: coverage.xml
|
||||
paths:
|
||||
- htmlcov/
|
||||
expire_in: 1 week
|
||||
coverage: '/(?i)total.*? (100(?:\.0+)?\%|[1-9]?\d(?:\.\d+)?\%)$/'
|
||||
only:
|
||||
- branches
|
||||
- merge_requests
|
82
README.md
82
README.md
|
@ -8,7 +8,14 @@
|
|||
|
||||
- **DOI Validation and Normalization:** Validates DOIs and converts them into a standardized format.
|
||||
- **Metadata Retrieval:** Fetches metadata such as title, abstract, license, and author information from external sources.
|
||||
- **Metadata Mapping:** Automatically maps and generates metadata fields (e.g., title, description, keywords) including support for controlled vocabularies and compound fields.
|
||||
- **Standard Dataverse Metadata:** Generates standard Dataverse citation metadata including:
|
||||
- Title, publication date, and alternative URL
|
||||
- Author information with affiliations and ORCID identifiers
|
||||
- Dataset contact information (corresponding authors)
|
||||
- Abstract and description
|
||||
- Keywords and subject classification
|
||||
- Grant/funding information
|
||||
- License information when available
|
||||
- **Optional Upload:** Allows uploading of metadata directly to a Dataverse.org server.
|
||||
- **Progress Tracking:** Uses the Rich library for user-friendly progress tracking and error handling.
|
||||
|
||||
|
@ -23,14 +30,41 @@ cd doi2dataset
|
|||
|
||||
## Configuration
|
||||
|
||||
Configuration
|
||||
|
||||
Before running the tool, configure the necessary settings in the `config.yaml` file located in the project root. This file contains configuration details such as:
|
||||
|
||||
- Connection details (URL, API token, authentication credentials)
|
||||
- Mapping of project phases
|
||||
- Principal Investigator (PI) information
|
||||
- Default grant configurations
|
||||
- **Connection details**: URL, API token, authentication credentials for Dataverse server
|
||||
- **Principal Investigator (PI) information**: Optional. Used as a fallback to determine corresponding authors when the publication does not explicitly specify them
|
||||
- **Default grant configurations**: Funding information to be included in the metadata (supports multiple grants)
|
||||
|
||||
### Configuration File Structure
|
||||
|
||||
The configuration file should follow this structure:
|
||||
|
||||
```yaml
|
||||
# Dataverse server connection details
|
||||
dataverse:
|
||||
url: "https://your-dataverse-instance.org"
|
||||
api_token: "your-api-token"
|
||||
|
||||
# Default grant information (supports multiple grants)
|
||||
default_grants:
|
||||
- funder: "Your Funding Agency"
|
||||
id: "GRANT123456"
|
||||
- funder: "Another Funding Agency"
|
||||
id: "GRANT789012"
|
||||
|
||||
# Principal investigators for fallback corresponding author determination (optional)
|
||||
pis:
|
||||
- family_name: "Doe"
|
||||
given_name: "John"
|
||||
orcid: "0000-0000-0000-0000"
|
||||
email: "john.doe@university.edu"
|
||||
affiliation: "Department of Science, University"
|
||||
```
|
||||
|
||||
See `config_example.yaml` for a complete example configuration.
|
||||
|
||||
**Note**: The PI section is optional. If no corresponding authors are found in the publication metadata and no PIs are configured, the tool will still generate metadata but may issue a warning about missing corresponding author information.
|
||||
|
||||
## Usage
|
||||
|
||||
|
@ -69,8 +103,6 @@ Documentation is generated using Sphinx. See the `docs/` directory for detailed
|
|||
|
||||
## Testing
|
||||
|
||||
## Testing
|
||||
|
||||
Tests are implemented with pytest. The test suite provides comprehensive coverage of the core functionality. To run the tests, execute:
|
||||
|
||||
```bash
|
||||
|
@ -102,11 +134,13 @@ pytest --cov=. --cov-report=html
|
|||
This creates a `htmlcov` directory. Open `htmlcov/index.html` in a browser to view the detailed coverage report.
|
||||
|
||||
A `.coveragerc` configuration file is provided that:
|
||||
|
||||
- Excludes test files, documentation, and boilerplate code from coverage analysis
|
||||
- Configures reporting to ignore common non-testable lines (like defensive imports)
|
||||
- Sets the output directory for HTML reports
|
||||
|
||||
Recent improvements have increased coverage from 48% to 61% by adding focused tests for:
|
||||
|
||||
- Citation building functionality
|
||||
- License processing and validation
|
||||
- Metadata field extraction
|
||||
|
@ -114,6 +148,7 @@ Recent improvements have increased coverage from 48% to 61% by adding focused te
|
|||
- Publication data parsing and validation
|
||||
|
||||
Areas that could benefit from additional testing:
|
||||
|
||||
- More edge cases in the MetadataProcessor class workflow
|
||||
- Additional CitationBuilder scenarios with diverse inputs
|
||||
- Complex network interactions and error handling
|
||||
|
@ -122,7 +157,7 @@ Areas that could benefit from additional testing:
|
|||
|
||||
The test suite is organized into six main files:
|
||||
|
||||
1. **test_doi2dataset.py**: Basic tests for core functions like phase checking, name splitting and DOI validation.
|
||||
1. **test_doi2dataset.py**: Basic tests for core functions like name splitting, DOI validation, and filename sanitization.
|
||||
2. **test_fetch_doi_mock.py**: Tests API interactions using a mock OpenAlex response stored in `srep45389.json`.
|
||||
3. **test_citation_builder.py**: Tests for building citation metadata from API responses.
|
||||
4. **test_metadata_processor.py**: Tests for the metadata processing workflow.
|
||||
|
@ -136,7 +171,6 @@ The test suite covers the following categories of functionality:
|
|||
#### Core Functionality Tests
|
||||
|
||||
- **DOI Validation and Processing**: Parameterized tests for DOI normalization, validation, and filename sanitization with various inputs.
|
||||
- **Phase Management**: Tests for checking publication year against defined project phases, including boundary cases.
|
||||
- **Name Processing**: Extensive tests for parsing and splitting author names in different formats (with/without commas, middle initials, etc.).
|
||||
- **Email Validation**: Tests for proper validation of email addresses with various domain configurations.
|
||||
|
||||
|
@ -151,12 +185,36 @@ The test suite covers the following categories of functionality:
|
|||
|
||||
- **Citation Building**: Tests for properly building citation metadata from API responses.
|
||||
- **License Processing**: Tests for correctly identifying and formatting license information from various license IDs.
|
||||
- **Principal Investigator Matching**: Tests for finding project PIs based on ORCID identifiers.
|
||||
- **Principal Investigator Matching**: Tests for finding project PIs based on ORCID identifiers (used for fallback corresponding author determination).
|
||||
- **Configuration Loading**: Tests for properly loading and validating configuration from files.
|
||||
- **Metadata Workflow**: Tests for the complete metadata processing workflow.
|
||||
|
||||
These tests ensure that all components work correctly in isolation and together as a system, with special attention to edge cases and error handling.
|
||||
|
||||
## Changelog
|
||||
|
||||
### Version 2.0 - Generalization Update
|
||||
|
||||
This version generalizes the tool so that it is no longer tied to a single organization and is suitable for a broader range of use cases:
|
||||
|
||||
**Breaking Changes:**
|
||||
|
||||
- Removed organization-specific metadata blocks (project phases, organizational fields)
|
||||
- Removed `Phase` class and phase-related configuration
|
||||
- Simplified configuration structure
|
||||
|
||||
**What's New:**
|
||||
|
||||
- Streamlined metadata generation focusing on standard Dataverse citation metadata
|
||||
- Reduced configuration requirements for easier adoption
|
||||
- Maintained PI information support for corresponding author fallback functionality
|
||||
|
||||
**Migration Guide:**
|
||||
|
||||
- Remove the `phase` section from your configuration file
|
||||
- The tool will now generate only standard citation metadata blocks
|
||||
- PI information is still supported and used for fallback corresponding author determination
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
|
||||
|
|
|
@ -8,9 +8,8 @@ from .doi2dataset import (
|
|||
LicenseProcessor,
|
||||
MetadataProcessor,
|
||||
NameProcessor,
|
||||
PIFinder,
|
||||
Person,
|
||||
Phase,
|
||||
PIFinder,
|
||||
SubjectMapper,
|
||||
sanitize_filename,
|
||||
validate_email_address,
|
||||
|
|
|
@ -1,23 +1,25 @@
|
|||
default_grant:
|
||||
dataverse:
|
||||
url: "https://your-dataverse-instance.org"
|
||||
api_token: "your-api-token-here"
|
||||
dataverse: "your-dataverse-alias"
|
||||
auth_user: "your-username"
|
||||
auth_password: "your-password"
|
||||
|
||||
default_grants:
|
||||
- funder: "Awesome Funding Agency"
|
||||
id: "ABC12345"
|
||||
|
||||
phase:
|
||||
"Phase 1 (2021/2025)":
|
||||
start: 2021
|
||||
end: 2025
|
||||
- funder: "Another Funding Agency"
|
||||
id: "DEF67890"
|
||||
|
||||
pis:
|
||||
- family_name: "Doe"
|
||||
given_name: "Jon"
|
||||
orcid: "0000-0000-0000-0000"
|
||||
email: "jon.doe@some-university.edu"
|
||||
email: "jon.doe@iana.org"
|
||||
affiliation: "Institute of Science, Some University"
|
||||
project: ["Project A01"]
|
||||
|
||||
- family_name: "Doe"
|
||||
given_name: "Jane"
|
||||
orcid: "0000-0000-0000-0001"
|
||||
email: "jane.doe@some-university.edu"
|
||||
email: "jane.doe@iana.org"
|
||||
affiliation: "Institute of Science, Some University"
|
||||
project: ["Project A02"]
|
||||
|
|
121
doi2dataset.py
121
doi2dataset.py
|
@ -109,36 +109,6 @@ class FieldType(Enum):
|
|||
COMPOUND = "compound"
|
||||
VOCABULARY = "controlledVocabulary"
|
||||
|
||||
@dataclass
|
||||
class Phase:
|
||||
"""
|
||||
Represents a project phase with a defined time span.
|
||||
|
||||
Attributes:
|
||||
name (str): The name of the project phase.
|
||||
start (int): The start year of the project phase.
|
||||
end (int): The end year of the project phase.
|
||||
"""
|
||||
|
||||
name: str
|
||||
start: int
|
||||
end: int
|
||||
|
||||
def check_year(self, year: int) -> bool:
|
||||
"""
|
||||
Checks whether a given year falls within the project's phase boundaries.
|
||||
|
||||
Args:
|
||||
year (int): The year to check.
|
||||
|
||||
Returns:
|
||||
bool: True if the year is within the phase boundaries, otherwise False.
|
||||
"""
|
||||
|
||||
if self.start <= year <= self.end:
|
||||
return True
|
||||
return False
|
||||
|
||||
@dataclass
|
||||
class BaseMetadataField[T]:
|
||||
"""
|
||||
|
@ -301,7 +271,7 @@ class Institution:
|
|||
"termName": self.display_name,
|
||||
"@type": "https://schema.org/Organization"
|
||||
}
|
||||
return PrimitiveMetadataField("authorAffiliation", False, self.ror, expanded_value)
|
||||
return PrimitiveMetadataField("authorAffiliation", False, self.ror, expanded_value=expanded_value)
|
||||
else:
|
||||
return PrimitiveMetadataField("authorAffiliation", False, self.display_name)
|
||||
|
||||
|
@ -316,14 +286,12 @@ class Person:
|
|||
orcid (str): ORCID identifier (optional).
|
||||
email (str): Email address (optional).
|
||||
affiliation (Institution): Affiliation of the person (optional).
|
||||
project (list[str]): List of associated projects.
|
||||
"""
|
||||
family_name: str
|
||||
given_name: str
|
||||
orcid: str = ""
|
||||
email: str = ""
|
||||
affiliation: Institution | str = ""
|
||||
project: list[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict[str, str | list[str] | dict[str, str]]:
|
||||
"""
|
||||
|
@ -340,8 +308,7 @@ class Person:
|
|||
"family_name": self.family_name,
|
||||
"given_name": self.given_name,
|
||||
"orcid": self.orcid,
|
||||
"email": self.email,
|
||||
"project": self.project
|
||||
"email": self.email
|
||||
}
|
||||
|
||||
if isinstance(self.affiliation, Institution):
|
||||
|
@ -464,12 +431,10 @@ class ConfigData:
|
|||
|
||||
Attributes:
|
||||
dataverse (dict[str, str]): Dataverse-related configuration.
|
||||
phase (dict[str, dict[str, int]]): Mapping of project phases.
|
||||
pis (list[dict[str, Any]]): List of principal investigator configurations.
|
||||
default_grants (list[dict[str, str]]): Default grant configurations.
|
||||
"""
|
||||
dataverse: dict[str, str]
|
||||
phase: dict[str, dict[str, int]]
|
||||
pis: list[dict[str, Any]]
|
||||
default_grants: list[dict[str, str]]
|
||||
|
||||
|
@ -523,7 +488,6 @@ class Config:
|
|||
|
||||
cls._config_data = ConfigData(
|
||||
dataverse=config_data.get('dataverse', {}),
|
||||
phase=config_data.get('phase', {}),
|
||||
pis=config_data.get('pis', []),
|
||||
default_grants=config_data.get('default_grants', [])
|
||||
)
|
||||
|
@ -545,16 +509,6 @@ class Config:
|
|||
raise RuntimeError("Failed to load configuration")
|
||||
return cls._config_data
|
||||
|
||||
@property
|
||||
def PHASE(self) -> dict[str, dict[str, int]]:
|
||||
"""
|
||||
Get phase configuration.
|
||||
|
||||
Returns:
|
||||
dict[str, dict[str, int]]: Mapping of phases.
|
||||
"""
|
||||
return self.get_config().phase
|
||||
|
||||
@property
|
||||
def PIS(self) -> list[dict[str, Any]]:
|
||||
"""
|
||||
|
@ -833,7 +787,10 @@ class AbstractProcessor:
|
|||
else:
|
||||
console.print(f"\n{ICONS['warning']} No abstract found in CrossRef!", style="warning")
|
||||
else:
|
||||
if license.name:
|
||||
console.print(f"\n{ICONS['info']} License {license.name} does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info")
|
||||
else:
|
||||
console.print(f"\n{ICONS['info']} Custom license does not allow derivative works. Reconstructing abstract from OpenAlex!", style="info")
|
||||
|
||||
|
||||
openalex_abstract = self._get_openalex_abstract(data)
|
||||
|
@ -1406,8 +1363,7 @@ class MetadataProcessor:
|
|||
CompoundMetadataField("grantNumber", True, grants).to_dict()
|
||||
],
|
||||
"displayName": "Citation Metadata"
|
||||
},
|
||||
"crc1430_org_v1": self._build_organization_metadata(data)
|
||||
}
|
||||
},
|
||||
"files": []
|
||||
}
|
||||
|
@ -1473,71 +1429,22 @@ class MetadataProcessor:
|
|||
"""
|
||||
return data.get("publication_year", "")
|
||||
|
||||
def _build_organization_metadata(self, data: dict[str, Any]) -> dict[str, Any]:
|
||||
"""
|
||||
Build organization metadata fields (phase, project, PI names).
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): The metadata.
|
||||
|
||||
Returns:
|
||||
dict[str, Any]: Organization metadata.
|
||||
"""
|
||||
publication_year = self._get_publication_year(data)
|
||||
if publication_year:
|
||||
phases = self._get_phases(int(publication_year))
|
||||
else:
|
||||
phases = []
|
||||
|
||||
pis = self._get_involved_pis(data)
|
||||
projects: list[str] = []
|
||||
for pi in pis:
|
||||
for project in pi.project:
|
||||
projects.append(project)
|
||||
|
||||
pi_names: list[str] = []
|
||||
for pi in pis:
|
||||
pi_names.append(pi.format_name())
|
||||
|
||||
# Deduplicate projects and PI names
|
||||
unique_projects = list(set(projects))
|
||||
unique_pi_names = list(set(pi_names))
|
||||
|
||||
return {
|
||||
"fields": [
|
||||
ControlledVocabularyMetadataField("crc1430OrgV1Phase", True, phases).to_dict(),
|
||||
ControlledVocabularyMetadataField("crc1430OrgV1Project", True, unique_projects).to_dict(),
|
||||
ControlledVocabularyMetadataField("crc1430OrgV1PI", True, unique_pi_names).to_dict()
|
||||
]
|
||||
}
|
||||
|
||||
def _get_phases(self, year: int) -> list[str]:
|
||||
"""
|
||||
Determine the project phases matching a given publication year.
|
||||
|
||||
Args:
|
||||
year (int): The publication year.
|
||||
|
||||
Returns:
|
||||
list[str]: List of matching phase names.
|
||||
"""
|
||||
config = Config()
|
||||
matching_phases: list[str] = []
|
||||
for phase_name, phase_info in config.PHASE.items():
|
||||
phase = Phase(phase_name, phase_info["start"], phase_info["end"])
|
||||
if phase.check_year(year):
|
||||
matching_phases.append(phase.name)
|
||||
return matching_phases
|
||||
|
||||
def _get_involved_pis(self, data: dict[str, Any]) -> list[Person]:
|
||||
"""
|
||||
Identify involved principal investigators from the metadata.
|
||||
Identify involved principal investigators from the metadata for use as fallback
|
||||
corresponding authors.
|
||||
|
||||
This method matches authors in the publication metadata against the configured
|
||||
PIs and returns matching PIs. It is used as a fallback when no corresponding
|
||||
authors are explicitly declared in the publication metadata.
|
||||
|
||||
Args:
|
||||
data (dict[str, Any]): The metadata.
|
||||
data (dict[str, Any]): The metadata from OpenAlex.
|
||||
|
||||
Returns:
|
||||
list[Person]: List of PIs.
|
||||
list[Person]: List of matching PIs for use as corresponding authors.
|
||||
"""
|
||||
involved_pis: list[Person] = []
|
||||
for authorship in data.get("authorships", []):
|
||||
|
|
15
pytest.ini
Normal file
15
pytest.ini
Normal file
|
@ -0,0 +1,15 @@
|
|||
[pytest]
|
||||
addopts =
|
||||
--cov=doi2dataset
|
||||
--cov-report=html
|
||||
--cov-report=xml
|
||||
--cov-report=term-missing
|
||||
--junitxml=junit.xml
|
||||
--verbose
|
||||
--tb=short
|
||||
|
||||
testpaths = tests
|
||||
|
||||
python_files = test_*.py *_test.py
|
||||
python_functions = test_*
|
||||
python_classes = Test*
|
|
@ -1,3 +1,4 @@
|
|||
pytest>=8.3.5,<9.0
|
||||
pytest-mock>=3.14.0,<4.0
|
||||
pytest-cov>=6.0.0,<7.0
|
||||
ruff>=0.11.1,<0.20
|
||||
|
|
3
setup.py
3
setup.py
|
@ -25,7 +25,8 @@ setup(
|
|||
],
|
||||
"dev": [
|
||||
"pytest>=8.3.5,<9.0",
|
||||
"pytest-mock>=3.14.0,4.0",
|
||||
"pytest-mock>=3.14.0,<4.0",
|
||||
"pytest-cov>=6.0.0,<7.0",
|
||||
"ruff>=0.11.1,<0.20"
|
||||
]
|
||||
},
|
||||
|
|
|
@ -1,23 +1,16 @@
|
|||
default_grant:
|
||||
default_grants:
|
||||
- funder: "Awesome Funding Agency"
|
||||
id: "ABC12345"
|
||||
|
||||
phase:
|
||||
"Phase 1 (2021/2025)":
|
||||
start: 2021
|
||||
end: 2025
|
||||
|
||||
pis:
|
||||
- family_name: "Doe"
|
||||
given_name: "Jon"
|
||||
orcid: "0000-0000-0000-0000"
|
||||
email: "jon.doe@iana.org"
|
||||
affiliation: "Institute of Science, Some University"
|
||||
project: ["Project A01"]
|
||||
|
||||
- family_name: "Doe"
|
||||
given_name: "Jane"
|
||||
orcid: "0000-0000-0000-0001"
|
||||
email: "jane.doe@iana.org"
|
||||
affiliation: "Institute of Science, Some University"
|
||||
project: ["Project A02"]
|
||||
|
|
|
@ -1,13 +1,9 @@
|
|||
import json
|
||||
import os
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from doi2dataset import (
|
||||
CitationBuilder,
|
||||
PIFinder,
|
||||
Person
|
||||
)
|
||||
import pytest
|
||||
|
||||
from doi2dataset import CitationBuilder, Person, PIFinder
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -27,8 +23,7 @@ def test_pi():
|
|||
given_name="Author",
|
||||
orcid="0000-0000-0000-1234",
|
||||
email="test.author@example.org",
|
||||
affiliation="Test University",
|
||||
project=["Test Project"]
|
||||
affiliation="Test University"
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -3,21 +3,9 @@ import sys
|
|||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
from doi2dataset import NameProcessor, Phase, sanitize_filename, validate_email_address
|
||||
from doi2dataset import NameProcessor, sanitize_filename, validate_email_address
|
||||
|
||||
|
||||
def test_phase_check_year():
|
||||
"""Test that check_year correctly determines if a year is within the phase boundaries."""
|
||||
phase = Phase("TestPhase", 2000, 2010)
|
||||
# Within boundaries
|
||||
assert phase.check_year(2005) is True
|
||||
# Outside boundaries
|
||||
assert phase.check_year(1999) is False
|
||||
assert phase.check_year(2011) is False
|
||||
# Boundary cases
|
||||
assert phase.check_year(2000) is True
|
||||
assert phase.check_year(2010) is True
|
||||
|
||||
def test_sanitize_filename():
|
||||
"""Test the sanitize_filename function to convert DOI to a valid filename."""
|
||||
doi = "10.1234/abc.def"
|
||||
|
|
|
@ -8,12 +8,11 @@ from doi2dataset import (
|
|||
APIClient,
|
||||
CitationBuilder,
|
||||
Config,
|
||||
License,
|
||||
LicenseProcessor,
|
||||
MetadataProcessor,
|
||||
Person,
|
||||
PIFinder,
|
||||
SubjectMapper
|
||||
SubjectMapper,
|
||||
)
|
||||
|
||||
|
||||
|
@ -158,8 +157,7 @@ def test_pi_finder_find_by_orcid():
|
|||
given_name="Jon",
|
||||
orcid="0000-0000-0000-0000",
|
||||
email="jon.doe@iana.org",
|
||||
affiliation="Institute of Science, Some University",
|
||||
project=["Project A01"]
|
||||
affiliation="Institute of Science, Some University"
|
||||
)
|
||||
|
||||
# Create PIFinder with our test PI
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import json
|
||||
import os
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from doi2dataset import MetadataProcessor
|
||||
|
||||
|
@ -40,7 +41,6 @@ def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypa
|
|||
# Mock methods that might cause issues in isolation
|
||||
metadata_processor._build_description = MagicMock(return_value="Test description")
|
||||
metadata_processor._get_involved_pis = MagicMock(return_value=[])
|
||||
metadata_processor._build_organization_metadata = MagicMock(return_value={})
|
||||
|
||||
# Call the method we're testing
|
||||
metadata = metadata_processor._build_metadata(openalex_data)
|
||||
|
@ -81,7 +81,6 @@ def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch):
|
|||
# Mock methods that might cause issues in isolation
|
||||
metadata_processor._build_description = MagicMock(return_value="Test description")
|
||||
metadata_processor._get_involved_pis = MagicMock(return_value=[])
|
||||
metadata_processor._build_organization_metadata = MagicMock(return_value={})
|
||||
|
||||
# Call the method we're testing
|
||||
metadata = metadata_processor._build_metadata(openalex_data)
|
||||
|
@ -130,7 +129,6 @@ def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, m
|
|||
# Mock methods that might cause issues in isolation
|
||||
metadata_processor._build_description = MagicMock(return_value="Test description")
|
||||
metadata_processor._get_involved_pis = MagicMock(return_value=[])
|
||||
metadata_processor._build_organization_metadata = MagicMock(return_value={})
|
||||
|
||||
# Call the method we're testing
|
||||
metadata = metadata_processor._build_metadata(openalex_data)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import pytest
|
||||
from doi2dataset import Person, Institution
|
||||
from doi2dataset import Institution, Person
|
||||
|
||||
|
||||
def test_person_to_dict_with_string_affiliation():
|
||||
"""Test Person.to_dict() with a string affiliation."""
|
||||
|
@ -8,8 +8,7 @@ def test_person_to_dict_with_string_affiliation():
|
|||
given_name="John",
|
||||
orcid="0000-0001-2345-6789",
|
||||
email="john.doe@example.org",
|
||||
affiliation="Test University",
|
||||
project=["Project A"]
|
||||
affiliation="Test University"
|
||||
)
|
||||
|
||||
result = person.to_dict()
|
||||
|
@ -18,7 +17,6 @@ def test_person_to_dict_with_string_affiliation():
|
|||
assert result["given_name"] == "John"
|
||||
assert result["orcid"] == "0000-0001-2345-6789"
|
||||
assert result["email"] == "john.doe@example.org"
|
||||
assert result["project"] == ["Project A"]
|
||||
assert result["affiliation"] == "Test University"
|
||||
|
||||
|
||||
|
@ -31,8 +29,7 @@ def test_person_to_dict_with_institution_ror():
|
|||
given_name="John",
|
||||
orcid="0000-0001-2345-6789",
|
||||
email="john.doe@example.org",
|
||||
affiliation=inst,
|
||||
project=["Project A"]
|
||||
affiliation=inst
|
||||
)
|
||||
|
||||
result = person.to_dict()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue