Initial commit and release of doi2dataset
This commit is contained in:
commit
9be53fd2fc
23 changed files with 2482 additions and 0 deletions
186
.gitignore
vendored
Normal file
186
.gitignore
vendored
Normal file
|
@ -0,0 +1,186 @@
|
|||
# Config file
|
||||
config.yaml
|
||||
|
||||
# Processed DOIs
|
||||
*.json
|
||||
|
||||
# Typing stubs
|
||||
typing/
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
#uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Typing stubs
|
||||
typings/
|
27
LICENSE.md
Normal file
27
LICENSE.md
Normal file
|
@ -0,0 +1,27 @@
|
|||
---
|
||||
|
||||
**LICENSE.md (MIT License)**
|
||||
|
||||
```markdown
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2025 Alexander Minges
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
```
|
79
README.md
Normal file
79
README.md
Normal file
|
@ -0,0 +1,79 @@
|
|||
# doi2dataset
|
||||
|
||||
**doi2dataset** is a Python tool designed to process DOIs and generate metadata for Dataverse datasets. It retrieves metadata from external APIs (such as OpenAlex and CrossRef), maps metadata fields, and can optionally upload the generated metadata to a Dataverse instance.
|
||||
|
||||
## Features
|
||||
|
||||
- **DOI Validation and Normalization:** Validates DOIs and converts them into a standardized format.
|
||||
- **Metadata Retrieval:** Fetches metadata such as title, abstract, license, and author information from external sources.
|
||||
- **Metadata Mapping:** Automatically maps and generates metadata fields (e.g., title, description, keywords) including support for controlled vocabularies and compound fields.
|
||||
- **Optional Upload:** Allows uploading of metadata directly to a Dataverse server.
|
||||
- **Progress Tracking:** Uses the Rich library for user-friendly progress tracking and error handling.
|
||||
|
||||
## Installation
|
||||
|
||||
Clone the repository from GitHub:
|
||||
|
||||
```bash
|
||||
git clone https://git.athemis.de/Athemis/doi2dataset
|
||||
cd doi2dataset
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Configuration
|
||||
|
||||
Before running the tool, configure the necessary settings in the `config.yaml` file located in the project root. This file contains configuration details such as:
|
||||
|
||||
- Dataverse connection details (URL, API token, authentication credentials)
|
||||
- Mapping of project phases
|
||||
- Principal Investigator (PI) information
|
||||
- Default grant configurations
|
||||
|
||||
## Usage
|
||||
|
||||
Run doi2dataset from the command line by providing one or more DOIs:
|
||||
|
||||
```bash
|
||||
python doi2dataset.py [options] DOI1 DOI2 ...
|
||||
```
|
||||
|
||||
### Command Line Options
|
||||
|
||||
- `-f, --file`
|
||||
Specify a file containing DOIs (one per line).
|
||||
|
||||
- `-o, --output-dir`
|
||||
Directory where metadata files will be saved.
|
||||
|
||||
- `-d, --depositor`
|
||||
Name of the depositor.
|
||||
|
||||
- `-s, --subject`
|
||||
Default subject for the metadata.
|
||||
|
||||
- `-m, --contact-mail`
|
||||
Contact email address.
|
||||
|
||||
- `-u, --upload`
|
||||
Upload metadata to a Dataverse server.
|
||||
|
||||
## Documentation
|
||||
|
||||
Documentation is generated using Sphinx. See the `docs/` directory for detailed API references and usage examples.
|
||||
|
||||
## Testing
|
||||
|
||||
Tests are implemented with pytest. To run the tests, execute:
|
||||
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License. See the [LICENSE.md](LICENSE.md) file for details.
|
0
__init__.py
Normal file
0
__init__.py
Normal file
23
config_example.yaml
Normal file
23
config_example.yaml
Normal file
|
@ -0,0 +1,23 @@
|
|||
default_grant:
|
||||
- funder: "Awesome Funding Agency"
|
||||
id: "ABC12345"
|
||||
|
||||
phase:
|
||||
"Phase 1 (2021/2025)":
|
||||
start: 2021
|
||||
end: 2025
|
||||
|
||||
pis:
|
||||
- family_name: "Doe"
|
||||
given_name: "Jon"
|
||||
orcid: "0000-0000-0000-0000"
|
||||
email: "jon.doe@some-university.edu"
|
||||
affiliation: "Institute of Science, Some University"
|
||||
project: ["Project A01"]
|
||||
|
||||
- family_name: "Doe"
|
||||
given_name: "Jane"
|
||||
orcid: "0000-0000-0000-0001"
|
||||
email: "jane.doe@some-university.edu"
|
||||
affiliation: "Institute of Science, Some University"
|
||||
project: ["Project A02"]
|
20
docs/Makefile
Normal file
20
docs/Makefile
Normal file
|
@ -0,0 +1,20 @@
|
|||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
35
docs/make.bat
Normal file
35
docs/make.bat
Normal file
|
@ -0,0 +1,35 @@
|
|||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
31
docs/source/conf.py
Normal file
31
docs/source/conf.py
Normal file
|
@ -0,0 +1,31 @@
|
|||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath('../..'))
|
||||
|
||||
project = 'doi2dataset'
|
||||
copyright = '2025, Alexander Minges'
|
||||
author = 'Alexander Minges'
|
||||
release = '1.0'
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"]
|
||||
|
||||
templates_path = ['_templates']
|
||||
exclude_patterns = []
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ['_static']
|
7
docs/source/doi2dataset.rst
Normal file
7
docs/source/doi2dataset.rst
Normal file
|
@ -0,0 +1,7 @@
|
|||
doi2dataset module
|
||||
==================
|
||||
|
||||
.. automodule:: doi2dataset
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
14
docs/source/faq.rst
Normal file
14
docs/source/faq.rst
Normal file
|
@ -0,0 +1,14 @@
|
|||
Frequently Asked Questions (FAQ)
|
||||
================================
|
||||
|
||||
Q: What is **doi2dataset**?
|
||||
A: **doi2dataset** is a tool to process DOIs and generate metadata for Dataverse datasets by fetching data from external APIs like OpenAlex and CrossRef.
|
||||
|
||||
Q: How do I install **doi2dataset**?
|
||||
A: You can clone the repository from GitHub or install it via pip. Please refer to the Installation section for details.
|
||||
|
||||
Q: Can I upload metadata directly to a Dataverse server?
|
||||
A: Yes, the tool provides an option to upload metadata via the command line using the ``-u`` flag. Ensure that your configuration in `config.yaml` is correct.
|
||||
|
||||
Q: Where can I find the API documentation?
|
||||
A: The API reference is generated automatically in the Modules section of this documentation.
|
34
docs/source/index.rst
Normal file
34
docs/source/index.rst
Normal file
|
@ -0,0 +1,34 @@
|
|||
.. doi2dataset documentation master file, created by
|
||||
sphinx-quickstart on Fri Mar 21 13:03:59 2025.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
doi2dataset documentation
|
||||
=========================
|
||||
|
||||
Overview
|
||||
--------
|
||||
**doi2dataset** is a Python tool designed to process DOIs and generate metadata for Dataverse datasets.
|
||||
It retrieves data from external APIs such as OpenAlex and CrossRef and converts it into a format that meets Dataverse requirements.
|
||||
|
||||
Key Features:
|
||||
|
||||
- **Validation** and normalization of DOIs
|
||||
- Retrieval and processing of **metadata** (e.g., abstract, license, author information)
|
||||
- Automatic mapping and generation of metadata fields (e.g., title, description, keywords)
|
||||
- Support for controlled vocabularies and complex (compound) metadata fields
|
||||
- Optional **uploading** of metadata to a Dataverse server
|
||||
- **Progress tracking** and error handling using the Rich library
|
||||
|
||||
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
:titlesonly:
|
||||
|
||||
introduction
|
||||
installation
|
||||
usage
|
||||
modules
|
||||
faq
|
28
docs/source/installation.rst
Normal file
28
docs/source/installation.rst
Normal file
|
@ -0,0 +1,28 @@
|
|||
Installation
|
||||
============
|
||||
|
||||
There are several ways to install **doi2dataset**:
|
||||
|
||||
Using Git
|
||||
---------
|
||||
Clone the repository from GitHub by running the following commands in your terminal:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
git clone https://github.com/your_username/doi2dataset.git
|
||||
cd doi2dataset
|
||||
|
||||
Using pip (if available)
|
||||
-------------------------
|
||||
You can also install **doi2dataset** via pip:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install doi2dataset
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
After installation, ensure that the tool is configured correctly.
|
||||
Check the `config.yaml` file in the project root for necessary settings such as Dataverse connection details and PI information.
|
||||
|
||||
For more detailed instructions, please refer to the README file provided with the project.
|
8
docs/source/introduction.rst
Normal file
8
docs/source/introduction.rst
Normal file
|
@ -0,0 +1,8 @@
|
|||
Introduction
|
||||
============
|
||||
|
||||
Welcome to the **doi2dataset** documentation. This guide provides an in-depth look at the tool, its purpose, and how it can help you generate metadata for Dataverse datasets.
|
||||
|
||||
The **doi2dataset** tool is aimed at researchers, data stewards, and developers who need to convert DOI-based metadata into a format compatible with Dataverse. It automates the retrieval of metadata from external sources (like OpenAlex and CrossRef) and performs necessary data transformations.
|
||||
|
||||
In the following sections, you'll learn about the installation process, usage examples, and a detailed API reference.
|
9
docs/source/modules.rst
Normal file
9
docs/source/modules.rst
Normal file
|
@ -0,0 +1,9 @@
|
|||
API Reference
|
||||
=============
|
||||
|
||||
This section contains the API reference generated from the source code docstrings.
|
||||
|
||||
.. automodule:: doi2dataset
|
||||
:members:
|
||||
:undoc-members:
|
||||
:show-inheritance:
|
7
docs/source/setup.rst
Normal file
7
docs/source/setup.rst
Normal file
|
@ -0,0 +1,7 @@
|
|||
setup module
|
||||
============
|
||||
|
||||
.. automodule:: setup
|
||||
:members:
|
||||
:show-inheritance:
|
||||
:undoc-members:
|
77
docs/source/usage.rst
Normal file
77
docs/source/usage.rst
Normal file
|
@ -0,0 +1,77 @@
|
|||
Usage
|
||||
=====
|
||||
|
||||
Running **doi2dataset** is done from the command line. Below is an example of how to use the tool.
|
||||
|
||||
Basic Example
|
||||
-------------
|
||||
To process one or more DOIs, run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python doi2dataset.py 10.1234/doi1 10.5678/doi2
|
||||
|
||||
Command Line Options
|
||||
--------------------
|
||||
The tool offers several command line options:
|
||||
|
||||
- ``-f, --file``: Specify a file containing DOIs (one per line).
|
||||
- ``-o, --output-dir``: Directory where metadata files will be saved.
|
||||
- ``-d, --depositor``: Name of the depositor.
|
||||
- ``-s, --subject``: Default subject for the metadata.
|
||||
- ``-m, --contact-mail``: Contact email address.
|
||||
- ``-u, --upload``: Flag to upload metadata to a Dataverse server.
|
||||
|
||||
Configuration via config.yaml
|
||||
-------------------------------
|
||||
Some options are also set via the **config.yaml** file. This file includes settings such as:
|
||||
|
||||
- Dataverse connection details (URL, API token, authentication credentials).
|
||||
- Mapping of project phases.
|
||||
- PI (principal investigator) information.
|
||||
- Default grant configurations.
|
||||
|
||||
Make sure that your **config.yaml** is properly configured before running the tool. For example, your **config.yaml** might include:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
dataverse:
|
||||
url: "https://your.dataverse.server"
|
||||
api_token: "your_api_token"
|
||||
auth_user: "your_username"
|
||||
auth_password: "your_password"
|
||||
dataverse: "your_dataverse_name"
|
||||
|
||||
phase:
|
||||
Phase1:
|
||||
start: 2010
|
||||
end: 2015
|
||||
Phase2:
|
||||
start: 2016
|
||||
end: 2020
|
||||
|
||||
pis:
|
||||
- given_name: "John"
|
||||
family_name: "Doe"
|
||||
email: "john.doe@example.com"
|
||||
orcid: "0000-0001-2345-6789"
|
||||
affiliation: "Example University"
|
||||
project:
|
||||
- "Project A"
|
||||
- "Project B"
|
||||
|
||||
default_grants:
|
||||
- funder: "Funder Name"
|
||||
id: "GrantID12345"
|
||||
|
||||
Usage Example with Configuration
|
||||
----------------------------------
|
||||
If you have configured your **config.yaml** and want to process DOIs from a file while uploading the metadata, you could run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
python doi2dataset.py -f dois.txt -o output/ -d "John Doe" -s "Medicine, Health and Life Sciences" -m "john.doe@example.com" -u
|
||||
|
||||
This command will use the options provided on the command line as well as the settings from **config.yaml**.
|
||||
|
||||
For more details on usage and configuration, please refer to the rest of the documentation.
|
1733
doi2dataset.py
Executable file
1733
doi2dataset.py
Executable file
File diff suppressed because it is too large
Load diff
3
requirements-dev.txt
Normal file
3
requirements-dev.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
pytest>=8.3.5,<9.0
|
||||
pytest-mock>=3.14.0,<4.0
|
||||
ruff>=0.11.1,<0.20
|
2
requirements-doc.txt
Normal file
2
requirements-doc.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
sphinx>=8.2.3,<9.0.0
|
||||
sphinx_rtd_theme>=3.0,<4.0
|
6
requirements.txt
Normal file
6
requirements.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
dnspython>=2.7.0,<3.0.0
|
||||
requests>=2.32.3,<2.33.0
|
||||
PyYAML>=6.0.2,<7.0
|
||||
email_validator>=2.2.0,<3.0.0
|
||||
rich>=13.9.4,<14.0.0
|
||||
idutils>=1.4.2,<2.0.0
|
46
setup.py
Normal file
46
setup.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
from setuptools import find_packages, setup
|
||||
|
||||
setup(
|
||||
name="doi2dataset",
|
||||
version="1.0",
|
||||
description="A tool to process DOIs and generate metadata for Dataverse.org datasets.",
|
||||
long_description=open("README.md", encoding="utf-8").read() if open("README.md", encoding="utf-8") else "",
|
||||
long_description_content_type="text/markdown",
|
||||
author="Alexander Minges",
|
||||
author_email="alexander.minges@uni-due.de",
|
||||
url="https://github.com/your_username/doi2dataset",
|
||||
packages=find_packages(),
|
||||
install_requires=[
|
||||
"dnspython>=2.7.0,<3.0.0",
|
||||
"requests>=2.32.3,<2.33.0",
|
||||
"PyYAML>=6.0,<7.0",
|
||||
"email_validator>=2.2.0,<3.0.0",
|
||||
"rich>=13.9.4,<14.0.0",
|
||||
"idutils>=1.4.2,<2.0.0"
|
||||
],
|
||||
extras_require={
|
||||
"docs": [
|
||||
"sphinx>=8.2.3,<9.0.0",
|
||||
"sphinx_rtd_theme>=3.0,<4.0"
|
||||
],
|
||||
"dev": [
|
||||
"pytest>=8.3.5,<9.0",
|
||||
"pytest-mock>=3.14.0,<4.0",
|
||||
"ruff>=0.11.1,<0.20"
|
||||
]
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"doi2dataset=doi2dataset:main"
|
||||
]
|
||||
},
|
||||
classifiers=[
|
||||
"Programming Language :: Python :: 3",
|
||||
"Operating System :: OS Independent",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Development Status :: 4 - Beta",
|
||||
"Intended Audience :: Developers",
|
||||
"Topic :: Software Development :: Build Tools",
|
||||
],
|
||||
python_requires='>=3.10',
|
||||
)
|
50
tests/test_doi2dataset.py
Normal file
50
tests/test_doi2dataset.py
Normal file
|
@ -0,0 +1,50 @@
|
|||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
from doi2dataset import NameProcessor, Phase, sanitize_filename, validate_email_address
|
||||
|
||||
|
||||
def test_phase_check_year():
|
||||
"""Test that check_year correctly determines if a year is within the phase boundaries."""
|
||||
phase = Phase("TestPhase", 2000, 2010)
|
||||
# Within boundaries
|
||||
assert phase.check_year(2005) is True
|
||||
# Outside boundaries
|
||||
assert phase.check_year(1999) is False
|
||||
assert phase.check_year(2011) is False
|
||||
# Boundary cases
|
||||
assert phase.check_year(2000) is True
|
||||
assert phase.check_year(2010) is True
|
||||
|
||||
def test_sanitize_filename():
|
||||
"""Test the sanitize_filename function to convert DOI to a valid filename."""
|
||||
doi = "10.1234/abc.def"
|
||||
expected = "10_1234_abc_def"
|
||||
result = sanitize_filename(doi)
|
||||
assert result == expected
|
||||
|
||||
def test_split_name_with_comma():
|
||||
"""Test splitting a full name that contains a comma."""
|
||||
full_name = "Doe, John"
|
||||
given, family = NameProcessor.split_name(full_name)
|
||||
assert given == "John"
|
||||
assert family == "Doe"
|
||||
|
||||
def test_split_name_without_comma():
|
||||
"""Test splitting a full name that does not contain a comma."""
|
||||
full_name = "John Doe"
|
||||
given, family = NameProcessor.split_name(full_name)
|
||||
assert given == "John"
|
||||
assert family == "Doe"
|
||||
|
||||
def test_validate_email_address_valid():
|
||||
"""Test that a valid email address is correctly recognized."""
|
||||
valid_email = "john.doe@iana.org"
|
||||
assert validate_email_address(valid_email) is True
|
||||
|
||||
def test_validate_email_address_invalid():
|
||||
"""Test that an invalid email address is correctly rejected."""
|
||||
invalid_email = "john.doe@invalid_domain"
|
||||
assert validate_email_address(invalid_email) is False
|
57
tests/test_fetch_doi_mock.py
Normal file
57
tests/test_fetch_doi_mock.py
Normal file
|
@ -0,0 +1,57 @@
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
|
||||
|
||||
import pytest
|
||||
|
||||
from doi2dataset import MetadataProcessor
|
||||
|
||||
|
||||
class FakeResponse:
|
||||
"""
|
||||
A fake response object to simulate an API response.
|
||||
"""
|
||||
def __init__(self, json_data, status_code=200):
|
||||
self._json = json_data
|
||||
self.status_code = status_code
|
||||
|
||||
def json(self):
|
||||
return self._json
|
||||
|
||||
def raise_for_status(self):
|
||||
pass
|
||||
|
||||
@pytest.fixture
|
||||
def fake_openalex_response():
|
||||
"""
|
||||
Load the saved JSON response from the file 'srep45389.json'
|
||||
located in the same directory as this test file.
|
||||
"""
|
||||
json_path = os.path.join(os.path.dirname(__file__), "srep45389.json")
|
||||
with open(json_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return data
|
||||
|
||||
def test_fetch_doi_data_with_file(mocker, fake_openalex_response):
|
||||
"""
|
||||
Test fetching DOI metadata by simulating the API call with a locally saved JSON response.
|
||||
|
||||
The APIClient.make_request method is patched to return a fake response based on
|
||||
the contents of 'srep45389.json', so that no actual network request is performed.
|
||||
"""
|
||||
doi = "10.1038/srep45389"
|
||||
fake_response = FakeResponse(fake_openalex_response, 200)
|
||||
|
||||
# Patch the make_request method of APIClient to return our fake_response.
|
||||
mocker.patch("doi2dataset.APIClient.make_request", return_value=fake_response)
|
||||
|
||||
# Instantiate MetadataProcessor without upload and progress.
|
||||
processor = MetadataProcessor(doi=doi, upload=False)
|
||||
|
||||
# Call _fetch_data(), which should now return our fake JSON data.
|
||||
data = processor._fetch_data()
|
||||
|
||||
# Verify that the fetched data matches the fake JSON data.
|
||||
assert data == fake_openalex_response
|
Loading…
Add table
Reference in a new issue