From c97a89967c0620ec9b29e685a3f78fbc8d0756cb Mon Sep 17 00:00:00 2001 From: Alexander Minges Date: Thu, 10 Jul 2025 10:04:38 +0200 Subject: [PATCH] Add documentation infrastructure and improve documentation site This commit adds several improvements to the documentation: - Updated Sphinx configuration for better autodoc generation - Added a versions template for Read the Docs - Refined content for index, introduction, and FAQ pages - Updated usage instructions - Incremented project version - Improved setup.py to handle README loading --- .gitlab-ci.yml | 59 ++++++++++++++++++++++------ docs/source/_templates/versions.html | 19 +++++++++ docs/source/conf.py | 15 ++++++- docs/source/doi2dataset.rst | 7 ---- docs/source/faq.rst | 34 +++++++++++++--- docs/source/index.rst | 19 ++++++--- docs/source/introduction.rst | 4 +- docs/source/setup.rst | 7 ---- docs/source/usage.rst | 16 ++------ setup.py | 14 ++++++- 10 files changed, 140 insertions(+), 54 deletions(-) create mode 100644 docs/source/_templates/versions.html delete mode 100644 docs/source/doi2dataset.rst delete mode 100644 docs/source/setup.rst diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6ec2fe5..34082ba 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,23 +6,25 @@ # Note that environment variables can be set in several places # See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence stages: -- test -- secret-detection + - test + - secret-detection + - build-docs + - pages variables: PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" cache: paths: - - ".cache/pip/" - - ".venv/" + - ".cache/pip/" + - ".venv/" test: stage: test image: python:3 before_script: - - python -m pip install --upgrade pip - - pip install -r requirements.txt - - pip install -r requirements-dev.txt + - python -m pip install --upgrade pip + - pip install -r requirements.txt + - pip install -r requirements-dev.txt script: - - pytest + 
- pytest artifacts: reports: junit: junit.xml @@ -30,13 +32,46 @@ test: coverage_format: cobertura path: coverage.xml paths: - - htmlcov/ + - htmlcov/ expire_in: 1 week coverage: "/(?i)total.*? (100(?:\\.0+)?\\%|[1-9]?\\d(?:\\.\\d+)?\\%)$/" only: - - branches - - merge_requests + - branches + - merge_requests secret_detection: stage: secret-detection + +build-docs: + stage: build-docs + image: python:3 + before_script: + - python -m pip install --upgrade pip + - pip install -r requirements.txt + - pip install -r requirements-doc.txt + script: + - cd docs + - make html + artifacts: + paths: + - docs/build/html/ + expire_in: 1 week + only: + - branches + - merge_requests + +pages: + stage: pages + dependencies: + - build-docs + script: + - mkdir -p public + - cp -r docs/build/html/* public/ + artifacts: + paths: + - public + expire_in: 1 week + only: + - main + include: -- template: Security/Secret-Detection.gitlab-ci.yml + - template: Security/Secret-Detection.gitlab-ci.yml diff --git a/docs/source/_templates/versions.html b/docs/source/_templates/versions.html new file mode 100644 index 0000000..237c3fb --- /dev/null +++ b/docs/source/_templates/versions.html @@ -0,0 +1,19 @@ +{%- if versions %} +
+ + Read the Docs + v: {{ current_version }} + + +
+
+
Versions
+ {%- for item in versions %} +
+ {{ item.name }} +
+ {%- endfor %} +
+
+
+{%- endif %} diff --git a/docs/source/conf.py b/docs/source/conf.py index 921497b..054ccb0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,7 +14,7 @@ sys.path.insert(0, os.path.abspath('../..')) project = 'doi2dataset' copyright = '2025, Alexander Minges' author = 'Alexander Minges' -release = '1.0' +release = '2.0.2' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -24,6 +24,19 @@ extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] templates_path = ['_templates'] exclude_patterns = [] +# -- Options for autodoc ---------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html + +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, + 'special-members': '__init__', +} + +# Suppress warnings about failed autodoc imports and duplicate object descriptions +suppress_warnings = ['autodoc.import_object', 'ref.duplicate'] + # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output diff --git a/docs/source/doi2dataset.rst b/docs/source/doi2dataset.rst deleted file mode 100644 index b262195..0000000 --- a/docs/source/doi2dataset.rst +++ /dev/null @@ -1,7 +0,0 @@ -doi2dataset module -================== - -.. automodule:: doi2dataset - :members: - :show-inheritance: - :undoc-members: diff --git a/docs/source/faq.rst b/docs/source/faq.rst index 026e0c5..7b69424 100644 --- a/docs/source/faq.rst +++ b/docs/source/faq.rst @@ -1,14 +1,36 @@ Frequently Asked Questions (FAQ) ================================ -Q: What is **doi2dataset**? -A: **doi2dataset** is a tool to process DOIs and generate metadata for Dataverse datasets by fetching data from external APIs like OpenAlex and CrossRef. 
+**Q: What is doi2dataset?** + +A: **doi2dataset** is a tool to process DOIs and generate standard Dataverse citation metadata by fetching data from external APIs like OpenAlex and CrossRef. + +---- + +**Q: How do I install doi2dataset?** -Q: How do I install **doi2dataset**? A: You can clone the repository from GitHub or install it via pip. Please refer to the Installation section for details. -Q: Can I upload metadata directly to a Dataverse server? -A: Yes, the tool provides an option to upload metadata via the command line using the ``-u`` flag. Ensure that your configuration in `config.yaml` is correct. +---- + +**Q: Can I upload metadata directly to a Dataverse server?** + +A: Yes, the tool provides an option to upload metadata via the command line using the ``-u`` flag. Ensure that your configuration in ``config.yaml`` includes the correct Dataverse connection details. + +---- + +**Q: What command line options are available?** + +A: The tool supports several options including ``-f`` for input files, ``-o`` for output directory, ``-d`` for depositor name, ``-s`` for subject, ``-m`` for contact email, ``-u`` for upload, and ``-r`` for using ROR identifiers. + +---- + +**Q: Do I need to configure PIs in the config file?** + +A: No, PI configuration is optional. It's only used as a fallback for determining corresponding authors when they're not explicitly specified in the publication metadata. + +---- + +**Q: Where can I find the API documentation?** -Q: Where can I find the API documentation? A: The API reference is generated automatically in the Modules section of this documentation. diff --git a/docs/source/index.rst b/docs/source/index.rst index e5c1e6e..ee56c03 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,17 +8,24 @@ doi2dataset documentation Overview -------- -**doi2dataset** is a Python tool designed to process DOIs and generate metadata for Dataverse datasets. 
+**doi2dataset** is a Python tool designed to process DOIs and generate standard citation metadata for Dataverse datasets. It retrieves data from external APIs such as OpenAlex and CrossRef and converts it into a format that meets Dataverse requirements. Key Features: -- **Validation** and normalization of DOIs -- Retrieval and processing of **metadata** (e.g., abstract, license, author information) -- Automatic mapping and generation of metadata fields (e.g., title, description, keywords) -- Support for controlled vocabularies and complex (compound) metadata fields -- Optional **uploading** of metadata to a Dataverse server +- **DOI validation** and normalization +- **Metadata retrieval** from external APIs (OpenAlex, CrossRef) +- **Standard Dataverse metadata** generation including: + - Title, publication date, and alternative URL + - Author information with affiliations and ORCID identifiers + - Dataset contact information (corresponding authors) + - Abstract and description + - Keywords and subject classification + - Grant/funding information + - License information when available +- **Optional uploading** of metadata to a Dataverse server - **Progress tracking** and error handling using the Rich library +- **Research Organization Registry (ROR)** support for institutional identifiers diff --git a/docs/source/introduction.rst b/docs/source/introduction.rst index 3d4f703..18f648d 100644 --- a/docs/source/introduction.rst +++ b/docs/source/introduction.rst @@ -1,8 +1,8 @@ Introduction ============ -Welcome to the **doi2dataset** documentation. This guide provides an in-depth look at the tool, its purpose, and how it can help you generate metadata for Dataverse datasets. +Welcome to the **doi2dataset** documentation. This guide provides an in-depth look at the tool, its purpose, and how it can help you generate standard citation metadata for Dataverse datasets. 
-The **doi2dataset** tool is aimed at researchers, data stewards, and developers who need to convert DOI-based metadata into a format compatible with Dataverse. It automates the retrieval of metadata from external sources (like OpenAlex and CrossRef) and performs necessary data transformations. +The **doi2dataset** tool is aimed at researchers, data stewards, and developers who need to convert DOI-based metadata into a format compatible with Dataverse. It automates the retrieval of metadata from external sources (like OpenAlex and CrossRef) and generates standard Dataverse citation metadata blocks including title, authors, abstract, keywords, and funding information. In the following sections, you'll learn about the installation process, usage examples, and a detailed API reference. diff --git a/docs/source/setup.rst b/docs/source/setup.rst deleted file mode 100644 index 1084cc6..0000000 --- a/docs/source/setup.rst +++ /dev/null @@ -1,7 +0,0 @@ -setup module -============ - -.. automodule:: setup - :members: - :show-inheritance: - :undoc-members: diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 4e270c3..f287b00 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -21,6 +21,7 @@ The tool offers several command line options: - ``-s, --subject``: Default subject for the metadata. - ``-m, --contact-mail``: Contact email address. - ``-u, --upload``: Flag to upload metadata to a Dataverse server. +- ``-r, --use-ror``: Use Research Organization Registry (ROR) identifiers for institutions when available. 
Configuration via config.yaml ------------------------------- @@ -42,27 +43,18 @@ Make sure that your **config.yaml** is properly configured before running the to auth_password: "your_password" dataverse: "your_dataverse_name" - phase: - Phase1: - start: 2010 - end: 2015 - Phase2: - start: 2016 - end: 2020 - pis: - given_name: "John" family_name: "Doe" email: "john.doe@example.com" orcid: "0000-0001-2345-6789" affiliation: "Example University" - project: - - "Project A" - - "Project B" default_grants: - funder: "Funder Name" id: "GrantID12345" + - funder: "Another Funding Agency" + id: "GrantID98765" Usage Example with Configuration ---------------------------------- @@ -70,7 +62,7 @@ If you have configured your **config.yaml** and want to process DOIs from a file .. code-block:: bash - python doi2dataset.py -f dois.txt -o output/ -d "John Doe" -s "Medicine, Health and Life Sciences" -m "john.doe@example.com" -u + python doi2dataset.py -f dois.txt -o output/ -d "Doe, John" -s "Medicine, Health and Life Sciences" -m "john.doe@example.com" -u -r This command will use the options provided on the command line as well as the settings from **config.yaml**. 
diff --git a/setup.py b/setup.py index f88994f..6ca8a0a 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,22 @@ +import os + from setuptools import find_packages, setup +# Get the directory containing this file +here = os.path.abspath(os.path.dirname(__file__)) + +# Read the README file +readme_path = os.path.join(here, "README.md") +long_description = "" +if os.path.exists(readme_path): + with open(readme_path, encoding="utf-8") as f: + long_description = f.read() + setup( name="doi2dataset", version="1.0", description="A tool to process DOIs and generate metadata for Dataverse.org datasets.", - long_description=open("README.md", encoding="utf-8").read() if open("README.md", encoding="utf-8") else "", + long_description=long_description, long_description_content_type="text/markdown", author="Alexander Minges", author_email="alexander.minges@uni-due.de",