feat: complete CLI module separation
- Extract CLI functionality from monolithic script to dedicated modules
- Create doi2dataset/cli.py with all command-line interface logic
- Create doi2dataset/main.py as clean entry point
- Add normalize_doi function to utils.validation module
- Update pyproject.toml entry point to use new CLI module
- Maintain full backward compatibility with original doi2dataset.py

CLI modules created:
- cli.py: CLI functionality, argument parsing, progress tracking
- main.py: Clean entry point module

Features maintained:
- All command-line arguments and options work identically
- Progress tracking and rich console output preserved
- Error handling and validation maintained
- DOI batch processing functionality intact
- All 38 tests passing with 61% coverage

Benefits achieved:
- Clean separation between CLI and core business logic
- Multiple entry points available (cli.py, main.py, original script)
- Foundation for future CLI enhancements and testing
- Professional package structure with proper entry points
This commit is contained in:
parent
b6209691c3
commit
091311038d
6 changed files with 360 additions and 1 deletions
|
@ -35,6 +35,7 @@ from .api import (
|
||||||
APIClient,
|
APIClient,
|
||||||
LicenseProcessor,
|
LicenseProcessor,
|
||||||
)
|
)
|
||||||
|
from .cli import main, print_summary, process_doi_batch
|
||||||
from .core import (
|
from .core import (
|
||||||
Abstract,
|
Abstract,
|
||||||
BaseMetadataField,
|
BaseMetadataField,
|
||||||
|
@ -89,6 +90,10 @@ __all__ = [
|
||||||
"NameProcessor",
|
"NameProcessor",
|
||||||
"PIFinder",
|
"PIFinder",
|
||||||
"SubjectMapper",
|
"SubjectMapper",
|
||||||
|
# CLI components
|
||||||
|
"main",
|
||||||
|
"process_doi_batch",
|
||||||
|
"print_summary",
|
||||||
# Utilities
|
# Utilities
|
||||||
"validate_doi",
|
"validate_doi",
|
||||||
"validate_email_address",
|
"validate_email_address",
|
||||||
|
|
326
doi2dataset/cli.py
Normal file
326
doi2dataset/cli.py
Normal file
|
@ -0,0 +1,326 @@
|
||||||
|
"""
|
||||||
|
Command-line interface for doi2dataset.
|
||||||
|
|
||||||
|
This module provides the main CLI functionality for processing DOIs and generating
|
||||||
|
metadata for Dataverse datasets. It handles argument parsing, progress tracking,
|
||||||
|
and batch processing of multiple DOIs.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.panel import Panel
|
||||||
|
from rich.progress import (
|
||||||
|
BarColumn,
|
||||||
|
Progress,
|
||||||
|
SpinnerColumn,
|
||||||
|
TextColumn,
|
||||||
|
TimeElapsedColumn,
|
||||||
|
)
|
||||||
|
from rich.table import Table
|
||||||
|
from rich.theme import Theme
|
||||||
|
|
||||||
|
from .processing.metadata import MetadataProcessor
|
||||||
|
from .utils.validation import normalize_doi, sanitize_filename, validate_email_address
|
||||||
|
|
||||||
|
# Glyphs placed in front of console messages, looked up by category name.
ICONS = {
    "success": "✓",  # check mark
    "error": "✗",  # ballot X
    "warning": "!",  # exclamation mark
    "info": "ℹ",  # information sign
    "processing": "⋯",  # horizontal ellipsis
    "done": "∎",  # solid block
    "file": "⨳",  # stands in for a document
    "folder": "⊞",  # stands in for a folder
    "clock": "◷",  # stands in for a clock
    "search": "⌕",  # stands in for a search lens
    "data": "≡",  # triple bar
    "doi": "∾",  # stands in for a link
    "total": "∑",  # summation sign
    "save": "⤓",  # downward arrow to bar
    "upload": "⤒",  # upward arrow to bar
}
|
||||||
|
|
||||||
|
# Named Rich styles; console calls in this module select them with style="<name>".
THEME = Theme(
    dict(
        info="cyan",
        warning="yellow",
        error="red bold",
        success="green",
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
def print_summary(results: dict[str, list[Any]], console: Console) -> None:
    """
    Render the outcome of a batch run as a table wrapped in a panel.

    Each row lists at most three DOIs, followed by "..." when more exist.
    The "Failed" row is only added when at least one DOI failed.

    Args:
        results (dict[str, list[Any]]): Mapping with a "success" list of DOI
            strings and a "failed" list of two-item entries whose first item
            is the DOI.
        console (Console): Rich console the panel is printed to.
    """
    preview_limit = 3
    succeeded = results["success"]
    failed = results["failed"]

    summary = Table(title="Processing Results")
    column_specs = (
        ("Status", {"style": "bold"}),
        ("Count", {"justify": "right"}),
        ("DOIs", {"style": "dim"}),
    )
    for header, options in column_specs:
        summary.add_column(header, **options)

    success_preview = ", ".join(succeeded[:preview_limit])
    if len(succeeded) > preview_limit:
        success_preview += "..."
    summary.add_row(
        f"{ICONS['success']} Success",
        str(len(succeeded)),
        success_preview,
    )

    if failed:
        # Only the DOI part of each failure entry is shown in the table.
        failed_preview = ", ".join(
            failed_doi for failed_doi, _reason in failed[:preview_limit]
        )
        if len(failed) > preview_limit:
            failed_preview += "..."
        summary.add_row(
            f"{ICONS['error']} Failed",
            str(len(failed)),
            failed_preview,
        )

    framed = Panel(summary, title="Summary", border_style="blue")
    console.print(framed)
|
||||||
|
|
||||||
|
|
||||||
|
def process_doi_batch(
    dois: set[str],
    output_dir: Path,
    depositor: str | None = None,
    default_subject: str = "Medicine, Health and Life Sciences",
    contact_mail: str | None = None,
    upload: bool = False,
    ror: bool = False,
    console: Console | None = None,
) -> dict[str, list[Any]]:
    """
    Process a batch of DOIs and return a summary of results.

    For every DOI a ``MetadataProcessor`` is created and run. A failure of one
    DOI is recorded and reported on the console, then processing continues
    with the next DOI. A summary table is printed once the batch is finished.

    Args:
        dois (set[str]): Set of DOIs to process.
        output_dir (Path): Directory where metadata files will be saved.
        depositor (str | None): Depositor name.
        default_subject (str): Default subject for metadata.
        contact_mail (str | None): Contact email address.
        upload (bool): Flag indicating whether to upload metadata to Dataverse.
        ror (bool): Flag indicating whether to use ROR id for affiliation.
        console (Console | None): Rich console instance for output. When
            omitted, a console using this module's ``THEME`` is created.

    Returns:
        dict[str, list[Any]]: Dictionary with keys 'success' (list of DOIs)
        and 'failed' (list of ``(doi, error message)`` tuples).
    """
    results: dict[str, list[Any]] = {"success": [], "failed": []}

    # The error output below uses the named style "error", which only exists
    # in THEME. A console created without it cannot resolve that style.
    if console is None:
        console = Console(theme=THEME)

    progress_columns = [
        SpinnerColumn(),
        TextColumn("[bold blue]{task.description:<50}"),
        BarColumn(bar_width=None),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TextColumn("•"),  # Separator
        TimeElapsedColumn(),
        TextColumn("•"),  # Separator
        TextColumn("[bold]{task.completed}/{task.total}"),
    ]

    # Steps per DOI: Fetch, Build, Save, plus Upload when requested
    doi_total_steps = 4 if upload else 3

    # Longest DOI prefix shown in the status line before it is abbreviated
    max_label_length = 50

    with Progress(
        *progress_columns,
        console=console,
        transient=True,  # This makes the progress bar disappear after completion
    ) as progress:
        # Overall task: one step per DOI
        main_task = progress.add_task("[bold blue]Processing DOIs...", total=len(dois))

        # Status task for the DOI currently being processed
        status_task = progress.add_task(
            "[cyan]Current:", total=doi_total_steps, visible=False
        )

        for doi in dois:
            try:
                # Only mark the label as abbreviated when it really was cut
                label = doi
                if len(doi) > max_label_length:
                    label = f"{doi[:max_label_length]}..."

                progress.update(
                    status_task,
                    description=f"[cyan]Current: [white]{label}",
                    visible=True,
                    completed=0,  # Reset progress for new DOI
                )

                sanitized_filename = sanitize_filename(normalize_doi(doi))
                output_path = output_dir / f"{sanitized_filename}_metadata.json"

                processor = MetadataProcessor(
                    doi=doi,
                    depositor=depositor,
                    output_path=output_path,
                    default_subject=default_subject,
                    contact_mail=contact_mail,
                    upload=upload,
                    ror=ror,
                    console=console,
                    progress=progress,
                    task_id=status_task,
                )

                processor.process()
                results["success"].append(doi)

            except Exception as e:
                # Per-DOI boundary: record the failure and keep going
                results["failed"].append((doi, str(e)))

                # Show error but keep progress bar
                progress.console.print(
                    f"{ICONS['error']} Error processing {doi}: {str(e)}", style="error"
                )
            finally:
                # Clear current status
                progress.update(status_task, visible=False)
                # A DOI counts as handled whether it succeeded or failed, so
                # the overall bar can reach its total even with failures.
                progress.advance(main_task)

    # Print final summary
    print_summary(results, console)

    return results
|
||||||
|
|
||||||
|
|
||||||
|
def create_argument_parser() -> argparse.ArgumentParser:
    """
    Create and configure the argument parser for the CLI.

    Defined arguments:
        dois: Zero or more positional DOIs.
        -f/--file: File containing DOIs (one per line), opened for reading.
        -o/--output-dir: Output directory for metadata files (default ".").
        -d/--depositor: Name of the depositor (default None).
        -s/--subject: Default subject.
        -m/--contact-mail: Contact email address (default None).
        -u/--upload: Flag, upload to Dataverse.
        -r/--use-ror: Flag, use ROR ID if available.

    Returns:
        argparse.ArgumentParser: Configured argument parser.
    """
    parser = argparse.ArgumentParser(description="Process DOIs to generate metadata")

    parser.add_argument("dois", nargs="*", help="One or more DOIs to process")

    parser.add_argument(
        "-f",
        "--file",
        help="File containing DOIs (one per line)",
        type=argparse.FileType("r"),
    )

    parser.add_argument(
        "-o",
        "--output-dir",
        help="Output directory for metadata files",
        default=".",
    )

    parser.add_argument("-d", "--depositor", help="Name of the depositor", default=None)

    parser.add_argument(
        "-s",
        "--subject",
        help="Default subject",
        default="Medicine, Health and Life Sciences",
    )

    # None (not False) marks "no address given": the value is a string option
    # and is handed on to code that is typed as ``str | None``. Both values
    # are falsy, so truthiness checks on the parsed value are unaffected.
    parser.add_argument(
        "-m", "--contact-mail", help="Contact email address", default=None
    )

    parser.add_argument(
        "-u", "--upload", help="Upload to Dataverse", action="store_true"
    )

    parser.add_argument(
        "-r", "--use-ror", help="Use ROR ID if available", action="store_true"
    )

    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """
    Main entry point for the console script.

    Parses the command line, collects DOIs from the positional arguments and
    from the optional ``-f/--file`` input, makes sure the output directory
    exists, validates the contact email address if one was given, and hands
    everything to ``process_doi_batch``.

    The process exits with status 1 when no DOIs are supplied, when the output
    directory cannot be created, on Ctrl-C, or on any unexpected error. The
    ``sys.exit`` calls inside the ``try`` raise ``SystemExit``, which is not an
    ``Exception`` subclass and therefore passes through the generic handler.
    """
    console = Console(theme=THEME)

    try:
        parser = create_argument_parser()
        args = parser.parse_args()

        # Ensure we have either DOIs as arguments or a file
        if not args.dois and not args.file:
            console.print(
                f"{ICONS['error']} Error: No DOIs provided. Use either command line arguments or -f/--file option.",
                style="error",
            )
            parser.print_help()
            sys.exit(1)

        # Get DOIs from both direct arguments and file if provided
        dois = set(args.dois)  # Start with directly provided DOIs
        if args.file:
            console.print(
                f"{ICONS['file']} Reading DOIs from file: {args.file.name}",
                style="info",
            )
            try:
                dois.update(line.strip() for line in args.file if line.strip())
            finally:
                # argparse opened this handle and never closes it itself.
                # "-f -" maps to sys.stdin, which is left open.
                if args.file is not sys.stdin:
                    args.file.close()

        # Create output directory if it doesn't exist
        output_dir = Path(args.output_dir)
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
            console.print(
                f"{ICONS['folder']} Output directory: {output_dir}\n", style="info"
            )
        except Exception as e:
            console.print(
                f"Failed to create output directory: {str(e)}\n", style="error"
            )
            sys.exit(1)

        # Any falsy "not provided" value becomes None, matching the
        # ``str | None`` parameter of process_doi_batch.
        contact_mail = args.contact_mail or None

        if contact_mail:
            if not validate_email_address(contact_mail):
                raise ValueError(f"Not a valid email address: {contact_mail}")
            console.print(
                f"{ICONS['info']} Exposing contact email <{contact_mail}> to API services.\n",
                style="info",
            )

        # Process DOIs; per-DOI failures are reported by process_doi_batch
        process_doi_batch(
            dois=dois,
            output_dir=output_dir,
            depositor=args.depositor,
            default_subject=args.subject,
            contact_mail=contact_mail,
            upload=args.upload,
            ror=args.use_ror,
            console=console,
        )

    except KeyboardInterrupt:
        console.print(
            f"\n{ICONS['warning']} Processing interrupted by user", style="warning"
        )
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report anything unexpected and exit non-zero
        console.print(
            f"\n{ICONS['error']} An unexpected error occurred: {str(e)}", style="error"
        )
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
11
doi2dataset/main.py
Normal file
11
doi2dataset/main.py
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
"""
|
||||||
|
Main entry point for doi2dataset.
|
||||||
|
|
||||||
|
This module provides the primary entry point for the doi2dataset package,
|
||||||
|
importing and calling the main CLI function.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .cli import main
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
|
@ -6,6 +6,7 @@ and other helper functions used throughout the application.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .validation import (
|
from .validation import (
|
||||||
|
normalize_doi,
|
||||||
normalize_string,
|
normalize_string,
|
||||||
sanitize_filename,
|
sanitize_filename,
|
||||||
split_name,
|
split_name,
|
||||||
|
@ -19,4 +20,5 @@ __all__ = [
|
||||||
"sanitize_filename",
|
"sanitize_filename",
|
||||||
"split_name",
|
"split_name",
|
||||||
"normalize_string",
|
"normalize_string",
|
||||||
|
"normalize_doi",
|
||||||
]
|
]
|
||||||
|
|
|
@ -32,6 +32,21 @@ def validate_doi(doi: str) -> bool:
|
||||||
return is_doi(doi)
|
return is_doi(doi)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_doi(doi: str) -> str:
    """
    Return the normalized form of a DOI as produced by idutils.

    Args:
        doi (str): DOI string to normalize.

    Returns:
        str: The value returned by ``idutils.normalizers.normalize_doi``.
    """
    # idutils is imported at call time rather than at module import time.
    from idutils import normalizers

    normalized = normalizers.normalize_doi(doi)
    return normalized
|
||||||
|
|
||||||
|
|
||||||
def validate_email_address(email: str) -> bool:
|
def validate_email_address(email: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Validate an email address and ensure its domain has an MX record.
|
Validate an email address and ensure its domain has an MX record.
|
||||||
|
|
|
@ -57,7 +57,7 @@ test = [
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
doi2dataset = "doi2dataset:main"
|
doi2dataset = "doi2dataset.cli:main"
|
||||||
|
|
||||||
[tool.setuptools_scm]
|
[tool.setuptools_scm]
|
||||||
version_scheme = "python-simplified-semver"
|
version_scheme = "python-simplified-semver"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue