Add comprehensive docstrings to classes and methods

Improve type annotations in Person.to_dict() and CustomEncoder class
to better reflect the actual return types. Enhance documentation for
Institution, Person, Abstract, and CustomEncoder classes with detailed
method descriptions and parameter explanations.
This commit is contained in:
Alexander Minges 2025-05-20 13:20:18 +02:00
parent f84a274848
commit 554951265e
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4

View file

@ -277,10 +277,24 @@ class CompoundMetadataField(
@dataclass
class Institution:
"""
Represents an institution or organization.
Attributes:
display_name (str): The name of the institution.
ror (str): Research Organization Registry identifier (optional).
"""
display_name: str
ror: str = ""
def affiliation_field(self) -> PrimitiveMetadataField:
"""
Create a metadata field for the affiliation.
Returns:
PrimitiveMetadataField: A metadata field representing the institution,
using ROR ID when available.
"""
if self.ror:
expanded_value = {
"scheme": "http://www.grid.ac/ontology/",
@ -311,25 +325,36 @@ class Person:
affiliation: Institution | str = ""
project: list[str] = field(default_factory=list)
def to_dict(self) -> dict[str, str | list[str]]:
"""Convert Person to a dictionary for JSON serialization."""
def to_dict(self) -> dict[str, str | list[str] | dict[str, str]]:
"""
Convert Person to a dictionary for JSON serialization.
return_dict = {
"family_name": self.family_name,
"given_name": self.given_name,
"orcid": self.orcid,
"email": self.email,
"project": self.project
}
Handles affiliations properly by checking if the affiliation
is an Institution object or a string.
if self.affiliation.ror:
return_dict["affiliation"] = self.affiliation.ror
elif self.affiliation.display_name:
return_dict["affiliation"] = self.affiliation.display_name
else:
return_dict["affiliation"] = ""
Returns:
dict: A dictionary containing the person's information including
name, contact details, and affiliation.
"""
return_dict: dict[str, str | list[str] | dict[str, str]] = {
"family_name": self.family_name,
"given_name": self.given_name,
"orcid": self.orcid,
"email": self.email,
"project": self.project
}
return return_dict
if isinstance(self.affiliation, Institution):
if self.affiliation.ror:
return_dict["affiliation"] = self.affiliation.ror
elif self.affiliation.display_name:
return_dict["affiliation"] = self.affiliation.display_name
else:
return_dict["affiliation"] = ""
else:
return_dict["affiliation"] = ""
return return_dict
def format_name(self) -> str:
"""
@ -342,17 +367,21 @@ class Person:
def author_fields(self) -> list[PrimitiveMetadataField | ControlledVocabularyMetadataField]:
"""
Build metadata fields for an author.
Build metadata fields for the author.
The method handles both Institution objects and string values for affiliations.
Different fields are generated depending on whether ORCID is available.
Returns:
list: List of metadata fields representing the author.
list: List of metadata fields representing the author, including name,
affiliation, and optionally ORCID identifier information.
"""
affiliation_field = None
if isinstance(self.affiliation, Institution):
affiliation_field = self.affiliation.affiliation_field()
else:
affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation)
if self.orcid:
return [
PrimitiveMetadataField("authorName", False, self.format_name()),
@ -368,10 +397,14 @@ class Person:
def dataset_contact_fields(self) -> list[PrimitiveMetadataField]:
"""
Build metadata fields for dataset contact information.
Generate metadata fields for dataset contact.
The method handles both Institution objects and string values for affiliations.
Creates fields for the contact name, affiliation, and email address.
Returns:
list: List of metadata fields for the dataset contact.
list: List of metadata fields for the dataset contact including name,
affiliation, and email address.
"""
affiliation_field = None
@ -379,7 +412,7 @@ class Person:
affiliation_field = self.affiliation.affiliation_field()
else:
affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation)
return [
PrimitiveMetadataField("datasetContactName", False, self.format_name()),
affiliation_field,
@ -414,6 +447,12 @@ class Abstract:
source: str
def __post_init__(self):
"""
Validate that the abstract source is one of the allowed values.
Raises:
ValueError: If source is not one of the allowed values.
"""
allowed_sources = ["crossref", "openalex", "none"]
if self.source not in allowed_sources:
raise ValueError(f"{self.source} is not valid! Needs to be one of {str(allowed_sources)}.")
@ -1529,10 +1568,23 @@ class MetadataProcessor:
try:
# Custom JSON encoder to handle custom objects
class CustomEncoder(json.JSONEncoder):
    """
    JSON encoder that serializes objects exposing a to_dict method.

    Objects such as Person are encoded through the dictionary returned
    by their to_dict method; everything else is left to the standard
    JSONEncoder behaviour.
    """

    def default(self, o: Any) -> Any:
        """
        Return a JSON-serializable representation of an object.

        Args:
            o: The object the encoder could not serialize natively.

        Returns:
            The result of o.to_dict() when the object has a to_dict
            attribute; otherwise the base class decides (it raises
            TypeError for unsupported objects).
        """
        if not hasattr(o, "to_dict"):
            return super().default(o)
        return o.to_dict()
with open(self.output_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=4, ensure_ascii=False, cls=CustomEncoder)