Add comprehensive docstrings to classes and methods

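Improve type annotations in Person.to_dict() and CustomEncoder.default()
to better reflect the actual return types. Enhance documentation for
Institution, Person, Abstract, and CustomEncoder classes with detailed
method descriptions and parameter explanations.

Note that this commit is not documentation-only: Person.to_dict() now
guards the affiliation lookup with isinstance(), reading ror/display_name
only when the affiliation is an Institution and serializing a plain-string
affiliation as an empty string. CustomEncoder.default() also renames its
parameter from obj to o.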
This commit is contained in:
Alexander Minges 2025-05-20 13:20:18 +02:00
parent f84a274848
commit 554951265e
Signed by: Athemis
SSH key fingerprint: SHA256:TUXshgulbwL+FRYvBNo54pCsI0auROsSEgSvueKbkZ4

View file

@ -277,10 +277,24 @@ class CompoundMetadataField(
@dataclass @dataclass
class Institution: class Institution:
"""
Represents an institution or organization.
Attributes:
display_name (str): The name of the institution.
ror (str): Research Organization Registry identifier (optional).
"""
display_name: str display_name: str
ror: str = "" ror: str = ""
def affiliation_field(self) -> PrimitiveMetadataField: def affiliation_field(self) -> PrimitiveMetadataField:
"""
Create a metadata field for the affiliation.
Returns:
PrimitiveMetadataField: A metadata field representing the institution,
using ROR ID when available.
"""
if self.ror: if self.ror:
expanded_value = { expanded_value = {
"scheme": "http://www.grid.ac/ontology/", "scheme": "http://www.grid.ac/ontology/",
@ -311,25 +325,36 @@ class Person:
affiliation: Institution | str = "" affiliation: Institution | str = ""
project: list[str] = field(default_factory=list) project: list[str] = field(default_factory=list)
def to_dict(self) -> dict[str, str | list[str]]: def to_dict(self) -> dict[str, str | list[str] | dict[str, str]]:
"""Convert Person to a dictionary for JSON serialization.""" """
Convert Person to a dictionary for JSON serialization.
return_dict = { Handles affiliations properly by checking if the affiliation
"family_name": self.family_name, is an Institution object or a string.
"given_name": self.given_name,
"orcid": self.orcid,
"email": self.email,
"project": self.project
}
if self.affiliation.ror: Returns:
return_dict["affiliation"] = self.affiliation.ror dict: A dictionary containing the person's information including
elif self.affiliation.display_name: name, contact details, and affiliation.
return_dict["affiliation"] = self.affiliation.display_name """
else: return_dict: dict[str, str | list[str] | dict[str, str]] = {
return_dict["affiliation"] = "" "family_name": self.family_name,
"given_name": self.given_name,
"orcid": self.orcid,
"email": self.email,
"project": self.project
}
return return_dict if isinstance(self.affiliation, Institution):
if self.affiliation.ror:
return_dict["affiliation"] = self.affiliation.ror
elif self.affiliation.display_name:
return_dict["affiliation"] = self.affiliation.display_name
else:
return_dict["affiliation"] = ""
else:
return_dict["affiliation"] = ""
return return_dict
def format_name(self) -> str: def format_name(self) -> str:
""" """
@ -342,17 +367,21 @@ class Person:
def author_fields(self) -> list[PrimitiveMetadataField | ControlledVocabularyMetadataField]: def author_fields(self) -> list[PrimitiveMetadataField | ControlledVocabularyMetadataField]:
""" """
Build metadata fields for an author. Build metadata fields for the author.
The method handles both Institution objects and string values for affiliations.
Different fields are generated depending on whether ORCID is available.
Returns: Returns:
list: List of metadata fields representing the author. list: List of metadata fields representing the author, including name,
affiliation, and optionally ORCID identifier information.
""" """
affiliation_field = None affiliation_field = None
if isinstance(self.affiliation, Institution): if isinstance(self.affiliation, Institution):
affiliation_field = self.affiliation.affiliation_field() affiliation_field = self.affiliation.affiliation_field()
else: else:
affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation) affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation)
if self.orcid: if self.orcid:
return [ return [
PrimitiveMetadataField("authorName", False, self.format_name()), PrimitiveMetadataField("authorName", False, self.format_name()),
@ -368,10 +397,14 @@ class Person:
def dataset_contact_fields(self) -> list[PrimitiveMetadataField]: def dataset_contact_fields(self) -> list[PrimitiveMetadataField]:
""" """
Build metadata fields for dataset contact information. Generate metadata fields for dataset contact.
The method handles both Institution objects and string values for affiliations.
Creates fields for the contact name, affiliation, and email address.
Returns: Returns:
list: List of metadata fields for the dataset contact. list: List of metadata fields for the dataset contact including name,
affiliation, and email address.
""" """
affiliation_field = None affiliation_field = None
@ -379,7 +412,7 @@ class Person:
affiliation_field = self.affiliation.affiliation_field() affiliation_field = self.affiliation.affiliation_field()
else: else:
affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation) affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation)
return [ return [
PrimitiveMetadataField("datasetContactName", False, self.format_name()), PrimitiveMetadataField("datasetContactName", False, self.format_name()),
affiliation_field, affiliation_field,
@ -414,6 +447,12 @@ class Abstract:
source: str source: str
def __post_init__(self): def __post_init__(self):
"""
Validate that the abstract source is one of the allowed values.
Raises:
ValueError: If source is not one of the allowed values.
"""
allowed_sources = ["crossref", "openalex", "none"] allowed_sources = ["crossref", "openalex", "none"]
if self.source not in allowed_sources: if self.source not in allowed_sources:
raise ValueError(f"{self.source} is not valid! Needs to be one of {str(allowed_sources)}.") raise ValueError(f"{self.source} is not valid! Needs to be one of {str(allowed_sources)}.")
@ -1529,10 +1568,23 @@ class MetadataProcessor:
try: try:
# Custom JSON encoder to handle custom objects # Custom JSON encoder to handle custom objects
class CustomEncoder(json.JSONEncoder): class CustomEncoder(json.JSONEncoder):
def default(self, obj): """
if hasattr(obj, 'to_dict'): Custom JSON encoder that handles objects with to_dict method.
return obj.to_dict()
return super().default(obj) This allows for proper serialization of custom classes like
Institution and Person by calling their to_dict method when
available.
Args:
o: The object to serialize.
Returns:
A JSON-serializable representation of the object.
"""
def default(self, o: Any) -> Any:
if hasattr(o, 'to_dict'):
return o.to_dict()
return super().default(o)
with open(self.output_path, "w", encoding="utf-8") as f: with open(self.output_path, "w", encoding="utf-8") as f:
json.dump(metadata, f, indent=4, ensure_ascii=False, cls=CustomEncoder) json.dump(metadata, f, indent=4, ensure_ascii=False, cls=CustomEncoder)