diff --git a/doi2dataset.py b/doi2dataset.py
index 7a49732..fa0d711 100755
--- a/doi2dataset.py
+++ b/doi2dataset.py
@@ -277,10 +277,24 @@ class CompoundMetadataField(
 
 @dataclass
 class Institution:
+    """
+    Represents an institution or organization.
+
+    Attributes:
+        display_name (str): The name of the institution.
+        ror (str): Research Organization Registry identifier (optional).
+    """
     display_name: str
     ror: str = ""
 
     def affiliation_field(self) -> PrimitiveMetadataField:
+        """
+        Create a metadata field for the affiliation.
+
+        Returns:
+            PrimitiveMetadataField: A metadata field representing the institution,
+            using ROR ID when available.
+        """
         if self.ror:
             expanded_value = {
                 "scheme": "http://www.grid.ac/ontology/",
@@ -311,25 +325,36 @@ class Person:
     affiliation: Institution | str = ""
     project: list[str] = field(default_factory=list)
 
-    def to_dict(self) -> dict[str, str | list[str]]:
-        """Convert Person to a dictionary for JSON serialization."""
+    def to_dict(self) -> dict[str, str | list[str] | dict[str, str]]:
+        """
+        Convert Person to a dictionary for JSON serialization.
 
-        return_dict = {
-            "family_name": self.family_name,
-            "given_name": self.given_name,
-            "orcid": self.orcid,
-            "email": self.email,
-            "project": self.project
-        }
+        An Institution affiliation is written as its ROR ID, or its display
+        name when no ROR ID is set; a string affiliation is written as-is.
 
-        if self.affiliation.ror:
-            return_dict["affiliation"] = self.affiliation.ror
-        elif self.affiliation.display_name:
-            return_dict["affiliation"] = self.affiliation.display_name
-        else:
-            return_dict["affiliation"] = ""
+        Returns:
+            dict: A dictionary containing the person's information including
+            name, contact details, and affiliation.
+        """
+        return_dict: dict[str, str | list[str] | dict[str, str]] = {
+            "family_name": self.family_name,
+            "given_name": self.given_name,
+            "orcid": self.orcid,
+            "email": self.email,
+            "project": self.project
+        }
 
-        return return_dict
+        if isinstance(self.affiliation, Institution):
+            if self.affiliation.ror:
+                return_dict["affiliation"] = self.affiliation.ror
+            elif self.affiliation.display_name:
+                return_dict["affiliation"] = self.affiliation.display_name
+            else:
+                return_dict["affiliation"] = ""
+        else:
+            return_dict["affiliation"] = self.affiliation
+
+        return return_dict
 
     def format_name(self) -> str:
         """
@@ -342,17 +367,21 @@ class Person:
 
     def author_fields(self) -> list[PrimitiveMetadataField | ControlledVocabularyMetadataField]:
         """
-        Build metadata fields for an author.
+        Build metadata fields for the author.
+
+        The method handles both Institution objects and string values for affiliations.
+        Different fields are generated depending on whether ORCID is available.
 
         Returns:
-            list: List of metadata fields representing the author.
+            list: List of metadata fields representing the author, including name,
+            affiliation, and optionally ORCID identifier information.
         """
         affiliation_field = None
         if isinstance(self.affiliation, Institution):
             affiliation_field = self.affiliation.affiliation_field()
         else:
             affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation)
-        
+
         if self.orcid:
             return [
                 PrimitiveMetadataField("authorName", False, self.format_name()),
@@ -368,10 +397,14 @@ class Person:
 
     def dataset_contact_fields(self) -> list[PrimitiveMetadataField]:
         """
-        Build metadata fields for dataset contact information.
+        Generate metadata fields for dataset contact.
+
+        The method handles both Institution objects and string values for affiliations.
+        Creates fields for the contact name, affiliation, and email address.
 
         Returns:
-            list: List of metadata fields for the dataset contact.
+            list: List of metadata fields for the dataset contact including name,
+            affiliation, and email address.
         """
 
         affiliation_field = None
@@ -379,7 +412,7 @@ class Person:
             affiliation_field = self.affiliation.affiliation_field()
         else:
             affiliation_field = PrimitiveMetadataField("authorAffiliation", False, self.affiliation)
-        
+
         return [
             PrimitiveMetadataField("datasetContactName", False, self.format_name()),
             affiliation_field,
@@ -414,6 +447,12 @@ class Abstract:
     source: str
 
     def __post_init__(self):
+        """
+        Validate that the abstract source is one of the allowed values.
+
+        Raises:
+            ValueError: If source is not one of the allowed values.
+        """
         allowed_sources = ["crossref", "openalex", "none"]
         if self.source not in allowed_sources:
             raise ValueError(f"{self.source} is not valid! Needs to be one of {str(allowed_sources)}.")
@@ -1529,10 +1568,23 @@ class MetadataProcessor:
         try:
             # Custom JSON encoder to handle custom objects
             class CustomEncoder(json.JSONEncoder):
-                def default(self, obj):
-                    if hasattr(obj, 'to_dict'):
-                        return obj.to_dict()
-                    return super().default(obj)
+                """
+                Custom JSON encoder that handles objects with to_dict method.
+
+                This allows for proper serialization of custom classes like
+                Institution and Person by calling their to_dict method when
+                available.
+                """
+
+                def default(self, o: Any) -> Any:
+                    """
+                    Serialize objects that expose a to_dict method.
+
+                    Falls back to the base encoder, which raises TypeError.
+                    """
+                    if hasattr(o, 'to_dict'):
+                        return o.to_dict()
+                    return super().default(o)
 
             with open(self.output_path, "w", encoding="utf-8") as f:
                 json.dump(metadata, f, indent=4, ensure_ascii=False, cls=CustomEncoder)