diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index d898768..0000000 --- a/.coveragerc +++ /dev/null @@ -1,23 +0,0 @@ -[run] -source = doi2dataset -omit = - */tests/* - */docs/* - setup.py - conf.py - __init__.py - -[report] -exclude_lines = - pragma: no cover - def __repr__ - if self.debug: - raise NotImplementedError - if __name__ == .__main__.: - pass - raise ImportError - except ImportError - def __str__ - -[html] -directory = htmlcov \ No newline at end of file diff --git a/README.md b/README.md index 8b66b2a..6b11e06 100644 --- a/README.md +++ b/README.md @@ -69,94 +69,12 @@ Documentation is generated using Sphinx. See the `docs/` directory for detailed ## Testing -## Testing - -Tests are implemented with pytest. The test suite provides comprehensive coverage of core functionalities. To run the tests, execute: +Tests are implemented with pytest. To run the tests, execute: ```bash pytest ``` -Or using the Python module syntax: - -```bash -python -m pytest -``` - -### Code Coverage - -The project includes code coverage analysis using pytest-cov. Current coverage is approximately 61% of the codebase, with key utilities and test infrastructure at 99-100% coverage. - -To run tests with code coverage analysis: - -```bash -pytest --cov=. -``` - -Generate a detailed HTML coverage report: - -```bash -pytest --cov=. --cov-report=html -``` - -This creates a `htmlcov` directory. Open `htmlcov/index.html` in a browser to view the detailed coverage report. - -A `.coveragerc` configuration file is provided that: -- Excludes test files, documentation, and boilerplate code from coverage analysis -- Configures reporting to ignore common non-testable lines (like defensive imports) -- Sets the output directory for HTML reports - -Recent improvements have increased coverage from 48% to 61% by adding focused tests for: -- Citation building functionality -- License processing and validation -- Metadata field extraction -- OpenAlex integration -- Publication data parsing and validation - -Areas that could benefit from additional testing: -- More edge cases in the MetadataProcessor class workflow -- Additional CitationBuilder scenarios with diverse inputs -- Complex network interactions and error handling - -### Test Structure - -The test suite is organized into six main files: - -1. **test_doi2dataset.py**: Basic tests for core functions like phase checking, name splitting and DOI validation. -2. **test_fetch_doi_mock.py**: Tests API interactions using a mock OpenAlex response stored in `srep45389.json`. -3. **test_citation_builder.py**: Tests for building citation metadata from API responses. -4. **test_metadata_processor.py**: Tests for the metadata processing workflow. -5. **test_license_processor.py**: Tests for license processing and validation. -6. **test_publication_utils.py**: Tests for publication year extraction and date handling. - -### Test Categories - -The test suite covers the following categories of functionality: - -#### Core Functionality Tests - -- **DOI Validation and Processing**: Parameterized tests for DOI normalization, validation, and filename sanitization with various inputs. -- **Phase Management**: Tests for checking publication year against defined project phases, including boundary cases. -- **Name Processing**: Extensive tests for parsing and splitting author names in different formats (with/without commas, middle initials, etc.). -- **Email Validation**: Tests for proper validation of email addresses with various domain configurations. - -#### API Integration Tests - -- **Mock API Responses**: Tests that use a saved OpenAlex API response (`srep45389.json`) to simulate API interactions without making actual network requests. -- **Data Fetching**: Tests for retrieving and parsing data from the OpenAlex API. -- **Abstract Extraction**: Tests for extracting and cleaning abstracts from OpenAlex's inverted index format, including handling of empty or malformed abstracts. -- **Subject Mapping**: Tests for mapping OpenAlex topics to controlled vocabulary subject terms. - -#### Metadata Processing Tests - -- **Citation Building**: Tests for properly building citation metadata from API responses. -- **License Processing**: Tests for correctly identifying and formatting license information from various license IDs. -- **Principal Investigator Matching**: Tests for finding project PIs based on ORCID identifiers. -- **Configuration Loading**: Tests for properly loading and validating configuration from files. -- **Metadata Workflow**: Tests for the complete metadata processing workflow. - -These tests ensure that all components work correctly in isolation and together as a system, with special attention to edge cases and error handling. - ## Contributing Contributions are welcome! Please fork the repository and submit a pull request with your improvements. diff --git a/__init__.py b/__init__.py index 0db05d7..e69de29 100644 --- a/__init__.py +++ b/__init__.py @@ -1,17 +0,0 @@ -# Import all classes and functions needed for testing -from .doi2dataset import ( - AbstractProcessor, - APIClient, - CitationBuilder, - Config, - License, - LicenseProcessor, - MetadataProcessor, - NameProcessor, - PIFinder, - Person, - Phase, - SubjectMapper, - sanitize_filename, - validate_email_address, -) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index e762592..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,8 +0,0 @@ -import os -import sys - -# Get the path to the parent directory of tests -parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - -# Add the parent directory to sys.path -sys.path.insert(0, parent_dir) \ No newline at end of file diff --git a/tests/srep45389.json b/tests/srep45389.json index f659245..879d19a 100644 --- a/tests/srep45389.json +++ b/tests/srep45389.json @@ -1 +1 @@ -{"id":"https://openalex.org/W2598156506","doi":"https://doi.org/10.1038/srep45389","title":"Structural intermediates and directionality of the swiveling motion of Pyruvate Phosphate Dikinase","display_name":"Structural intermediates and directionality of the swiveling motion of Pyruvate Phosphate Dikinase","publication_year":2017,"publication_date":"2017-03-30","ids":{"openalex":"https://openalex.org/W2598156506","doi":"https://doi.org/10.1038/srep45389","mag":"2598156506","pmid":"https://pubmed.ncbi.nlm.nih.gov/28358005","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/5371819"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1038/srep45389","pdf_url":"https://www.nature.com/articles/srep45389.pdf","source":{"id":"https://openalex.org/S196734849","display_name":"Scientific Reports","issn_l":"2045-2322","issn":["2045-2322"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.nature.com/articles/srep45389.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041311268","display_name":"Alexander Minges","orcid":"https://orcid.org/0000-0001-7760-2753"},"institutions":[{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]},{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander Minges","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I4210126213","https://openalex.org/I44260953"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075506864","display_name":"Daniel Ciupka","orcid":null},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Daniel Ciupka","raw_affiliation_strings":["Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I44260953"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023218466","display_name":"Christian Winkler","orcid":"https://orcid.org/0000-0002-7463-6840"},"institutions":[{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]},{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Winkler","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I4210126213","https://openalex.org/I44260953"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110560286","display_name":"Astrid H\u00f6ppner","orcid":null},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]},{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Astrid H\u00f6ppner","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I44260953","https://openalex.org/I4210126213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063949219","display_name":"Holger Gohlke","orcid":"https://orcid.org/0000-0001-8613-1447"},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Holger Gohlke","raw_affiliation_strings":["Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I44260953"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021507772","display_name":"Georg Groth","orcid":"https://orcid.org/0000-0002-1806-9861"},"institutions":[{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]},{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Groth","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I4210126213","https://openalex.org/I44260953"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1890,"currency":"EUR","value_usd":2190},"apc_paid":{"value":1890,"currency":"EUR","value_usd":2190},"fwci":1.098,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":19,"citation_normalized_percentile":{"value":0.825784,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":91},"biblio":{"volume":"7","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11162","display_name":"Enzyme Structure and Function","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11162","display_name":"Enzyme Structure and Function","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12827","display_name":"Biochemical and Molecular Research","score":0.9899,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bioenergetics","display_name":"Bioenergetics","score":0.4927278},{"id":"https://openalex.org/keywords/adenosine-triphosphate","display_name":"Adenosine triphosphate","score":0.42800155},{"id":"https://openalex.org/keywords/catalytic-cycle","display_name":"Catalytic cycle","score":0.41661885}],"concepts":[{"id":"https://openalex.org/C143937172","wikidata":"https://www.wikidata.org/wiki/Q303568","display_name":"Phosphoenolpyruvate carboxykinase","level":3,"score":0.5555539},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.51772255},{"id":"https://openalex.org/C100206155","wikidata":"https://www.wikidata.org/wiki/Q570714","display_name":"Bioenergetics","level":3,"score":0.4927278},{"id":"https://openalex.org/C12554922","wikidata":"https://www.wikidata.org/wiki/Q7100","display_name":"Biophysics","level":1,"score":0.47733337},{"id":"https://openalex.org/C112243037","wikidata":"https://www.wikidata.org/wiki/Q22327117","display_name":"ATP synthase","level":3,"score":0.46689498},{"id":"https://openalex.org/C166342909","wikidata":"https://www.wikidata.org/wiki/Q845326","display_name":"Allosteric regulation","level":3,"score":0.46423048},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.4587974},{"id":"https://openalex.org/C181199279","wikidata":"https://www.wikidata.org/wiki/Q8047","display_name":"Enzyme","level":2,"score":0.42822945},{"id":"https://openalex.org/C2779564974","wikidata":"https://www.wikidata.org/wiki/Q80863","display_name":"Adenosine triphosphate","level":2,"score":0.42800155},{"id":"https://openalex.org/C63338738","wikidata":"https://www.wikidata.org/wiki/Q287862","display_name":"Catalytic cycle","level":3,"score":0.41661885},{"id":"https://openalex.org/C71240020","wikidata":"https://www.wikidata.org/wiki/Q186011","display_name":"Stereochemistry","level":1,"score":0.34238213},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.29859126},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.278366},{"id":"https://openalex.org/C28859421","wikidata":"https://www.wikidata.org/wiki/Q39572","display_name":"Mitochondrion","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D037142","descriptor_name":"Flaveria","qualifier_ui":"Q000201","qualifier_name":"enzymology","is_major_topic":true},{"descriptor_ui":"D001665","descriptor_name":"Binding Sites","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020134","descriptor_name":"Catalytic Domain","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D018360","descriptor_name":"Crystallography, X-Ray","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D056004","descriptor_name":"Molecular Dynamics Simulation","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010728","descriptor_name":"Phosphoenolpyruvate","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D010940","descriptor_name":"Plant Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D025341","descriptor_name":"Principal Component Analysis","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011772","descriptor_name":"Pyruvate, Orthophosphate Dikinase","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011994","descriptor_name":"Recombinant Proteins","qualifier_ui":"Q000096","qualifier_name":"biosynthesis","is_major_topic":false}],"locations_count":4,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1038/srep45389","pdf_url":"https://www.nature.com/articles/srep45389.pdf","source":{"id":"https://openalex.org/S196734849","display_name":"Scientific Reports","issn_l":"2045-2322","issn":["2045-2322"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://europepmc.org/articles/pmc5371819","pdf_url":"https://europepmc.org/articles/pmc5371819?pdf=render","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":["European Bioinformatics Institute"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5371819","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/28358005","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1038/srep45389","pdf_url":"https://www.nature.com/articles/srep45389.pdf","source":{"id":"https://openalex.org/S196734849","display_name":"Scientific Reports","issn_l":"2045-2322","issn":["2045-2322"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.77,"id":"https://metadata.un.org/sdg/7"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":77,"referenced_works":["https://openalex.org/W1008982281","https://openalex.org/W1489326505","https://openalex.org/W15000766","https://openalex.org/W1513693018","https://openalex.org/W1556574015","https://openalex.org/W1607931568","https://openalex.org/W1964020813","https://openalex.org/W1965890418","https://openalex.org/W1966041739","https://openalex.org/W1968686371","https://openalex.org/W1970103851","https://openalex.org/W1973038113","https://openalex.org/W1976499671","https://openalex.org/W1980430311","https://openalex.org/W1984695931","https://openalex.org/W1987040923","https://openalex.org/W1993177346","https://openalex.org/W1996435013","https://openalex.org/W2001367916","https://openalex.org/W2001641653","https://openalex.org/W2005313434","https://openalex.org/W2014513938","https://openalex.org/W2021218714","https://openalex.org/W2030016678","https://openalex.org/W2035266068","https://openalex.org/W2050974639","https://openalex.org/W2055463751","https://openalex.org/W2058479011","https://openalex.org/W2061029954","https://openalex.org/W2073149987","https://openalex.org/W2074558323","https://openalex.org/W2074569666","https://openalex.org/W2093547815","https://openalex.org/W2106140689","https://openalex.org/W2106290432","https://openalex.org/W2108200912","https://openalex.org/W2110808180","https://openalex.org/W2112154906","https://openalex.org/W2113643089","https://openalex.org/W2113677196","https://openalex.org/W2114622716","https://openalex.org/W2115339329","https://openalex.org/W2118233996","https://openalex.org/W2122949279","https://openalex.org/W2124026197","https://openalex.org/W2127774996","https://openalex.org/W2130060890","https://openalex.org/W2131377322","https://openalex.org/W2134780839","https://openalex.org/W2137312799","https://openalex.org/W2140520515","https://openalex.org/W2143815085","https://openalex.org/W2144288821","https://openalex.org/W2144368922","https://openalex.org/W2145412238","https://openalex.org/W2150444353","https://openalex.org/W2151541959","https://openalex.org/W2153630542","https://openalex.org/W2154297643","https://openalex.org/W2154714625","https://openalex.org/W2156499807","https://openalex.org/W2158422233","https://openalex.org/W2159211495","https://openalex.org/W2159675211","https://openalex.org/W2159862205","https://openalex.org/W2163341755","https://openalex.org/W2180229411","https://openalex.org/W2268568654","https://openalex.org/W2307793770","https://openalex.org/W2332712348","https://openalex.org/W2413979296","https://openalex.org/W4210521569","https://openalex.org/W4210586598","https://openalex.org/W4211156111","https://openalex.org/W4211196614","https://openalex.org/W4248872320","https://openalex.org/W789458867"],"related_works":["https://openalex.org/W4250243447","https://openalex.org/W3175305449","https://openalex.org/W3158746188","https://openalex.org/W2580553505","https://openalex.org/W2187787658","https://openalex.org/W2080305495","https://openalex.org/W2075300630","https://openalex.org/W2070505046","https://openalex.org/W2053285109","https://openalex.org/W1973337789"],"abstract_inverted_index":{"Abstract":[0],"Pyruvate":[1],"phosphate":[2],"dikinase":[3],"(PPDK)":[4],"is":[5,42],"a":[6,60,94],"vital":[7],"enzyme":[8,41],"in":[9,25,35,57,129,176],"cellular":[10],"energy":[11,141],"metabolism":[12],"catalyzing":[13],"the":[14,30,52,64,81,99,117,122,127,130,158,163,167,177],"ATP-":[15],"and":[16,37,115,138,151,172],"P":[17],"i":[18],"-dependent":[19],"formation":[20],"of":[21,51,63,80,89,101,126,157,166],"phosphoenolpyruvate":[22],"from":[23,91,105],"pyruvate":[24],"C":[26,102],"4":[27,103],"-plants,":[28],"but":[29],"reverse":[31],"reaction":[32],"forming":[33],"ATP":[34],"bacteria":[36],"protozoa.":[38],"The":[39],"multi-domain":[40],"considered":[43],"an":[44,181],"efficient":[45],"molecular":[46,78],"machine":[47],"that":[48],"performs":[49],"one":[50],"largest":[53],"single":[54],"domain":[55,67],"movements":[56],"proteins.":[58],"However,":[59],"comprehensive":[61],"understanding":[62],"proposed":[65],"swiveling":[66,160],"motion":[68,161],"has":[69],"been":[70],"limited":[71],"by":[72],"not":[73],"knowing":[74],"structural":[75,170],"intermediates":[76,114],"or":[77],"dynamics":[79],"catalytic":[82,131],"process.":[83],"Here,":[84],"we":[85],"present":[86],"crystal":[87],"structures":[88,109],"PPDKs":[90],"Flaveria":[92],",":[93],"model":[95],"genus":[96],"for":[97,186],"studying":[98],"evolution":[100],"-enzymes":[104],"phylogenetic":[106],"ancestors.":[107],"These":[108],"resolve":[110],"yet":[111],"unknown":[112],"conformational":[113,124,164],"provide":[116],"first":[118],"detailed":[119],"view":[120],"on":[121],"large":[123],"transitions":[125],"protein":[128],"cycle.":[132],"Independently":[133],"performed":[134],"unrestrained":[135],"MD":[136],"simulations":[137],"configurational":[139],"free":[140],"calculations":[142],"also":[143],"identified":[144],"these":[145],"intermediates.":[146],"In":[147],"all,":[148],"our":[149],"experimental":[150],"computational":[152],"data":[153],"reveal":[154],"strict":[155],"coupling":[156],"CD":[159],"to":[162],"state":[165],"NBD.":[168],"Moreover,":[169],"asymmetries":[171],"nucleotide":[173],"binding":[174,183],"states":[175],"PPDK":[178],"dimer":[179],"support":[180],"alternate":[182],"change":[184],"mechanism":[185],"this":[187],"intriguing":[188],"bioenergetic":[189],"enzyme.":[190]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2598156506","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4}],"updated_date":"2025-05-20T03:58:09.238188","created_date":"2017-04-07"} +{"id":"https://openalex.org/W2598156506","doi":"https://doi.org/10.1038/srep45389","title":"Structural intermediates and directionality of the swiveling motion of Pyruvate Phosphate Dikinase","display_name":"Structural intermediates and directionality of the swiveling motion of Pyruvate Phosphate Dikinase","publication_year":2017,"publication_date":"2017-03-30","ids":{"openalex":"https://openalex.org/W2598156506","doi":"https://doi.org/10.1038/srep45389","mag":"2598156506","pmid":"https://pubmed.ncbi.nlm.nih.gov/28358005","pmcid":"https://www.ncbi.nlm.nih.gov/pmc/articles/5371819"},"language":"en","primary_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1038/srep45389","pdf_url":"https://www.nature.com/articles/srep45389.pdf","source":{"id":"https://openalex.org/S196734849","display_name":"Scientific Reports","issn_l":"2045-2322","issn":["2045-2322"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"type":"article","type_crossref":"journal-article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.nature.com/articles/srep45389.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041311268","display_name":"Alexander Minges","orcid":"https://orcid.org/0000-0001-7760-2753"},"institutions":[{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]},{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander Minges","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I4210126213","https://openalex.org/I44260953"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075506864","display_name":"Daniel Ciupka","orcid":null},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Daniel Ciupka","raw_affiliation_strings":["Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I44260953"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023218466","display_name":"Christian Winkler","orcid":"https://orcid.org/0000-0002-7463-6840"},"institutions":[{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]},{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christian Winkler","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I4210126213","https://openalex.org/I44260953"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110560286","display_name":"Astrid H\u00f6ppner","orcid":null},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]},{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Astrid H\u00f6ppner","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I44260953","https://openalex.org/I4210126213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063949219","display_name":"Holger Gohlke","orcid":"https://orcid.org/0000-0001-8613-1447"},"institutions":[{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Holger Gohlke","raw_affiliation_strings":["Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Pharmaceutical and Medicinal Chemistry, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I44260953"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021507772","display_name":"Georg Groth","orcid":"https://orcid.org/0000-0002-1806-9861"},"institutions":[{"id":"https://openalex.org/I4210126213","display_name":"Cluster of Excellence on Plant Sciences","ror":"https://ror.org/034waa237","country_code":"DE","type":"funder","lineage":["https://openalex.org/I1305996414","https://openalex.org/I149899117","https://openalex.org/I171892758","https://openalex.org/I180923762","https://openalex.org/I4210126213","https://openalex.org/I4210141639","https://openalex.org/I44260953"]},{"id":"https://openalex.org/I44260953","display_name":"Heinrich Heine University D\u00fcsseldorf","ror":"https://ror.org/024z2rq82","country_code":"DE","type":"funder","lineage":["https://openalex.org/I44260953"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Georg Groth","raw_affiliation_strings":["Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany"],"affiliations":[{"raw_affiliation_string":"Cluster of Excellence on Plant Sciences (CEPLAS), Institute of Biochemical Plant Physiology, Heinrich Heine University D\u00fcsseldorf, D\u00fcsseldorf, 40204, Germany","institution_ids":["https://openalex.org/I4210126213","https://openalex.org/I44260953"]}]}],"institution_assertions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1890,"currency":"EUR","value_usd":2190},"apc_paid":{"value":1890,"currency":"EUR","value_usd":2190},"fwci":1.098,"has_fulltext":true,"fulltext_origin":"pdf","cited_by_count":18,"citation_normalized_percentile":{"value":0.69746,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":90},"biblio":{"volume":"7","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"primary_topic":{"id":"https://openalex.org/T11162","display_name":"Enzyme Structure and Function","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11162","display_name":"Enzyme Structure and Function","score":0.9996,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9951,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T12827","display_name":"Biochemical and Molecular Research","score":0.9899,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bioenergetics","display_name":"Bioenergetics","score":0.4927278},{"id":"https://openalex.org/keywords/adenosine-triphosphate","display_name":"Adenosine triphosphate","score":0.42800155},{"id":"https://openalex.org/keywords/catalytic-cycle","display_name":"Catalytic cycle","score":0.41661885}],"concepts":[{"id":"https://openalex.org/C143937172","wikidata":"https://www.wikidata.org/wiki/Q303568","display_name":"Phosphoenolpyruvate carboxykinase","level":3,"score":0.5555539},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.51772255},{"id":"https://openalex.org/C100206155","wikidata":"https://www.wikidata.org/wiki/Q570714","display_name":"Bioenergetics","level":3,"score":0.4927278},{"id":"https://openalex.org/C12554922","wikidata":"https://www.wikidata.org/wiki/Q7100","display_name":"Biophysics","level":1,"score":0.47733337},{"id":"https://openalex.org/C112243037","wikidata":"https://www.wikidata.org/wiki/Q22327117","display_name":"ATP synthase","level":3,"score":0.46689498},{"id":"https://openalex.org/C166342909","wikidata":"https://www.wikidata.org/wiki/Q845326","display_name":"Allosteric regulation","level":3,"score":0.46423048},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.4587974},{"id":"https://openalex.org/C181199279","wikidata":"https://www.wikidata.org/wiki/Q8047","display_name":"Enzyme","level":2,"score":0.42822945},{"id":"https://openalex.org/C2779564974","wikidata":"https://www.wikidata.org/wiki/Q80863","display_name":"Adenosine triphosphate","level":2,"score":0.42800155},{"id":"https://openalex.org/C63338738","wikidata":"https://www.wikidata.org/wiki/Q287862","display_name":"Catalytic cycle","level":3,"score":0.41661885},{"id":"https://openalex.org/C71240020","wikidata":"https://www.wikidata.org/wiki/Q186011","display_name":"Stereochemistry","level":1,"score":0.34238213},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.29859126},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.278366},{"id":"https://openalex.org/C28859421","wikidata":"https://www.wikidata.org/wiki/Q39572","display_name":"Mitochondrion","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D037142","descriptor_name":"Flaveria","qualifier_ui":"Q000201","qualifier_name":"enzymology","is_major_topic":true},{"descriptor_ui":"D001665","descriptor_name":"Binding Sites","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020134","descriptor_name":"Catalytic Domain","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D018360","descriptor_name":"Crystallography, X-Ray","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D056004","descriptor_name":"Molecular Dynamics Simulation","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D010728","descriptor_name":"Phosphoenolpyruvate","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D010940","descriptor_name":"Plant Proteins","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D025341","descriptor_name":"Principal Component Analysis","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011487","descriptor_name":"Protein Conformation","qualifier_ui":"","qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D011772","descriptor_name":"Pyruvate, Orthophosphate Dikinase","qualifier_ui":"Q000737","qualifier_name":"chemistry","is_major_topic":false},{"descriptor_ui":"D011994","descriptor_name":"Recombinant Proteins","qualifier_ui":"Q000096","qualifier_name":"biosynthesis","is_major_topic":false}],"locations_count":4,"locations":[{"is_oa":true,"landing_page_url":"https://doi.org/10.1038/srep45389","pdf_url":"https://www.nature.com/articles/srep45389.pdf","source":{"id":"https://openalex.org/S196734849","display_name":"Scientific Reports","issn_l":"2045-2322","issn":["2045-2322"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://europepmc.org/articles/pmc5371819","pdf_url":"https://europepmc.org/articles/pmc5371819?pdf=render","source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":["European Bioinformatics Institute"],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5371819","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true},{"is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/28358005","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_indexed_in_scopus":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":["National Institutes of Health"],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false}],"best_oa_location":{"is_oa":true,"landing_page_url":"https://doi.org/10.1038/srep45389","pdf_url":"https://www.nature.com/articles/srep45389.pdf","source":{"id":"https://openalex.org/S196734849","display_name":"Scientific Reports","issn_l":"2045-2322","issn":["2045-2322"],"is_oa":true,"is_in_doaj":true,"is_indexed_in_scopus":true,"is_core":true,"host_organization":"https://openalex.org/P4310319908","host_organization_name":"Nature Portfolio","host_organization_lineage":["https://openalex.org/P4310319908","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Nature Portfolio","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true},"sustainable_development_goals":[{"score":0.77,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"grants":[],"datasets":[],"versions":[],"referenced_works_count":77,"referenced_works":["https://openalex.org/W1008982281","https://openalex.org/W1489326505","https://openalex.org/W15000766","https://openalex.org/W1513693018","https://openalex.org/W1556574015","https://openalex.org/W1607931568","https://openalex.org/W1964020813","https://openalex.org/W1965890418","https://openalex.org/W1966041739","https://openalex.org/W1968686371","https://openalex.org/W1970103851","https://openalex.org/W1973038113","https://openalex.org/W1976499671","https://openalex.org/W1980430311","https://openalex.org/W1984695931","https://openalex.org/W1987040923","https://openalex.org/W1993177346","https://openalex.org/W1996435013","https://openalex.org/W2001367916","https://openalex.org/W2001641653","https://openalex.org/W2005313434","https://openalex.org/W2014513938","https://openalex.org/W2021218714","https://openalex.org/W2030016678","https://openalex.org/W2035266068","https://openalex.org/W2050974639","https://openalex.org/W2055463751","https://openalex.org/W2058479011","https://openalex.org/W2061029954","https://openalex.org/W2073149987","https://openalex.org/W2074558323","https://openalex.org/W2074569666","https://openalex.org/W2093547815","https://openalex.org/W2106140689","https://openalex.org/W2106290432","https://openalex.org/W2108200912","https://openalex.org/W2110808180","https://openalex.org/W2112154906","https://openalex.org/W2113643089","https://openalex.org/W2113677196","https://openalex.org/W2114622716","https://openalex.org/W2115339329","https://openalex.org/W2118233996","https://openalex.org/W2122949279","https://openalex.org/W2124026197","https://openalex.org/W2127774996","https://openalex.org/W2130060890","https://openalex.org/W2131377322","https://openalex.org/W2134780839","https://openalex.org/W2137312799","https://openalex.org/W2140520515","https://openalex.org/W2143815085","https://openalex.org/W2144288821","https://openalex.org/W2144368922","https://openalex.org/W2145412238","https://openalex.org/W2150444353","https://openalex.org/W2151541959","https://openalex.org/W2153630542","https://openalex.org/W2154297643","https://openalex.org/W2154714625","https://openalex.org/W2156499807","https://openalex.org/W2158422233","https://openalex.org/W2159211495","https://openalex.org/W2159675211","https://openalex.org/W2159862205","https://openalex.org/W2163341755","https://openalex.org/W2180229411","https://openalex.org/W2268568654","https://openalex.org/W2307793770","https://openalex.org/W2332712348","https://openalex.org/W2413979296","https://openalex.org/W4210521569","https://openalex.org/W4210586598","https://openalex.org/W4211156111","https://openalex.org/W4211196614","https://openalex.org/W4248872320","https://openalex.org/W789458867"],"related_works":["https://openalex.org/W4250243447","https://openalex.org/W3175305449","https://openalex.org/W3158746188","https://openalex.org/W2580553505","https://openalex.org/W2187787658","https://openalex.org/W2080305495","https://openalex.org/W2075300630","https://openalex.org/W2070505046","https://openalex.org/W2053285109","https://openalex.org/W1973337789"],"abstract_inverted_index":{"Abstract":[0],"Pyruvate":[1],"phosphate":[2],"dikinase":[3],"(PPDK)":[4],"is":[5,42],"a":[6,60,94],"vital":[7],"enzyme":[8,41],"in":[9,25,35,57,129,176],"cellular":[10],"energy":[11,141],"metabolism":[12],"catalyzing":[13],"the":[14,30,52,64,81,99,117,122,127,130,158,163,167,177],"ATP-":[15],"and":[16,37,115,138,151,172],"P":[17],"i":[18],"-dependent":[19],"formation":[20],"of":[21,51,63,80,89,101,126,157,166],"phosphoenolpyruvate":[22],"from":[23,91,105],"pyruvate":[24],"C":[26,102],"4":[27,103],"-plants,":[28],"but":[29],"reverse":[31],"reaction":[32],"forming":[33],"ATP":[34],"bacteria":[36],"protozoa.":[38],"The":[39],"multi-domain":[40],"considered":[43],"an":[44,181],"efficient":[45],"molecular":[46,78],"machine":[47],"that":[48],"performs":[49],"one":[50],"largest":[53],"single":[54],"domain":[55,67],"movements":[56],"proteins.":[58],"However,":[59],"comprehensive":[61],"understanding":[62],"proposed":[65],"swiveling":[66,160],"motion":[68,161],"has":[69],"been":[70],"limited":[71],"by":[72],"not":[73],"knowing":[74],"structural":[75,170],"intermediates":[76,114],"or":[77],"dynamics":[79],"catalytic":[82,131],"process.":[83],"Here,":[84],"we":[85],"present":[86],"crystal":[87],"structures":[88,109],"PPDKs":[90],"Flaveria":[92],",":[93],"model":[95],"genus":[96],"for":[97,186],"studying":[98],"evolution":[100],"-enzymes":[104],"phylogenetic":[106],"ancestors.":[107],"These":[108],"resolve":[110],"yet":[111],"unknown":[112],"conformational":[113,124,164],"provide":[116],"first":[118],"detailed":[119],"view":[120],"on":[121],"large":[123],"transitions":[125],"protein":[128],"cycle.":[132],"Independently":[133],"performed":[134],"unrestrained":[135],"MD":[136],"simulations":[137],"configurational":[139],"free":[140],"calculations":[142],"also":[143],"identified":[144],"these":[145],"intermediates.":[146],"In":[147],"all,":[148],"our":[149],"experimental":[150],"computational":[152],"data":[153],"reveal":[154],"strict":[155],"coupling":[156],"CD":[159],"to":[162],"state":[165],"NBD.":[168],"Moreover,":[169],"asymmetries":[171],"nucleotide":[173],"binding":[174,183],"states":[175],"PPDK":[178],"dimer":[179],"support":[180],"alternate":[182],"change":[184],"mechanism":[185],"this":[187],"intriguing":[188],"bioenergetic":[189],"enzyme.":[190]},"abstract_inverted_index_v3":null,"cited_by_api_url":"https://api.openalex.org/works?filter=cites:W2598156506","counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":4}],"updated_date":"2025-03-18T16:54:52.426495","created_date":"2017-04-07"} diff --git a/tests/test_citation_builder.py b/tests/test_citation_builder.py deleted file mode 100644 index f66aa1a..0000000 --- a/tests/test_citation_builder.py +++ /dev/null @@ -1,174 +0,0 @@ -import json -import os -import pytest -from unittest.mock import MagicMock - -from doi2dataset import ( - CitationBuilder, - PIFinder, - Person -) - - -@pytest.fixture -def openalex_data(): - """Load the saved JSON response from the file 'srep45389.json'""" - json_path = os.path.join(os.path.dirname(__file__), "srep45389.json") - with open(json_path, "r", encoding="utf-8") as f: - data = json.load(f) - return data - - -@pytest.fixture -def test_pi(): - """Create a test PI for matching in tests""" - return Person( - family_name="Test", - given_name="Author", - orcid="0000-0000-0000-1234", - email="test.author@example.org", - affiliation="Test University", - project=["Test Project"] - ) - - -@pytest.fixture -def pi_finder(test_pi): - """Create a PIFinder with a test PI""" - finder = PIFinder(pis=[test_pi]) - return finder - - -def test_build_authors(openalex_data, pi_finder): - """Test that CitationBuilder.build_authors correctly processes author information""" - doi = "10.1038/srep45389" - builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder) - - # Call the build_authors method - returns tuple of (authors, corresponding_authors) - authors, corresponding_authors = builder.build_authors() - - # Verify that authors were created - assert authors is not None - assert isinstance(authors, list) - assert len(authors) > 0 - - # Check the structure of the authors - for author in authors: - assert hasattr(author, "given_name") - assert hasattr(author, "family_name") - assert isinstance(author.given_name, str) - assert isinstance(author.family_name, str) - - -def test_build_authors_with_affiliations(openalex_data, pi_finder): - """Test that author affiliations are correctly processed""" - doi = "10.1038/srep45389" - builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder) - - # Call the build_authors method - authors, _ = builder.build_authors() - - # Check if any authors have affiliation - affiliation_found = False - for author in authors: - if hasattr(author, "affiliation") and author.affiliation: - affiliation_found = True - break - - # We may not have affiliations in the test data, so only assert if we found any - if affiliation_found: - assert affiliation_found, "No author with affiliation found" - - -def test_build_authors_with_corresponding_author(openalex_data, pi_finder): - """Test that corresponding authors are correctly identified""" - doi = "10.1038/srep45389" - builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder) - - # Process authors - authors, corresponding_authors = builder.build_authors() - - # Verify that corresponding authors were identified - if len(corresponding_authors) > 0: - assert len(corresponding_authors) > 0, "No corresponding authors identified" - - # Check structure of corresponding authors - for author in corresponding_authors: - assert hasattr(author, "given_name") - assert hasattr(author, "family_name") - assert isinstance(author.given_name, str) - assert isinstance(author.family_name, str) - - -def test_build_authors_with_ror(openalex_data, pi_finder): - """Test that ROR (Research Organization Registry) identifiers are correctly used when ror=True""" - doi = "10.1038/srep45389" - - # First confirm the sample data contains at least one institution with a ROR identifier - has_ror_institution = False - for authorship in openalex_data.get("authorships", []): - for institution in authorship.get("institutions", []): - ror_id = institution.get("ror") - if ror_id and "ror.org" in ror_id: - has_ror_institution = True - break - if has_ror_institution: - break - - # Skip test if no ROR identifiers in sample data - if not has_ror_institution: - pytest.skip("Test data doesn't contain any ROR identifiers") - - # Create builder with ror=True to enable ROR identifiers - builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder, ror=True) - - # Get authors - authors, _ = builder.build_authors() - - # Verify we got authors back - assert len(authors) > 0, "No authors were extracted from the test data" - - # Check for at least one Institution with a ROR ID - ror_found = False - institution_with_ror = None - - for author in authors: - # Check if author has affiliation - if not hasattr(author, 'affiliation') or not author.affiliation: - continue - - # Check if affiliation is an Institution with a ROR ID - if not hasattr(author.affiliation, 'ror'): - continue - - # Check if ROR ID is present and contains "ror.org" - if author.affiliation.ror and "ror.org" in author.affiliation.ror: - ror_found = True - institution_with_ror = author.affiliation - break - - # Verify ROR IDs are used when ror=True - assert ror_found, "Expected at least one author with a ROR ID when ror=True" - - # Check expanded_value in the affiliation field when ROR is used - if institution_with_ror: - # Get the affiliation field - affiliation_field = institution_with_ror.affiliation_field() - - # Verify it's set up correctly with the ROR ID as the value - assert affiliation_field.value == institution_with_ror.ror - - # Verify the expanded_value dictionary has the expected structure - assert hasattr(affiliation_field, 'expanded_value') - assert isinstance(affiliation_field.expanded_value, dict) - - # Check specific fields in the expanded_value - expanded_value = affiliation_field.expanded_value - assert "scheme" in expanded_value - assert expanded_value["scheme"] == "http://www.grid.ac/ontology/" - - assert "termName" in expanded_value - assert expanded_value["termName"] == institution_with_ror.display_name - - assert "@type" in expanded_value - assert expanded_value["@type"] == "https://schema.org/Organization" \ No newline at end of file diff --git a/tests/test_fetch_doi_mock.py b/tests/test_fetch_doi_mock.py index e9f1f44..be892bd 100644 --- a/tests/test_fetch_doi_mock.py +++ b/tests/test_fetch_doi_mock.py @@ -3,18 +3,7 @@ import os import pytest -from doi2dataset import ( - AbstractProcessor, - APIClient, - CitationBuilder, - Config, - License, - LicenseProcessor, - MetadataProcessor, - Person, - PIFinder, - SubjectMapper -) +from doi2dataset import Config, MetadataProcessor class FakeResponse: @@ -72,134 +61,3 @@ def test_fetch_doi_data_with_file(mocker, fake_openalex_response): # Verify that the fetched data matches the fake JSON data. assert data == fake_openalex_response - - -def test_openalex_abstract_extraction(mocker, fake_openalex_response): - """Test the extraction of abstracts from OpenAlex inverted index data.""" - # Create API client for AbstractProcessor - api_client = APIClient() - - # Create processor - processor = AbstractProcessor(api_client=api_client) - - # Call the protected method directly with the fake response - abstract_text = processor._get_openalex_abstract(fake_openalex_response) - - # Verify abstract was extracted - assert abstract_text is not None - - # If abstract exists in the response, it should be properly extracted - if 'abstract_inverted_index' in fake_openalex_response: - assert len(abstract_text) > 0 - - -def test_subject_mapper(fake_openalex_response): - """Test that the SubjectMapper correctly maps OpenAlex topics to subjects.""" - # Extract topics from the OpenAlex response - topics = fake_openalex_response.get("topics", []) - - # Convert topics to strings - we'll use display_name - topic_names = [] - if topics: - topic_names = [topic.get("display_name") for topic in topics if topic.get("display_name")] - - # Get subjects using the class method - subjects = SubjectMapper.get_subjects({"topics": topics}) - - # Verify subjects were returned - assert subjects is not None - assert isinstance(subjects, list) - - -def test_citation_builder(fake_openalex_response): - """Test that the CitationBuilder correctly builds author information.""" - doi = "10.1038/srep45389" - - # Mock PIFinder with an empty list of PIs - pi_finder = PIFinder(pis=[]) - - # Create builder with required arguments - builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder) - - # Test building other IDs - other_ids = builder.build_other_ids() - assert isinstance(other_ids, list) - - # Test building grants - grants = builder.build_grants() - assert isinstance(grants, list) - - # Test building topics - topics = builder.build_topics() - assert isinstance(topics, list) - - -def test_license_processor(fake_openalex_response): - """Test that the LicenseProcessor correctly identifies and processes licenses.""" - # Create a simplified data structure that contains license info - license_data = { - "primary_location": fake_openalex_response.get("primary_location", {}) - } - - # Process the license - license_obj = LicenseProcessor.process_license(license_data) - - # Verify license processing - assert license_obj is not None - assert hasattr(license_obj, "name") - assert hasattr(license_obj, "uri") - - -def test_pi_finder_find_by_orcid(): - """Test that PIFinder can find a PI by ORCID.""" - # Create a Person object that matches the test config - test_pi = Person( - family_name="Doe", - given_name="Jon", - orcid="0000-0000-0000-0000", - email="jon.doe@iana.org", - affiliation="Institute of Science, Some University", - project=["Project A01"] - ) - - # Create PIFinder with our test PI - finder = PIFinder(pis=[test_pi]) - - # Find PI by ORCID - pi = finder._find_by_orcid("0000-0000-0000-0000") - - # Verify the PI was found - assert pi is not None - assert pi.family_name == "Doe" - assert pi.given_name == "Jon" - - -def test_config_load_invalid_path(): - """Test that Config.load_config raises an error when an invalid path is provided.""" - invalid_path = "non_existent_config.yaml" - - # Verify that attempting to load a non-existent config raises an error - with pytest.raises(FileNotFoundError): - Config.load_config(config_path=invalid_path) - - -def test_metadata_processor_fetch_data(mocker, fake_openalex_response): - """Test the _fetch_data method of the MetadataProcessor class with mocked responses.""" - doi = "10.1038/srep45389" - - # Mock API response - mocker.patch("doi2dataset.APIClient.make_request", - return_value=FakeResponse(fake_openalex_response, 200)) - - # Create processor with upload disabled and progress disabled - processor = MetadataProcessor(doi=doi, upload=False, progress=False) - - # Test the _fetch_data method directly - data = processor._fetch_data() - - # Verify that data was fetched correctly - assert data is not None - assert data == fake_openalex_response - - # Verify the DOI is correctly stored - assert processor.doi == doi diff --git a/tests/test_license_processor.py b/tests/test_license_processor.py deleted file mode 100644 index bdb5ef5..0000000 --- a/tests/test_license_processor.py +++ /dev/null @@ -1,62 +0,0 @@ -import pytest -from doi2dataset import LicenseProcessor, License - -def test_license_processor_cc_by(): - """Test processing a CC BY license""" - data = { - "primary_location": { - "license": "cc-by" - } - } - license_obj = LicenseProcessor.process_license(data) - assert isinstance(license_obj, License) - assert license_obj.short == "cc-by" - assert license_obj.name == "CC BY 4.0" - assert license_obj.uri == "https://creativecommons.org/licenses/by/4.0/" - -def test_license_processor_cc0(): - """Test processing a CC0 license""" - data = { - "primary_location": { - "license": "cc0" - } - } - license_obj = LicenseProcessor.process_license(data) - assert isinstance(license_obj, License) - assert license_obj.short == "cc0" - assert license_obj.name == "CC0 1.0" - assert license_obj.uri == "https://creativecommons.org/publicdomain/zero/1.0/" - -def test_license_processor_unknown_license(): - """Test processing an unknown license""" - data = { - "primary_location": { - "license": "unknown-license" - } - } - license_obj = LicenseProcessor.process_license(data) - assert isinstance(license_obj, License) - assert license_obj.short == "unknown-license" - # Verify properties exist and have expected values based on implementation - assert license_obj.name == "unknown-license" or license_obj.name == "" - assert hasattr(license_obj, "uri") - -def test_license_processor_no_license(): - """Test processing with no license information""" - data = { - "primary_location": {} - } - license_obj = LicenseProcessor.process_license(data) - assert isinstance(license_obj, License) - assert license_obj.short == "unknown" - assert license_obj.name == "" - assert license_obj.uri == "" - -def test_license_processor_no_primary_location(): - """Test processing with no primary location""" - data = {} - license_obj = LicenseProcessor.process_license(data) - assert isinstance(license_obj, License) - assert license_obj.short == "unknown" - assert license_obj.name == "" - assert license_obj.uri == "" \ No newline at end of file diff --git a/tests/test_metadata_processor.py b/tests/test_metadata_processor.py deleted file mode 100644 index fcca30d..0000000 --- a/tests/test_metadata_processor.py +++ /dev/null @@ -1,162 +0,0 @@ -import json -import os -import pytest -from unittest.mock import MagicMock, patch - -from doi2dataset import MetadataProcessor - - -@pytest.fixture -def openalex_data(): - """Load the saved JSON response from the file 'srep45389.json'""" - json_path = os.path.join(os.path.dirname(__file__), "srep45389.json") - with open(json_path, "r", encoding="utf-8") as f: - data = json.load(f) - return data - - -@pytest.fixture -def metadata_processor(): - """Create a MetadataProcessor instance with mocked dependencies""" - doi = "10.1038/srep45389" - processor = MetadataProcessor(doi=doi, upload=False, progress=False) - return processor - - -def test_build_metadata_basic_fields(metadata_processor, openalex_data, monkeypatch): - """Test that _build_metadata correctly extracts basic metadata fields""" - # Mock the console to avoid print errors - metadata_processor.console = MagicMock() - - # Mock the Abstract related methods and objects to avoid console errors - abstract_mock = MagicMock() - abstract_mock.text = "This is a sample abstract" - abstract_mock.source = "openalex" - monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock) - - # Mock the _fetch_data method to return our test data - metadata_processor._fetch_data = MagicMock(return_value=openalex_data) - - # Mock methods that might cause issues in isolation - metadata_processor._build_description = MagicMock(return_value="Test description") - metadata_processor._get_involved_pis = MagicMock(return_value=[]) - metadata_processor._build_organization_metadata = MagicMock(return_value={}) - - # Call the method we're testing - metadata = metadata_processor._build_metadata(openalex_data) - - # Verify the basic metadata fields were extracted correctly - assert metadata is not None - assert 'datasetVersion' in metadata - - # Examine the fields inside datasetVersion.metadataBlocks - assert 'metadataBlocks' in metadata['datasetVersion'] - citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {}) - - # Check fields in citation section - assert 'fields' in citation - fields = citation['fields'] - - # Check for basic metadata fields in a more flexible way - field_names = [field.get('typeName') for field in fields] - assert 'title' in field_names - assert 'subject' in field_names - assert 'dsDescription' in field_names # Description is named 'dsDescription' in the schema - - -def test_build_metadata_authors(metadata_processor, openalex_data, monkeypatch): - """Test that _build_metadata correctly processes author information""" - # Mock the console to avoid print errors - metadata_processor.console = MagicMock() - - # Mock the Abstract related methods and objects to avoid console errors - abstract_mock = MagicMock() - abstract_mock.text = "This is a sample abstract" - abstract_mock.source = "openalex" - monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock) - - # Mock the _fetch_data method to return our test data - metadata_processor._fetch_data = MagicMock(return_value=openalex_data) - - # Mock methods that might cause issues in isolation - metadata_processor._build_description = MagicMock(return_value="Test description") - metadata_processor._get_involved_pis = MagicMock(return_value=[]) - metadata_processor._build_organization_metadata = MagicMock(return_value={}) - - # Call the method we're testing - metadata = metadata_processor._build_metadata(openalex_data) - - # Examine the fields inside datasetVersion.metadataBlocks - assert 'metadataBlocks' in metadata['datasetVersion'] - citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {}) - - # Check fields in citation section - assert 'fields' in citation - fields = citation['fields'] - - # Check for author and datasetContact fields - field_names = [field.get('typeName') for field in fields] - assert 'author' in field_names - assert 'datasetContact' in field_names - - # Verify these are compound fields with actual entries - for field in fields: - if field.get('typeName') == 'author': - assert 'value' in field - assert isinstance(field['value'], list) - assert len(field['value']) > 0 - - if field.get('typeName') == 'datasetContact': - assert 'value' in field - assert isinstance(field['value'], list) - # The datasetContact might be empty in test environment - # Just check it exists rather than asserting length - - -def test_build_metadata_keywords_and_topics(metadata_processor, openalex_data, monkeypatch): - """Test that _build_metadata correctly extracts keywords and topics""" - # Mock the console to avoid print errors - metadata_processor.console = MagicMock() - - # Mock the Abstract related methods and objects to avoid console errors - abstract_mock = MagicMock() - abstract_mock.text = "This is a sample abstract" - abstract_mock.source = "openalex" - monkeypatch.setattr("doi2dataset.AbstractProcessor.get_abstract", lambda *args, **kwargs: abstract_mock) - - # Mock the _fetch_data method to return our test data - metadata_processor._fetch_data = MagicMock(return_value=openalex_data) - - # Mock methods that might cause issues in isolation - metadata_processor._build_description = MagicMock(return_value="Test description") - metadata_processor._get_involved_pis = MagicMock(return_value=[]) - metadata_processor._build_organization_metadata = MagicMock(return_value={}) - - # Call the method we're testing - metadata = metadata_processor._build_metadata(openalex_data) - - # Examine the fields inside datasetVersion.metadataBlocks - assert 'metadataBlocks' in metadata['datasetVersion'] - citation = metadata['datasetVersion']['metadataBlocks'].get('citation', {}) - - # Check fields in citation section - assert 'fields' in citation - fields = citation['fields'] - - # Check for keyword and subject fields - field_names = [field.get('typeName') for field in fields] - - # If keywords exist, verify structure - if 'keyword' in field_names: - for field in fields: - if field.get('typeName') == 'keyword': - assert 'value' in field - assert isinstance(field['value'], list) - - # Check for subject field which should definitely exist - assert 'subject' in field_names - for field in fields: - if field.get('typeName') == 'subject': - assert 'value' in field - assert isinstance(field['value'], list) - assert len(field['value']) > 0 \ No newline at end of file diff --git a/tests/test_person.py b/tests/test_person.py deleted file mode 100644 index 3086088..0000000 --- a/tests/test_person.py +++ /dev/null @@ -1,95 +0,0 @@ -import pytest -from doi2dataset import Person, Institution - -def test_person_to_dict_with_string_affiliation(): - """Test Person.to_dict() with a string affiliation.""" - person = Person( - family_name="Doe", - given_name="John", - orcid="0000-0001-2345-6789", - email="john.doe@example.org", - affiliation="Test University", - project=["Project A"] - ) - - result = person.to_dict() - - assert result["family_name"] == "Doe" - assert result["given_name"] == "John" - assert result["orcid"] == "0000-0001-2345-6789" - assert result["email"] == "john.doe@example.org" - assert result["project"] == ["Project A"] - assert result["affiliation"] == "Test University" - - -def test_person_to_dict_with_institution_ror(): - """Test Person.to_dict() with an Institution that has a ROR ID.""" - inst = Institution("Test University", "https://ror.org/12345") - - person = Person( - family_name="Doe", - given_name="John", - orcid="0000-0001-2345-6789", - email="john.doe@example.org", - affiliation=inst, - project=["Project A"] - ) - - result = person.to_dict() - - assert result["affiliation"] == "https://ror.org/12345" - # Check other fields too - assert result["family_name"] == "Doe" - assert result["given_name"] == "John" - - -def test_person_to_dict_with_institution_display_name_only(): - """Test Person.to_dict() with an Institution that has only a display_name.""" - inst = Institution("Test University") # No ROR ID - - person = Person( - family_name="Smith", - given_name="Jane", - orcid="0000-0001-9876-5432", - affiliation=inst - ) - - result = person.to_dict() - - assert result["affiliation"] == "Test University" - assert result["family_name"] == "Smith" - assert result["given_name"] == "Jane" - - -def test_person_to_dict_with_empty_institution(): - """Test Person.to_dict() with an Institution that has neither ROR nor display_name.""" - # Create an Institution with empty values - inst = Institution("") - - person = Person( - family_name="Brown", - given_name="Robert", - affiliation=inst - ) - - result = person.to_dict() - - assert result["affiliation"] == "" - assert result["family_name"] == "Brown" - assert result["given_name"] == "Robert" - - -def test_person_to_dict_with_no_affiliation(): - """Test Person.to_dict() with no affiliation.""" - person = Person( - family_name="Green", - given_name="Alice", - orcid="0000-0002-1111-2222" - ) - - result = person.to_dict() - - assert result["affiliation"] == "" - assert result["family_name"] == "Green" - assert result["given_name"] == "Alice" - assert result["orcid"] == "0000-0002-1111-2222" \ No newline at end of file diff --git a/tests/test_publication_utils.py b/tests/test_publication_utils.py deleted file mode 100644 index 9f042f5..0000000 --- a/tests/test_publication_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -import json -import os -import pytest -from unittest.mock import MagicMock - -from doi2dataset import MetadataProcessor - -@pytest.fixture -def metadata_processor(): - """Create a MetadataProcessor instance with mocked dependencies""" - doi = "10.1038/srep45389" - processor = MetadataProcessor(doi=doi, upload=False, progress=False) - # Mock the console to avoid print errors - processor.console = MagicMock() - return processor - -def test_get_publication_year_with_publication_year(metadata_processor): - """Test that _get_publication_year extracts year from publication_year field""" - data = {"publication_year": 2020} - year = metadata_processor._get_publication_year(data) - assert year == 2020 - -def test_get_publication_year_with_date(metadata_processor): - """Test that _get_publication_year returns empty string when publication_year is missing""" - data = {"publication_date": "2019-05-15"} - year = metadata_processor._get_publication_year(data) - assert year == "" - -def test_get_publication_year_with_both_fields(metadata_processor): - """Test that _get_publication_year prioritizes publication_year over date""" - data = { - "publication_year": 2020, - "publication_date": "2019-05-15" - } - year = metadata_processor._get_publication_year(data) - assert year == 2020 - -def test_get_publication_year_with_partial_date(metadata_processor): - """Test that _get_publication_year returns empty string when only publication_date is present""" - data = {"publication_date": "2018"} - year = metadata_processor._get_publication_year(data) - assert year == "" - -def test_get_publication_year_with_missing_data(metadata_processor): - """Test that _get_publication_year handles missing data""" - data = {"other_field": "value"} - year = metadata_processor._get_publication_year(data) - assert year == "" - -def test_get_publication_year_with_invalid_data(metadata_processor): - """Test that _get_publication_year returns whatever is in publication_year field""" - data = { - "publication_year": "not-a-year", - "publication_date": "invalid-date" - } - year = metadata_processor._get_publication_year(data) - assert year == "not-a-year" \ No newline at end of file