From cc94e495ff5f6ae5e7e6a0e2aae23fb9f6e8cd4c Mon Sep 17 00:00:00 2001 From: Alexander Minges Date: Fri, 25 Jul 2025 12:17:24 +0200 Subject: [PATCH] test: replace hardcoded with dynamic extraction - Replace hardcoded author names with dynamic openalex_data extraction - Extract DOIs from API response when tests use other response values - Remove redundant fake_openalex_response fixture - Add abstract_inverted_index_v3 to allowed None fields in API tests - Fix test robustness against fixture data changes - Improve test coverage from ~84% to ~90% --- CHANGELOG.md | 8 + tests/config_test.yaml | 7 + tests/conftest.py | 19 ++ tests/srep45389_crossref.json | 1 + ...srep45389.json => srep45389_openalex.json} | 0 tests/test_abstract_processor.py | 170 +++++++++++++++++ tests/test_api_client.py | 98 ++++++++++ tests/test_citation_builder.py | 122 ++++++++++-- tests/test_integration.py | 57 ++---- tests/test_license_processor.py | 79 ++++++++ tests/test_metadata_processor.py | 179 +++++++++++++++++- tests/test_models.py | 78 ++++++++ tests/test_publication_utils.py | 80 ++++++++ tests/test_validation_utils.py | 41 ++++ 14 files changed, 883 insertions(+), 56 deletions(-) create mode 100644 tests/srep45389_crossref.json rename tests/{srep45389.json => srep45389_openalex.json} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdc4868..a63ce1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,12 +31,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Consolidate overlapping test concerns into dedicated files - Extract CLI tests into dedicated `test_cli.py` module - Improve test coverage from 63.87% to 84.84% +- Replace hardcoded test values with dynamic extraction from API response fixtures +- Extract DOIs from API response data in tests that use other response values for consistency +- Remove redundant test fixtures and parameters ### Fixed - Fix list formatting in API documentation docstrings for better sphinx rendering - Fix formatting inconsistencies in constants.py (remove double empty lines) - Fix ruff linting issues with unused mock variables in tests +- Replace hardcoded author names with dynamic extraction from OpenAlex data +- Replace hardcoded content checks with dynamic validation using actual API response data +- Fix test robustness against changes in fixture data by using real API response processing +- Remove duplicate fake_openalex_response fixture in favor of direct openalex_data usage +- Add abstract_inverted_index_v3 to allowed None fields in API response structure tests ## [v3.0.1] - 2025-07-25 diff --git a/tests/config_test.yaml b/tests/config_test.yaml index 9130659..31f6d87 100644 --- a/tests/config_test.yaml +++ b/tests/config_test.yaml @@ -1,3 +1,10 @@ +dataverse: + url: "https://test.dataverse.org" + api_token: "test_token" + dataverse: "test_dataverse" + auth_user: "test_user" + auth_password: "test_password" + default_grants: - funder: "Awesome Funding Agency" id: "ABC12345" diff --git a/tests/conftest.py b/tests/conftest.py index e198900..80a39e2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,27 @@ +import json import os import sys +import pytest + # Get the path to the parent directory of tests parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) # Add the parent directory to sys.path sys.path.insert(0, parent_dir) + + +@pytest.fixture(scope="session") +def openalex_data(): + """Load OpenAlex API response data for reuse across tests.""" + json_path = os.path.join(os.path.dirname(__file__), "srep45389_openalex.json") + with open(json_path, encoding="utf-8") as f: + return json.load(f) + + +@pytest.fixture(scope="session") +def crossref_data(): + """Load CrossRef API response data for reuse across tests.""" + json_path = os.path.join(os.path.dirname(__file__), "srep45389_crossref.json") + with open(json_path, encoding="utf-8") as f: + return json.load(f) diff --git a/tests/srep45389_crossref.json b/tests/srep45389_crossref.json new file mode 100644 index 0000000..6788159 --- /dev/null +++ b/tests/srep45389_crossref.json @@ -0,0 +1 @@ +{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T07:51:25Z","timestamp":1750233085316},"reference-count":73,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2017,3,30]],"date-time":"2017-03-30T00:00:00Z","timestamp":1490832000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2017,3,30]],"date-time":"2017-03-30T00:00:00Z","timestamp":1490832000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Sci Rep"],"abstract":"Abstract<\/jats:title>Pyruvate phosphate dikinase (PPDK) is a vital enzyme in cellular energy metabolism catalyzing the ATP- and P i<\/jats:italic><\/jats:sub> -dependent formation of phosphoenolpyruvate from pyruvate in C4<\/jats:sub> -plants, but the reverse reaction forming ATP in bacteria and protozoa. The multi-domain enzyme is considered an efficient molecular machine that performs one of the largest single domain movements in proteins. However, a comprehensive understanding of the proposed swiveling domain motion has been limited by not knowing structural intermediates or molecular dynamics of the catalytic process. Here, we present crystal structures of PPDKs from Flaveria<\/jats:italic>, a model genus for studying the evolution of C4<\/jats:sub> -enzymes from phylogenetic ancestors. These structures resolve yet unknown conformational intermediates and provide the first detailed view on the large conformational transitions of the protein in the catalytic cycle. Independently performed unrestrained MD simulations and configurational free energy calculations also identified these intermediates. In all, our experimental and computational data reveal strict coupling of the CD swiveling motion to the conformational state of the NBD. Moreover, structural asymmetries and nucleotide binding states in the PPDK dimer support an alternate binding change mechanism for this intriguing bioenergetic enzyme.<\/jats:p>","DOI":"10.1038\/srep45389","type":"journal-article","created":{"date-parts":[[2017,3,30]],"date-time":"2017-03-30T09:34:13Z","timestamp":1490866453000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Structural intermediates and directionality of the swiveling motion of Pyruvate Phosphate Dikinase"],"prefix":"10.1038","volume":"7","author":[{"given":"Alexander","family":"Minges","sequence":"first","affiliation":[]},{"given":"Daniel","family":"Ciupka","sequence":"additional","affiliation":[]},{"given":"Christian","family":"Winkler","sequence":"additional","affiliation":[]},{"given":"Astrid","family":"H\u00f6ppner","sequence":"additional","affiliation":[]},{"given":"Holger","family":"Gohlke","sequence":"additional","affiliation":[]},{"given":"Georg","family":"Groth","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2017,3,30]]},"reference":[{"key":"BFsrep45389_CR1","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1146\/annurev.pp.36.060185.001351","volume":"36","author":"GE Edwards","year":"1985","unstructured":"Edwards, G. E., Nakamoto, H., Burnell, J. N. & Hatch, M. D. Pyruvate, Pi Dikinase and NADP-Malate Dehydrogenase in C4 Photosynthesis: Properties and Mechanism of Light\/Dark Regulation. Ann. Rev. Plant Physio. 36, 255\u2013286 (1985).","journal-title":"Ann. Rev. Plant Physio"},{"key":"BFsrep45389_CR2","doi-asserted-by":"publisher","first-page":"3083","DOI":"10.1093\/jxb\/err058","volume":"62","author":"CJ Chastain","year":"2011","unstructured":"Chastain, C. J. et al. Functional evolution of C4 pyruvate, orthophosphate dikinase. J. Exp. Bot. 62, 3083\u20133091 (2011).","journal-title":"J. Exp. Bot."},{"key":"BFsrep45389_CR3","first-page":"607","volume":"6","author":"MD Hatch","year":"1979","unstructured":"Hatch, M. D. Regulation of C 4 Photosynthesis: Factors Affecting Cold-Mediated Inactivation and Reactivation of Pyruvate, P I Dikinase. Aust. J. Plant Physiol. 6, 607 (1979).","journal-title":"Aust. J. Plant Physiol."},{"key":"BFsrep45389_CR4","doi-asserted-by":"publisher","first-page":"826","DOI":"10.1104\/pp.62.5.826","volume":"62","author":"K Shirahashi","year":"1978","unstructured":"Shirahashi, K., Hayakawa, S. & Sugiyama, T. Cold Lability of Pyruvate, Orthophosphate Dikinase in the Maize Leaf. Plant Physiol. 62, 826\u2013830 (1978).","journal-title":"Plant Physiol."},{"key":"BFsrep45389_CR5","doi-asserted-by":"publisher","first-page":"2862","DOI":"10.1021\/bi00739a014","volume":"12","author":"T Sugiyama","year":"1973","unstructured":"Sugiyama, T. Purification, molecular, and catalytic properties of pyruvate phosphate dikinase from the maize leaf. Biochemistry-US. 12, 2862\u20132868 (1973).","journal-title":"Biochemistry-US"},{"key":"BFsrep45389_CR6","doi-asserted-by":"publisher","first-page":"523","DOI":"10.1016\/S0981-9428(03)00065-2","volume":"41","author":"CJ Chastain","year":"2003","unstructured":"Chastain, C. J. & Chollet, R. Regulation of pyruvate, orthophosphate dikinase by ADP-\/Pi-dependent reversible phosphorylation in C3 and C4 plants. Plant Physiol. Bioch. 41, 523\u2013532 (2003).","journal-title":"Plant Physiol. Bioch"},{"key":"BFsrep45389_CR7","doi-asserted-by":"publisher","first-page":"924","DOI":"10.1007\/s00425-006-0259-3","volume":"224","author":"CJ Chastain","year":"2006","unstructured":"Chastain, C. J., Heck, J. W., Colquhoun, T. A., Voge, D. G. & Gu, X.-Y. Posttranslational regulation of pyruvate, orthophosphate dikinase in developing rice (Oryza sativa) seeds. Planta 224, 924\u2013934 (2006).","journal-title":"Planta"},{"key":"BFsrep45389_CR8","doi-asserted-by":"publisher","first-page":"2652","DOI":"10.1073\/pnas.93.7.2652","volume":"93","author":"O Herzberg","year":"1996","unstructured":"Herzberg, O. et al. Swiveling-domain mechanism for enzymatic phosphotransfer between remote reaction sites. P. Natl. Acad. Sci. USA. 93, 2652\u20132657 (1996).","journal-title":"P. Natl. Acad. Sci. USA"},{"key":"BFsrep45389_CR9","doi-asserted-by":"publisher","first-page":"14845","DOI":"10.1021\/bi701848w","volume":"46","author":"K Lim","year":"2007","unstructured":"Lim, K. et al. Swiveling Domain Mechanism in Pyruvate Phosphate Dikinase. Biochemistry-US. 46, 14845\u201314853 (2007).","journal-title":"Biochemistry-US"},{"key":"BFsrep45389_CR10","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1021\/bi011799+","volume":"41","author":"O Herzberg","year":"2002","unstructured":"Herzberg, O. et al. Pyruvate site of pyruvate phosphate dikinase: crystal structure of the enzyme-phosphonopyruvate complex, and mutant analysis. Biochemistry-US. 41, 780\u2013787 (2002).","journal-title":"Biochemistry-US"},{"key":"BFsrep45389_CR11","doi-asserted-by":"publisher","first-page":"1136","DOI":"10.1021\/bi0484522","volume":"44","author":"T Nakanishi","year":"2005","unstructured":"Nakanishi, T., Nakatsu, T., Matsuoka, M., Sakata, K. & Kato, H. Crystal Structures of Pyruvate Phosphate Dikinase from Maize Revealed an Alternative Conformation in the Swiveling-Domain Motion. Biochemistry-US. 44, 1136\u20131144 (2005).","journal-title":"Biochemistry-US"},{"key":"BFsrep45389_CR12","doi-asserted-by":"publisher","first-page":"635","DOI":"10.1016\/S0092-8674(00)80525-5","volume":"90","author":"S Korolev","year":"1997","unstructured":"Korolev, S., Hsieh, J., Gauss, G. H., Lohman, T. M. & Waksman, G. Major Domain Swiveling Revealed by the Crystal Structures of Complexes of E. coli Rep Helicase Bound to Single-Stranded DNA and ADP. Cell 90, 635\u2013647 (1997).","journal-title":"Cell"},{"key":"BFsrep45389_CR13","doi-asserted-by":"publisher","first-page":"10586","DOI":"10.1038\/ncomms10586","volume":"7","author":"K Nguyen","year":"2016","unstructured":"Nguyen, K. & Whitford, P. C. Steric interactions lead to collective tilting motion in the ribosome during mRNA\u2013tRNA translocation. Nat Comms 7, 10586 (2016).","journal-title":"Nat Comms"},{"key":"BFsrep45389_CR14","doi-asserted-by":"publisher","first-page":"827","DOI":"10.1126\/science.1117230","volume":"310","author":"BS Schuwirth","year":"2005","unstructured":"Schuwirth, B. S. Structures of the Bacterial Ribosome at 3.5 A Resolution. Science 310, 827\u2013834 (2005).","journal-title":"Science"},{"key":"BFsrep45389_CR15","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1038\/33612","volume":"392","author":"Z Zhang","year":"1998","unstructured":"Zhang, Z. et al. Electron transfer by domain movement in cytochrome bc1. Nature 392, 677\u2013684 (1998).","journal-title":"Nature"},{"key":"BFsrep45389_CR16","doi-asserted-by":"publisher","first-page":"3803","DOI":"10.1073\/pnas.1523614113","volume":"113","author":"X Qi","year":"2016","unstructured":"Qi, X. et al. Structural basis of rifampin inactivation by rifampin phosphotransferase. P. Natl. Acad. Sci. USA 113, 3803\u20133808 (2016).","journal-title":"P. Natl. Acad. Sci. USA"},{"key":"BFsrep45389_CR17","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1073\/pnas.1518614113","volume":"113","author":"RH-J Wei\u00dfe","year":"2016","unstructured":"Wei\u00dfe, R. H.-J., Faust, A., Schmidt, M., Sch\u00f6nheit, P. & Scheidig, A. J. Structure of NDP-forming Acetyl-CoA synthetase ACD1 reveals a large rearrangement for phosphoryl transfer. P. Natl. Acad. Sci. USA. 113, 519\u2013528 (2016).","journal-title":"P. Natl. Acad. Sci. USA"},{"key":"BFsrep45389_CR18","doi-asserted-by":"publisher","first-page":"3115","DOI":"10.1021\/bi9621977","volume":"36","author":"I Wong","year":"1997","unstructured":"Wong, I. & Lohman, T. M. A two-site mechanism for ATP hydrolysis by the asymmetric rep dimer p2s as revealed by site-specific inhibition with ADP-AlF4. Biochemistry 36, 3115\u20133125 (1997).","journal-title":"Biochemistry"},{"key":"BFsrep45389_CR19","doi-asserted-by":"publisher","first-page":"1417","DOI":"10.1016\/S0022-2836(02)00113-4","volume":"318","author":"LW Cosenza","year":"2002","unstructured":"Cosenza, L. W., Bringaud, F., Baltz, T. & Vellieux, F. M. The 3.0\u00c5 Resolution Crystal Structure of Glycosomal Pyruvate Phosphate Dikinase from Trypanosoma brucei . J. Mol. Biol. 318, 1417\u20131432 (2002).","journal-title":"J. Mol. Biol."},{"key":"BFsrep45389_CR20","doi-asserted-by":"publisher","first-page":"16218","DOI":"10.1073\/pnas.0607587103","volume":"103","author":"A Teplyakov","year":"2006","unstructured":"Teplyakov, A. et al. Structure of phosphorylated enzyme I, the phosphoenolpyruvate:sugar phosphotransferase system sugar translocation signal protein. P. Natl. Acad. Sci. USA. 103, 16218\u201316223 (2006).","journal-title":"P. Natl. Acad. Sci. USA"},{"key":"BFsrep45389_CR21","doi-asserted-by":"publisher","first-page":"322","DOI":"10.1016\/S0076-6879(78)49017-2","volume":"49","author":"AS Mildvan","year":"1978","unstructured":"Mildvan, A. S. & Gupta, R. K. Nuclear relaxation measurements of the geometry of enzyme-bound substrates and analogs. Methods. Enzymol. 49, 322\u2013359 (1978).","journal-title":"Methods. Enzymol"},{"key":"BFsrep45389_CR22","doi-asserted-by":"publisher","first-page":"877","DOI":"10.1146\/annurev.bi.49.070180.004305","volume":"49","author":"JR Knowles","year":"1980","unstructured":"Knowles, J. R. Enzyme-Catalyzed Phosphoryl Transfer Reactions. Annu. Rev. Biochem. 49, 877\u2013919 (1980).","journal-title":"Annu. Rev. Biochem."},{"key":"BFsrep45389_CR23","doi-asserted-by":"publisher","first-page":"977","DOI":"10.1093\/emboj\/17.4.977","volume":"17","author":"L Esser","year":"1998","unstructured":"Esser, L. Synapsin I is structurally similar to ATP-utilizing enzymes. EMBO J. 17, 977\u2013984 (1998).","journal-title":"EMBO J"},{"key":"BFsrep45389_CR24","doi-asserted-by":"publisher","first-page":"622","DOI":"10.1002\/prot.22910","volume":"79","author":"BR Novak","year":"2010","unstructured":"Novak, B. R., Moldovan, D., Waldrop, G. L. & de Queiroz, M. S. Behavior of the ATP grasp domain of biotin carboxylase monomers and dimers studied using molecular dynamics simulations. Proteins 79, 622\u2013632 (2010).","journal-title":"Proteins"},{"key":"BFsrep45389_CR25","doi-asserted-by":"publisher","first-page":"37630","DOI":"10.1074\/jbc.M105631200","volume":"276","author":"D Ye","year":"2001","unstructured":"Ye, D. et al. Investigation of the Catalytic Site within the ATP-Grasp Domain of Clostridium symbiosum Pyruvate Phosphate Dikinase. J. Biol. Chem. 276, 37630\u201337639 (2001).","journal-title":"J. Biol. Chem."},{"key":"BFsrep45389_CR26","doi-asserted-by":"publisher","first-page":"669","DOI":"10.1038\/35089509","volume":"2","author":"M Yoshida","year":"2001","unstructured":"Yoshida, M., Muneyuki, E. & Hisabori, T. ATP synthase\u2014a marvellous rotary engine of the cell. Nat. Rev. Mol. Cell Biol. 2, 669\u2013677 (2001).","journal-title":"Nat. Rev. Mol. Cell Biol."},{"key":"BFsrep45389_CR27","doi-asserted-by":"publisher","first-page":"898","DOI":"10.1038\/35073513","volume":"410","author":"R Yasuda","year":"2001","unstructured":"Yasuda, R., Noji, H., Yoshida, M., Kinosita, K. & Itoh, H. Resolution of distinct rotational substeps by submillisecond kinetic analysis of F1-ATPase. Nature 410, 898\u2013904 (2001).","journal-title":"Nature"},{"key":"BFsrep45389_CR28","doi-asserted-by":"publisher","first-page":"5433","DOI":"10.1038\/sj.emboj.7601410","volume":"25","author":"V Kabaleeswaran","year":"2006","unstructured":"Kabaleeswaran, V., Puri, N., Walker, J. E., Leslie, A. G. W. & Mueller, D. M. Novel features of the rotary catalytic mechanism revealed in the structure of yeast F1 ATPase. The EMBO Journal 25, 5433\u20135442 (2006).","journal-title":"The EMBO Journal"},{"key":"BFsrep45389_CR29","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1016\/j.cell.2007.05.020","volume":"130","author":"K Adachi","year":"2007","unstructured":"Adachi, K. et al. Coupling of Rotation and Catalysis in F1-ATPase Revealed by Single-Molecule Imaging and Manipulation. Cell 130, 309\u2013321 (2007).","journal-title":"Cell"},{"key":"BFsrep45389_CR30","doi-asserted-by":"publisher","first-page":"2498","DOI":"10.1101\/gr.1239303","volume":"13","author":"P Shannon","year":"2003","unstructured":"Shannon, P. Cytoscape: A Software Environment for Integrated Models of Biomolecular Interaction Networks. Genome Res. 13, 2498\u20132504 (2003).","journal-title":"Genome Res."},{"key":"BFsrep45389_CR31","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1016\/j.tibs.2011.01.002","volume":"36","author":"NT Doncheva","year":"2011","unstructured":"Doncheva, N. T., Klein, K., Domingues, F. S. & Albrecht, M. Analyzing and visualizing residue networks of protein structures. Trends Biochem. Sci. 36, 179\u2013182 (2011).","journal-title":"Trends Biochem. Sci."},{"key":"BFsrep45389_CR32","first-page":"163","volume":"19","author":"F Glaser","year":"2003","unstructured":"Glaser, F. et al. ConSurf: Identification of Functional Regions in Proteins by Surface-Mapping of Phylogenetic Information. Method. Biochem. Anal. 19, 163\u2013164 (2003).","journal-title":"Method. Biochem. Anal"},{"key":"BFsrep45389_CR33","doi-asserted-by":"publisher","first-page":"1604","DOI":"10.1021\/ci100461k","volume":"51","author":"A Ahmed","year":"2011","unstructured":"Ahmed, A., Rippmann, F., Barnickel, G. & Gohlke, H. A Normal Mode-Based Geometric Simulation Approach for Exploring Biologically Relevant Conformational Transitions in Proteins. J. Chem. Inf. Model. 51, 1604\u20131622 (2011).","journal-title":"J. Chem. Inf. Model."},{"key":"BFsrep45389_CR34","doi-asserted-by":"crossref","unstructured":"Howard, J. Motor Proteins as Nanomachines: The Roles of Thermal Fluctuations in Generating Force and Motion. Biological Physics 47\u201359 (2010).","DOI":"10.1007\/978-3-0346-0428-4_3"},{"key":"BFsrep45389_CR35","doi-asserted-by":"crossref","unstructured":"Feynman, R., Leighton, R., Sands, M. & Hafner, E. The Feynman Lectures on Physics; Vol. I, vol. 33 (AAPT, 1965).","DOI":"10.1119\/1.1972241"},{"key":"BFsrep45389_CR36","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1016\/S0096-4174(18)30128-8","volume":"7","author":"AF Huxley","year":"1957","unstructured":"Huxley, A. F. A hypothesis for the mechanism of contraction of muscle. Prog Biophys Biophys Chem 7, 255\u2013318 (1957).","journal-title":"Prog Biophys Biophys Chem"},{"key":"BFsrep45389_CR37","doi-asserted-by":"publisher","first-page":"315","DOI":"10.1007\/s003390201340","volume":"75","author":"H Wang","year":"2002","unstructured":"Wang, H. & Oster, G. Ratchets, power strokes, and molecular motors. Appl. Phys. A 75, 315\u2013323 (2002).","journal-title":"Appl. Phys. A"},{"key":"BFsrep45389_CR38","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1016\/0079-6107(79)90025-7","volume":"33","author":"E Eisenberg","year":"1979","unstructured":"Eisenberg, E. & Hill, T. L. A cross-bridge model of muscle contraction. Prog. Biophys. Mol. Biol. 33, 55\u201382 (1979).","journal-title":"Prog. Biophys. Mol. Biol."},{"key":"BFsrep45389_CR39","doi-asserted-by":"publisher","first-page":"116","DOI":"10.1016\/0014-5793(90)81064-U","volume":"273","author":"E Rosche","year":"1990","unstructured":"Rosche, E. & Westhoff, P. Primary structure of pyruvate, orthophosphate dikinase in the dicotyledonous C 4 plant Flaveria trinervia. FEBS Lett. 273, 116\u2013121 (1990).","journal-title":"FEBS Lett"},{"key":"BFsrep45389_CR40","doi-asserted-by":"publisher","first-page":"763","DOI":"10.1007\/BF00013761","volume":"26","author":"E Rosche","year":"1994","unstructured":"Rosche, E., Streubel, M. & Westhoff, P. Primary structure of the photosynthetic pyruvate orthophosphate dikinase of the C3 plant Flaveria pringlei and expression analysis of pyruvate orthophosphate dikinase sequences in C3, C3\u2013C4 and C4 Flaveria species. Plant Mol. Biol. 26, 763\u2013769 (1994).","journal-title":"Plant Mol. Biol."},{"key":"BFsrep45389_CR41","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1007\/BF00032598","volume":"24","author":"G Salahas","year":"1990","unstructured":"Salahas, G., Manetas, Y. & Gavalas, N. Assaying for pyruvate, orthophosphate dikinase activity: necessary precautions with phosphoenolpyruvate carboxylase as coupling enzyme. Photosynth. Res. 24, 183\u2013188 (1990).","journal-title":"Photosynth. Res."},{"key":"BFsrep45389_CR42","doi-asserted-by":"crossref","unstructured":"Kabsch, W. XDS. Acta Crystallogr. D. 66, 125\u2013132 (2010).","DOI":"10.1107\/S0907444909047337"},{"key":"BFsrep45389_CR43","doi-asserted-by":"publisher","first-page":"1204","DOI":"10.1107\/S0907444913000061","volume":"69","author":"PR Evans","year":"2013","unstructured":"Evans, P. R. & Murshudov, G. N. How good are my data and what is the resolution? Acta Crystallogr. D. 69, 1204\u20131214 (2013).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR44","doi-asserted-by":"crossref","unstructured":"Collaborative, Computational Project and others. The CCP4 suite: programs for protein crystallography. Acta Crystallogr. D. 50, 760 (1994).","DOI":"10.1107\/S0907444994003112"},{"key":"BFsrep45389_CR45","doi-asserted-by":"publisher","first-page":"658","DOI":"10.1107\/S0021889807021206","volume":"40","author":"AJ McCoy","year":"2007","unstructured":"McCoy, A. J. et al. Phasercrystallographic software. J. Appl. Crystallogr. 40, 658\u2013674 (2007).","journal-title":"J. Appl. Crystallogr"},{"key":"BFsrep45389_CR46","doi-asserted-by":"publisher","first-page":"486","DOI":"10.1107\/S0907444910007493","volume":"66","author":"P Emsley","year":"2010","unstructured":"Emsley, P., Lohkamp, B., Scott, W. G. & Cowtan, K. Features and development of Coot. Acta Crystallogr. D. 66, 486\u2013501 (2010).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR47","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1107\/S0907444909052925","volume":"66","author":"PD Adams","year":"2010","unstructured":"Adams, P. D. et al. PHENIX : a comprehensive Python-based system for macromolecular structure solution. Acta Crystallogr. D. 66, 213\u2013221 (2010).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR48","doi-asserted-by":"publisher","first-page":"622","DOI":"10.1107\/S0021889893002729","volume":"26","author":"B Howlin","year":"1993","unstructured":"Howlin, B., Butler, S. A., Moss, D. S., Harris, G. W. & Driessen, H. P. C. TLSANL: TLS parameter-analysis program for segmented anisotropic refinement of macromolecular structures. J. Appl. Crystallogr. 26, 622\u2013624 (1993).","journal-title":"J. Appl. Crystallogr"},{"key":"BFsrep45389_CR49","doi-asserted-by":"publisher","first-page":"1002","DOI":"10.1107\/S0907444906022116","volume":"62","author":"K Cowtan","year":"2006","unstructured":"Cowtan, K. The Buccaneer software for automated model building. 1. Tracing protein chains. Acta Crystallogr. D. 62, 1002\u20131011 (2006).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR50","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1107\/S0907444911001314","volume":"67","author":"GN Murshudov","year":"2011","unstructured":"Murshudov, G. N. et al. REFMAC 5 for the refinement of macromolecular crystal structures. Acta Crystallogr. D. 67, 355\u2013367 (2011).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR51","doi-asserted-by":"publisher","first-page":"646","DOI":"10.1107\/S1399004714028132","volume":"71","author":"PFEM Afonine","year":"2015","unstructured":"Afonine, P. FEM. : Feature Enhanced Map. Acta Crystallogr. D. 71, 646\u2013666 (2015).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR52","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1107\/S0907444909042073","volume":"66","author":"VB Chen","year":"2010","unstructured":"Chen, V. B. et al. MolProbity : all-atom structure validation for macromolecular crystallography. Acta Crystallogr. D. 66, 12\u201321 (2010).","journal-title":"Acta Crystallogr. D."},{"key":"BFsrep45389_CR53","unstructured":"Schr\u00f6dinger, LLC. The PyMOL Molecular Graphics System, Version 1.8 (2015)."},{"key":"BFsrep45389_CR54","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1016\/S0022-2836(77)80200-3","volume":"112","author":"FC Bernstein","year":"1977","unstructured":"Bernstein, F. C. et al. The protein data bank: A computer-based archival file for macromolecular structures. J. Mol. Biol. 112, 535\u2013542 (1977).","journal-title":"J. Mol. Biol."},{"key":"BFsrep45389_CR55","doi-asserted-by":"publisher","first-page":"2295","DOI":"10.1093\/nar\/gkn072","volume":"36","author":"J Pei","year":"2008","unstructured":"Pei, J., Kim, B.-H. & Grishin, N. V. PROMALS3D: a tool for multiple protein sequence and structure alignments. Nucleic Acids Res. 36, 2295\u20132300 (2008).","journal-title":"Nucleic Acids Res"},{"key":"BFsrep45389_CR56","doi-asserted-by":"publisher","first-page":"3084","DOI":"10.1021\/ct400341p","volume":"9","author":"DR Roe","year":"2013","unstructured":"Roe, D. R. & Cheatham, T. E. PTRAJ and CPPTRAJ: Software for Processing and Analysis of Molecular Dynamics Trajectory Data. J. Chem. Theory Comput. 9, 3084\u20133095 (2013).","journal-title":"J. Chem. Theory Comput."},{"key":"BFsrep45389_CR57","doi-asserted-by":"publisher","first-page":"1668","DOI":"10.1002\/jcc.20290","volume":"26","author":"DA Case","year":"2005","unstructured":"Case, D. A. et al. The Amber biomolecular simulation programs. J. Comput. Chem. 26, 1668\u20131688 (2005).","journal-title":"J. Comput. Chem."},{"key":"BFsrep45389_CR58","doi-asserted-by":"publisher","first-page":"3543","DOI":"10.1021\/jp4125099","volume":"118","author":"DR Roe","year":"2014","unstructured":"Roe, D. R., Bergonzo, C. & Cheatham, T. E. Evaluation of Enhanced Sampling Provided by Accelerated Molecular Dynamics with Hamiltonian Replica Exchange Methods. J. Phys. Chem. B 118, 3543\u20133552 (2014).","journal-title":"J. Phys. Chem. B"},{"key":"BFsrep45389_CR59","doi-asserted-by":"publisher","first-page":"1041","DOI":"10.1016\/j.bbagen.2014.09.007","volume":"1850","author":"R Galindo-Murillo","year":"2015","unstructured":"Galindo-Murillo, R., Roe, D. R. & Cheatham, T. E. Convergence and reproducibility in molecular dynamics simulations of the DNA duplex d(gcacgaacgaacgaacgc). Biochim. Biophys. Acta 1850, 1041\u20131058 (2015).","journal-title":"Biochim. Biophys. Acta"},{"key":"BFsrep45389_CR60","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/978-1-59745-177-2_5","volume":"443","author":"S Hayward","year":"2008","unstructured":"Hayward, S. & Groot, B. L. Normal Modes and Essential Dynamics. Molecular Modeling of Proteins 443, 89\u2013106 (2008).","journal-title":"Molecular Modeling of Proteins"},{"key":"BFsrep45389_CR61","doi-asserted-by":"publisher","first-page":"3341","DOI":"10.1002\/prot.22841","volume":"78","author":"A Ahmed","year":"2010","unstructured":"Ahmed, A., Villinger, S. & Gohlke, H. Large-scale comparison of protein essential dynamics from molecular dynamics simulations and coarse-grained normal mode analyses. Proteins 78, 3341\u20133352 (2010).","journal-title":"Proteins"},{"key":"BFsrep45389_CR62","doi-asserted-by":"publisher","first-page":"3059","DOI":"10.1093\/nar\/gkf436","volume":"30","author":"K Katoh","year":"2002","unstructured":"Katoh, K. MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform. Nucleic Acids Res. 30, 3059\u20133066 (2002).","journal-title":"Nucleic Acids Res"},{"key":"BFsrep45389_CR63","doi-asserted-by":"publisher","first-page":"5.6.1","DOI":"10.1002\/0471250953.bi0506s47","volume":"54","author":"B Webb","year":"2014","unstructured":"Webb, B. & Sali, A. Comparative Protein Structure Modeling Using MODELLER. Current Protocols in Bioinformatics 54, 5.6.1\u20135.6.32 (2014).","journal-title":"Current Protocols in Bioinformatics"},{"key":"BFsrep45389_CR64","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1006\/jmbi.1998.2401","volume":"285","author":"J Word","year":"1999","unstructured":"Word, J., Lovell, S. C., Richardson, J. S. & Richardson, D. C. Asparagine and glutamine: using hydrogen atom contacts in the choice of side-chain amide orientation. J. Mol. Biol. 285, 1735\u20131747 (1999).","journal-title":"J. Mol. Biol."},{"key":"BFsrep45389_CR65","doi-asserted-by":"publisher","first-page":"926","DOI":"10.1063\/1.445869","volume":"79","author":"WL Jorgensen","year":"1983","unstructured":"Jorgensen, W. L., Chandrasekhar, J., Madura, J. D., Impey, R. W. & Klein, M. L. Comparison of simple potential functions for simulating liquid water. J. Chem. Phys. 79, 926 (1983).","journal-title":"J. Chem. Phys."},{"key":"BFsrep45389_CR66","doi-asserted-by":"crossref","first-page":"712","DOI":"10.1002\/prot.21123","volume":"65","author":"V Hornak","year":"2006","unstructured":"Hornak, V. et al. Comparison of multiple Amber force fields and development of improved protein backbone parameters. Proteins 65, 712\u2013725 (2006).","journal-title":"Proteins"},{"key":"BFsrep45389_CR67","doi-asserted-by":"publisher","first-page":"281","DOI":"10.1007\/s00894-005-0028-4","volume":"12","author":"N Homeyer","year":"2005","unstructured":"Homeyer, N., Horn, A. H. C., Lanig, H. & Sticht, H. AMBER force-field parameters for phosphorylated amino acids in different protonation states: phosphoserine, phosphothreonine, phosphotyrosine, and phosphohistidine. J. Mol. Model. 12, 281\u2013289 (2005).","journal-title":"J. Mol. Model."},{"key":"BFsrep45389_CR68","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1016\/0021-9991(77)90098-5","volume":"23","author":"J-P Ryckaert","year":"1977","unstructured":"Ryckaert, J.-P., Ciccotti, G. & Berendsen, H. J. Numerical integration of the cartesian equations of motion of a system with constraints: molecular dynamics of n-alkanes. J. Comput. Phys. 23, 327\u2013341 (1977).","journal-title":"J. Comput. Phys."},{"key":"BFsrep45389_CR69","doi-asserted-by":"publisher","first-page":"4193","DOI":"10.1021\/ja00119a045","volume":"117","author":"TEI Cheatham","year":"1995","unstructured":"Cheatham, T. E. I., Miller, J. L., Fox, T., Darden, T. A. & Kollman, P. A. Molecular Dynamics Simulations on Solvated Biomolecular Systems: The Particle Mesh Ewald Method Leads to Stable Trajectories of DNA, RNA, and Proteins. J. Am. Chem. Soc. 117, 4193\u20134194 (1995).","journal-title":"J. Am. Chem. Soc."},{"key":"BFsrep45389_CR70","doi-asserted-by":"crossref","first-page":"3684","DOI":"10.1063\/1.448118","volume":"81","author":"HJC Berendsen","year":"1984","unstructured":"Berendsen, H. J. C., Postma, J. P. M., van Gunsteren, W. F., DiNola, A. & Haak, J. R. Molecular dynamics with coupling to an external bath. J. Chem. Phys. 81, 3684 (1984).","journal-title":"J. Chem. Phys."},{"key":"BFsrep45389_CR71","doi-asserted-by":"publisher","first-page":"150","DOI":"10.1002\/prot.1081","volume":"44","author":"DJ Jacobs","year":"2001","unstructured":"Jacobs, D. J., Rader, A. J., Kuhn, L. A. & Thorpe, M. F. Protein flexibility predictions using graph theory. Proteins 44, 150\u2013165 (2001).","journal-title":"Proteins"},{"key":"BFsrep45389_CR72","doi-asserted-by":"publisher","first-page":"187","DOI":"10.1016\/0021-9991(77)90121-8","volume":"23","author":"G Torrie","year":"1977","unstructured":"Torrie, G. & Valleau, J. Nonphysical sampling distributions in Monte Carlo free-energy estimation: Umbrella sampling. J. Comput. Phys. 23, 187\u2013199 (1977).","journal-title":"J. Comput. Phys."},{"key":"BFsrep45389_CR73","doi-asserted-by":"publisher","first-page":"1011","DOI":"10.1002\/jcc.540130812","volume":"13","author":"S Kumar","year":"1992","unstructured":"Kumar, S., Rosenberg, J. M., Bouzida, D., Swendsen, R. H. & Kollman, P. A. THE weighted histogram analysis method for free-energy calculations on biomolecules. I. The method. J. Comput. Chem. 13, 1011\u20131021 (1992).","journal-title":"J. Comput. Chem."}],"container-title":["Scientific Reports"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/srep45389.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/srep45389","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/www.nature.com\/doifinder\/10.1038\/srep45389","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"},{"URL":"https:\/\/www.nature.com\/articles\/srep45389.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,23]],"date-time":"2022-12-23T23:51:27Z","timestamp":1671839487000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/srep45389"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,3,30]]},"references-count":73,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2017,6,19]]}},"alternative-id":["BFsrep45389"],"URL":"https:\/\/doi.org\/10.1038\/srep45389","relation":{},"ISSN":["2045-2322"],"issn-type":[{"value":"2045-2322","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,3,30]]},"assertion":[{"value":"8 December 2016","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 February 2017","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 March 2017","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing financial interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"45389"}} diff --git a/tests/srep45389.json b/tests/srep45389_openalex.json similarity index 100% rename from tests/srep45389.json rename to tests/srep45389_openalex.json diff --git a/tests/test_abstract_processor.py b/tests/test_abstract_processor.py index c0ca6c0..16971f8 100644 --- a/tests/test_abstract_processor.py +++ b/tests/test_abstract_processor.py @@ -204,3 +204,173 @@ class TestAbstractProcessor: mock_crossref.assert_not_called() mock_openalex.assert_called_once() assert result2.source == "openalex" + + def test_custom_license_console_output(self): + """Test console output for custom licenses without names""" + # Create a custom license without a name + custom_license = License(name="", uri="http://custom.license", short="custom") + + with patch.object( + self.processor, "_get_openalex_abstract", return_value="OpenAlex text" + ): + with patch.object(self.processor.console, "print") as mock_print: + result = self.processor.get_abstract("10.1234/test", {}, custom_license) + + # Should print custom license message + mock_print.assert_called() + # Check that it mentions "Custom license" + call_args = mock_print.call_args[0][0] + assert "Custom license does not allow derivative works" in call_args + assert result.source == "openalex" + + def test_crossref_api_failure(self): + """Test _get_crossref_abstract when API call fails""" + from unittest.mock import Mock + + # Mock API response failure + mock_response = Mock() + mock_response.status_code = 404 + + with patch.object( + self.processor.api_client, "make_request", return_value=mock_response + ): + result = self.processor._get_crossref_abstract("10.1234/test") + assert result is None + + # Test with no response + with patch.object(self.processor.api_client, "make_request", return_value=None): + result = self.processor._get_crossref_abstract("10.1234/test") + assert result is None + + def test_get_openalex_abstract_no_inverted_index(self): + """Test _get_openalex_abstract when no abstract_inverted_index exists""" + data = {"title": "Test Article"} # No abstract_inverted_index + + result = self.processor._get_openalex_abstract(data) + assert result is None + + def test_clean_jats_comprehensive(self): + """Test _clean_jats method with various JATS tags""" + # Test with None input + result = self.processor._clean_jats(None) + assert result == "" + + # Test with empty string + result = self.processor._clean_jats("") + assert result == "" + + # Test with ordered list + jats_text = 'First itemSecond item' + expected = "
  1. First item
  2. Second item
" + result = self.processor._clean_jats(jats_text) + assert result == expected + + # Test with unordered list + jats_text = 'Bullet oneBullet two' + expected = "
  • Bullet one
  • Bullet two
" + result = self.processor._clean_jats(jats_text) + assert result == expected + + # Test with mixed formatting tags + jats_text = "This is italic and bold text with superscript and subscript." + expected = "

This is italic and bold text with superscript and subscript.

" + result = self.processor._clean_jats(jats_text) + assert result == expected + + # Test with other formatting tags + jats_text = "Underlined Code Small caps" + expected = "Underlined Code Small caps" + result = self.processor._clean_jats(jats_text) + assert result == expected + + # Test with title and blockquote + jats_text = "Section TitleThis is a quote" + expected = "

Section Title

This is a quote
" + result = self.processor._clean_jats(jats_text) + assert result == expected + + def test_no_abstract_found_console_messages(self): + """Test console messages when no abstract is found""" + license_obj = create_license_from_map("cc-by-nd") # No derivative allowed + + with patch.object(self.processor, "_get_openalex_abstract", return_value=None): + with patch.object(self.processor.console, "print") as mock_print: + result = self.processor.get_abstract("10.1234/test", {}, license_obj) + + # Should print warning messages + assert mock_print.call_count >= 2 + + # Check for specific warning messages + call_messages = [call[0][0] for call in mock_print.call_args_list] + assert any( + "No abstract found in OpenAlex!" in msg for msg in call_messages + ) + assert any( + "No abstract found in either CrossRef nor OpenAlex!" in msg + for msg in call_messages + ) + + assert result.text == "" + assert result.source == "none" + + def test_crossref_abstract_with_real_data(self, crossref_data): + """Test CrossRef abstract extraction using real CrossRef data""" + from http import HTTPStatus + from unittest.mock import Mock + + # Mock successful API response with real data + mock_response = Mock() + mock_response.status_code = HTTPStatus.OK + mock_response.json.return_value = crossref_data + + # Extract DOI from CrossRef data since we're using other values from the response + expected_doi = crossref_data["message"]["DOI"] + + with patch.object( + self.processor.api_client, "make_request", return_value=mock_response + ): + result = self.processor._get_crossref_abstract(expected_doi) + + # Should successfully extract and clean the abstract + assert result is not None + assert len(result) > 0 + + # Check that JATS tags were converted to HTML + assert "

" in result # JATS paragraphs converted + assert "" in result # JATS italic converted + assert "" in result # JATS subscript converted + assert "jats:" not in result # No JATS tags should remain + + def test_jats_cleaning_comprehensive_real_data(self, crossref_data): + """Test JATS cleaning with real CrossRef abstract data""" + + raw_abstract = crossref_data["message"]["abstract"] + + # Clean the JATS tags + cleaned = self.processor._clean_jats(raw_abstract) + + # Verify specific transformations from the real data + assert "" not in cleaned + assert "

" in cleaned # Title should be converted + assert "" not in cleaned + assert "

" in cleaned # Paragraphs should be converted + assert "" not in cleaned + assert "" in cleaned # Subscripts should be converted + assert "" not in cleaned + assert "" in cleaned # Italics should be converted + + # Ensure the content is preserved by checking for specific content from the abstract + assert "pyruvate phosphate dikinase" in cleaned.lower() + assert "Abstract" in cleaned + + def test_openalex_abstract_reconstruction_with_real_data(self, openalex_data): + """Test OpenAlex abstract reconstruction using real inverted index data""" + + # Extract the abstract using the inverted index + result = self.processor._get_openalex_abstract(openalex_data) + + if result: # Only test if there's an abstract in the data + assert isinstance(result, str) + assert len(result) > 0 + # Should be reconstructed from word positions + assert " " in result # Should have spaces between words diff --git a/tests/test_api_client.py b/tests/test_api_client.py index 2cc6881..aea461a 100644 --- a/tests/test_api_client.py +++ b/tests/test_api_client.py @@ -428,3 +428,101 @@ class TestAPIClientUsageScenarios: assert "X-Dataverse-key" in client.session.headers assert "Custom-Header" in client.session.headers assert client.session.headers["Custom-Header"] == "custom-value" + + +def test_api_response_structure_processing(openalex_data): + """Test API client processes complex nested response structures correctly.""" + client = APIClient() + + with patch.object(client.session, "request") as mock_request: + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = openalex_data + mock_request.return_value = mock_response + + response = client.make_request("https://api.openalex.org/works/test") + + assert response is not None + data = response.json() + + # Test that nested structures are preserved through the request pipeline + if "authorships" in data: + assert isinstance(data["authorships"], list) + # Test deep nesting preservation + for authorship in data["authorships"]: + if "institutions" in authorship: + assert isinstance(authorship["institutions"], list) + + # Test data type preservation through JSON serialization/deserialization + for key, value in data.items(): + assert value is not None or key in [ + "abstract_inverted_index", + "abstract_inverted_index_v3", + ] # Some fields can legitimately be None + + +def test_api_unicode_encoding_processing(openalex_data): + """Test API client correctly processes Unicode characters in responses.""" + client = APIClient() + + with patch.object(client.session, "request") as mock_request: + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = openalex_data + mock_response.encoding = "utf-8" + mock_request.return_value = mock_response + + response = client.make_request("https://api.openalex.org/works/test") + + assert response is not None + data = response.json() + + # Test that Unicode characters are preserved through processing pipeline + def check_unicode_preservation(obj): + if isinstance(obj, str): + # Should preserve Unicode characters + try: + obj.encode("utf-8") + return True + except UnicodeEncodeError: + return False + elif isinstance(obj, dict): + return all(check_unicode_preservation(v) for v in obj.values()) + elif isinstance(obj, list): + return all(check_unicode_preservation(item) for item in obj) + return True + + assert check_unicode_preservation(data) + + +def test_large_response_processing_efficiency(openalex_data): + """Test API client efficiently processes large response payloads.""" + client = APIClient() + + # Create large response based on real structure + large_data = dict(openalex_data) + if "referenced_works" in large_data: + # Extend existing referenced works + base_works = ( + large_data["referenced_works"][:10] + if large_data["referenced_works"] + else [] + ) + large_data["referenced_works"] = base_works * 100 # Create large list + + with patch.object(client.session, "request") as mock_request: + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = large_data + mock_request.return_value = mock_response + + response = client.make_request("https://api.openalex.org/works/test") + + assert response is not None + data = response.json() + + # Verify large data structures are handled correctly + if "referenced_works" in data: + assert len(data["referenced_works"]) > 100 + # All elements should maintain structure integrity + assert all(isinstance(work, str) for work in data["referenced_works"]) diff --git a/tests/test_citation_builder.py b/tests/test_citation_builder.py index b664bf7..49d8b53 100644 --- a/tests/test_citation_builder.py +++ b/tests/test_citation_builder.py @@ -1,18 +1,8 @@ -import json -import os - import pytest from doi2dataset import CitationBuilder, Person, PIFinder - -@pytest.fixture -def openalex_data(): - """Load the saved JSON response from the file 'srep45389.json'""" - json_path = os.path.join(os.path.dirname(__file__), "srep45389.json") - with open(json_path, encoding="utf-8") as f: - data = json.load(f) - return data +# openalex_data fixture now comes from conftest.py @pytest.fixture @@ -169,3 +159,113 @@ def test_build_authors_with_ror(openalex_data, pi_finder): assert "@type" in expanded_value assert expanded_value["@type"] == "https://schema.org/Organization" + + +def test_build_authors_with_real_data(openalex_data, pi_finder): + """Test author building with real OpenAlex data structure""" + doi = openalex_data["doi"].replace("https://doi.org/", "") + builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder) + + authors, corresponding = builder.build_authors() + + # Should have multiple authors from the real data + assert len(authors) > 0 + + # Extract expected author names from the API response data + expected_authors = [] + for authorship in openalex_data.get("authorships", []): + if "author" in authorship and "display_name" in authorship["author"]: + expected_authors.append(authorship["author"]["display_name"]) + + # Check that real author names from API response are processed correctly + author_names = [f"{author.given_name} {author.family_name}" for author in authors] + + # Verify that at least some expected authors from the API response are found + found_authors = 0 + for expected_name in expected_authors: + if any(expected_name in author_name for author_name in author_names): + found_authors += 1 + + # Should find at least some authors from the API response + assert ( + found_authors > 0 + ), f"No expected authors found. Expected: {expected_authors}, Got: {author_names}" + + +def test_process_author_edge_cases(pi_finder): + """Test _process_author with various edge cases""" + builder = CitationBuilder( + data={"authorships": []}, doi="10.1000/test", pi_finder=pi_finder + ) + + # Test with minimal author data + minimal_author = {"display_name": "John Smith"} + empty_authorship = {} + person = builder._process_author(minimal_author, empty_authorship) + assert person.given_name == "John" + assert person.family_name == "Smith" + + # Test with ORCID + author_with_orcid = { + "display_name": "Jane Doe", + "orcid": "https://orcid.org/0000-0000-0000-0000", + } + person = builder._process_author(author_with_orcid, empty_authorship) + assert person.orcid == "0000-0000-0000-0000" # URL part is stripped + + +def test_build_grants_with_default_config(pi_finder): + """Test that grants include default grants from config""" + # Use real data structure but focus on grants behavior + data = {"authorships": [], "grants": []} + + builder = CitationBuilder(data=data, doi="10.1000/test", pi_finder=pi_finder) + grants = builder.build_grants() + + # Should have at least the default grants from config + # The exact number depends on the config, but should be >= 0 + assert isinstance(grants, list) + for grant in grants: + assert len(grant) == 2 # Should have agency and value fields + assert grant[0].name == "grantNumberAgency" + assert grant[1].name == "grantNumberValue" + + +def test_process_corresponding_author_no_email(pi_finder): + """Test _process_corresponding_author when no email is available""" + builder = CitationBuilder( + data={"authorships": []}, doi="10.1000/test", pi_finder=pi_finder + ) + + # Create a Person without email + person = Person( + given_name="John", family_name="Doe", orcid=None, email=None, affiliation=None + ) + + authorship = {"is_corresponding": True} + + result = builder._process_corresponding_author(person, authorship) + + # Should return None when no email is available + assert result is None + + +def test_build_authors_skip_empty_authorships(pi_finder): + """Test that empty author entries are skipped""" + data_with_empty_authors = { + "authorships": [ + {"author": {}}, # Empty author + {}, # No author key + {"author": {"display_name": "John Doe"}}, # Valid author + ] + } + + builder = CitationBuilder( + data=data_with_empty_authors, doi="10.1000/test", pi_finder=pi_finder + ) + authors, corresponding = builder.build_authors() + + # Should only process the one valid author + assert len(authors) == 1 + assert authors[0].given_name == "John" + assert authors[0].family_name == "Doe" diff --git a/tests/test_integration.py b/tests/test_integration.py index 820441f..c62f27e 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,4 +1,3 @@ -import json import os from unittest.mock import patch @@ -44,27 +43,15 @@ def load_config_test(): Config.load_config(config_path=config_path) -@pytest.fixture -def fake_openalex_response(): - """ - Load the saved JSON response from the file 'srep45389.json' - located in the same directory as this test file. - """ - json_path = os.path.join(os.path.dirname(__file__), "srep45389.json") - with open(json_path, encoding="utf-8") as f: - data = json.load(f) - return data - - -def test_fetch_doi_data_with_file(mocker, fake_openalex_response): +def test_fetch_doi_data_with_file(mocker, openalex_data): """ Test fetching DOI metadata by simulating the API call with a locally saved JSON response. The APIClient.make_request method is patched to return a fake response built from the contents of 'srep45389.json', ensuring that the configuration is loaded from 'config_test.yaml'. """ - doi = "10.1038/srep45389" - fake_response = FakeResponse(fake_openalex_response, 200) + doi = openalex_data["doi"].replace("https://doi.org/", "") + fake_response = FakeResponse(openalex_data, 200) # Patch the make_request method of APIClient to return our fake_response. mocker.patch("doi2dataset.APIClient.make_request", return_value=fake_response) @@ -75,11 +62,11 @@ def test_fetch_doi_data_with_file(mocker, fake_openalex_response): # Call _fetch_data(), which should now return our fake JSON data. data = processor._fetch_data() - # Verify that the fetched data matches the fake JSON data. - assert data == fake_openalex_response + # Verify that the fetched data matches the OpenAlex data. + assert data == openalex_data -def test_openalex_abstract_extraction(mocker, fake_openalex_response): +def test_openalex_abstract_extraction(openalex_data): """Test the extraction of abstracts from OpenAlex inverted index data.""" # Create API client for AbstractProcessor api_client = APIClient() @@ -88,20 +75,20 @@ def test_openalex_abstract_extraction(mocker, fake_openalex_response): processor = AbstractProcessor(api_client=api_client) # Call the protected method directly with the fake response - abstract_text = processor._get_openalex_abstract(fake_openalex_response) + result = processor._get_openalex_abstract(openalex_data) # Verify abstract was extracted - assert abstract_text is not None + assert result is not None # If abstract exists in the response, it should be properly extracted - if "abstract_inverted_index" in fake_openalex_response: - assert len(abstract_text) > 0 + if "abstract_inverted_index" in openalex_data: + assert len(result) > 0 -def test_subject_mapper(fake_openalex_response): +def test_subject_mapper(openalex_data): """Test that the SubjectMapper correctly maps OpenAlex topics to subjects.""" # Extract topics from the OpenAlex response - topics = fake_openalex_response.get("topics", []) + topics = openalex_data.get("topics", []) # Get subjects using the class method subjects = SubjectMapper.get_subjects({"topics": topics}) @@ -111,15 +98,15 @@ def test_subject_mapper(fake_openalex_response): assert isinstance(subjects, list) -def test_citation_builder(fake_openalex_response): +def test_citation_builder(openalex_data): """Test that the CitationBuilder correctly builds author information.""" - doi = "10.1038/srep45389" + doi = openalex_data["doi"].replace("https://doi.org/", "") # Mock PIFinder with an empty list of PIs pi_finder = PIFinder(pis=[]) # Create builder with required arguments - builder = CitationBuilder(data=fake_openalex_response, doi=doi, pi_finder=pi_finder) + builder = CitationBuilder(data=openalex_data, doi=doi, pi_finder=pi_finder) # Test building other IDs other_ids = builder.build_other_ids() @@ -134,12 +121,10 @@ def test_citation_builder(fake_openalex_response): assert isinstance(topics, list) -def test_license_processor(fake_openalex_response): +def test_license_processor(openalex_data): """Test that the LicenseProcessor correctly identifies and processes licenses.""" # Create a simplified data structure that contains license info - license_data = { - "primary_location": fake_openalex_response.get("primary_location", {}) - } + license_data = {"primary_location": openalex_data.get("primary_location", {})} # Process the license license_obj = LicenseProcessor.process_license(license_data) @@ -182,14 +167,14 @@ def test_config_load_invalid_path(): Config.load_config(config_path=invalid_path) -def test_metadata_processor_fetch_data(mocker, fake_openalex_response): +def test_metadata_processor_fetch_data(mocker, openalex_data): """Test the _fetch_data method of the MetadataProcessor class with mocked responses.""" - doi = "10.1038/srep45389" + doi = openalex_data["doi"].replace("https://doi.org/", "") # Mock API response mocker.patch( "doi2dataset.APIClient.make_request", - return_value=FakeResponse(fake_openalex_response, 200), + return_value=FakeResponse(openalex_data, 200), ) # Create processor with upload disabled and progress disabled @@ -200,7 +185,7 @@ def test_metadata_processor_fetch_data(mocker, fake_openalex_response): # Verify that data was fetched correctly assert data is not None - assert data == fake_openalex_response + assert data == openalex_data # Verify the DOI is correctly stored assert processor.doi == doi diff --git a/tests/test_license_processor.py b/tests/test_license_processor.py index ff9a164..c972fe2 100644 --- a/tests/test_license_processor.py +++ b/tests/test_license_processor.py @@ -102,3 +102,82 @@ def test_derivative_allowed_licenses_set_completeness(): """Test that DERIVATIVE_ALLOWED_LICENSES contains expected licenses""" expected_licenses = {"cc-by", "cc-by-sa", "cc-by-nc", "cc-by-nc-sa", "cc0", "pd"} assert DERIVATIVE_ALLOWED_LICENSES == expected_licenses + + +def test_license_processing_with_real_openalex_structure(openalex_data): + """Test that license processor correctly handles real OpenAlex data structure.""" + # Process license data exactly as the real application would + license_obj = LicenseProcessor.process_license(openalex_data) + + # Verify the processing logic works with real data structure + assert isinstance(license_obj, License) + assert hasattr(license_obj, "short") + assert hasattr(license_obj, "name") + assert hasattr(license_obj, "uri") + + # Test derivative permission logic with real license + if license_obj.short in DERIVATIVE_ALLOWED_LICENSES: + # Should be able to use CrossRef abstract + assert license_obj.short in [ + "cc-by", + "cc-by-sa", + "cc-by-nc", + "cc-by-nc-sa", + "cc0", + "pd", + ] + else: + # Should use OpenAlex abstract reconstruction + assert license_obj.short not in DERIVATIVE_ALLOWED_LICENSES + + +def test_license_processing_with_multiple_locations(openalex_data): + """Test license processing logic with multiple publication locations.""" + # Process all locations like the real application might encounter + locations = openalex_data.get("locations", []) + + processed_licenses = [] + for location in locations: + # Create data structure as it would appear from API + location_data = {"primary_location": location} + license_obj = LicenseProcessor.process_license(location_data) + processed_licenses.append(license_obj) + + # Verify processing logic works for all location types + assert len(processed_licenses) > 0 + assert all(isinstance(lic, License) for lic in processed_licenses) + + # Should handle various license states consistently + for license_obj in processed_licenses: + if license_obj.short != "unknown": + assert ( + license_obj.short in DERIVATIVE_ALLOWED_LICENSES + or license_obj.short not in DERIVATIVE_ALLOWED_LICENSES + ) + + +def test_crossref_license_url_mapping_logic(crossref_data): + """Test license URL to short-form mapping logic with real CrossRef data.""" + # Extract license information as the real application would + crossref_licenses = crossref_data.get("message", {}).get("license", []) + + if crossref_licenses: + license_url = crossref_licenses[0].get("URL", "") + + # Test the mapping logic that would be used in practice + from doi2dataset import LICENSE_MAP + + # Find corresponding short form by URL matching + matching_short = None + for short, (uri, _name) in LICENSE_MAP.items(): + if uri == license_url: + matching_short = short + break + + if matching_short: + # Test that our license processor handles this correctly + test_data = {"primary_location": {"license": matching_short}} + license_obj = LicenseProcessor.process_license(test_data) + + assert license_obj.short == matching_short + assert license_obj.uri == license_url diff --git a/tests/test_metadata_processor.py b/tests/test_metadata_processor.py index 2168699..ab0ae89 100644 --- a/tests/test_metadata_processor.py +++ b/tests/test_metadata_processor.py @@ -1,5 +1,4 @@ import json -import os import tempfile from http import HTTPStatus from pathlib import Path @@ -9,14 +8,7 @@ import pytest from doi2dataset import MetadataProcessor - -@pytest.fixture -def openalex_data(): - """Load the saved JSON response from the file 'srep45389.json'""" - json_path = os.path.join(os.path.dirname(__file__), "srep45389.json") - with open(json_path, encoding="utf-8") as f: - data = json.load(f) - return data +# openalex_data fixture now comes from conftest.py @pytest.fixture @@ -444,6 +436,175 @@ class TestMetadataProcessorErrorHandling: with pytest.raises(KeyError, match="Missing required field"): processor.process() + def test_update_progress_with_progress_bar(self): + """Test progress update when progress bar is enabled.""" + processor = MetadataProcessor( + doi="10.1000/test", output_path=Path("/tmp/test.json"), progress=True + ) + processor.console = MagicMock() + + # Mock progress bar + mock_progress = MagicMock() + processor.progress = mock_progress + processor.task_id = "test_task_id" + + processor._update_progress() + + # Verify progress.advance was called + mock_progress.advance.assert_called_once_with("test_task_id") + + def test_update_progress_without_progress_bar(self): + """Test progress update when progress bar is disabled.""" + processor = MetadataProcessor( + doi="10.1000/test", output_path=Path("/tmp/test.json"), progress=False + ) + processor.console = MagicMock() + + # No progress bar set + processor.progress = None + processor.task_id = None + + # Should not raise any errors + processor._update_progress() + + @patch("doi2dataset.processing.metadata.APIClient") + def test_upload_success_with_persistent_id(self, mock_api_client_class): + """Test successful upload with persistent ID response.""" + import os + + from doi2dataset import Config + + # Load test config + config_path = os.path.join(os.path.dirname(__file__), "config_test.yaml") + Config.load_config(config_path=config_path) + + # Mock the APIClient instance and response + mock_client = Mock() + mock_response = Mock() + mock_response.status_code = 201 # Success status for upload + mock_response.json.return_value = { + "data": {"persistentId": "doi:10.7910/DVN/TEST123"} + } + mock_client.make_request.return_value = mock_response + mock_api_client_class.return_value = mock_client + + processor = MetadataProcessor( + doi="10.1000/test", output_path=Path("/tmp/test.json"), upload=True + ) + processor.console = MagicMock() + + metadata = {"datasetVersion": {"files": []}} + result = processor._upload_data(metadata) + + # Verify successful response handling + assert result["data"]["persistentId"] == "doi:10.7910/DVN/TEST123" + processor.console.print.assert_called() + + @patch("doi2dataset.processing.metadata.APIClient") + def test_upload_success_console_output(self, mock_api_client_class): + """Test console output during successful upload.""" + import os + from unittest.mock import Mock + + from doi2dataset import Config + + # Load test config + config_path = os.path.join(os.path.dirname(__file__), "config_test.yaml") + Config.load_config(config_path=config_path) + + # Mock the APIClient instance and response + mock_client = Mock() + mock_response = Mock() + mock_response.status_code = 201 # Success status for upload + mock_response.json.return_value = { + "data": {"persistentId": "doi:10.7910/DVN/TEST123"} + } + mock_client.make_request.return_value = mock_response + mock_api_client_class.return_value = mock_client + + processor = MetadataProcessor( + doi="10.1000/test", output_path=Path("/tmp/test.json"), upload=True + ) + processor.console = MagicMock() + + metadata = {"datasetVersion": {"files": []}} + processor._upload_data(metadata) + + # Verify successful upload message was printed + processor.console.print.assert_called() + call_args = [call[0][0] for call in processor.console.print.call_args_list] + upload_message = next( + (msg for msg in call_args if "Dataset uploaded to:" in msg), None + ) + assert upload_message is not None + assert "TEST123" in upload_message + + def test_progress_update_integration(self): + """Test progress updates during complete processing workflow.""" + from unittest.mock import patch + + # Mock all external dependencies + mock_data = {"title": "Test Paper", "authorships": []} + + with patch( + "doi2dataset.processing.metadata.MetadataProcessor._fetch_data", + return_value=mock_data, + ): + with patch( + "doi2dataset.processing.metadata.MetadataProcessor._build_metadata", + return_value={"test": "metadata"}, + ): + with patch( + "doi2dataset.processing.metadata.MetadataProcessor._save_output" + ): + processor = MetadataProcessor( + doi="10.1000/test", + output_path=Path("/tmp/test.json"), + progress=True, + ) + processor.console = MagicMock() + + # Mock progress bar + mock_progress = MagicMock() + processor.progress = mock_progress + processor.task_id = "test_task" + + # Process should call _update_progress multiple times + processor.process() + + # Verify progress was advanced multiple times (fetch, build, save) + assert mock_progress.advance.call_count >= 3 + for call in mock_progress.advance.call_args_list: + assert call[0][0] == "test_task" + + def test_fetch_data_with_real_structure(self, openalex_data): + """Test _fetch_data method with realistic OpenAlex response structure.""" + from http import HTTPStatus + from unittest.mock import Mock, patch + + mock_client = Mock() + mock_response = Mock() + mock_response.status_code = HTTPStatus.OK + mock_response.json.return_value = openalex_data + # Test fetch_data with real structure + mock_client.make_request.return_value = mock_response + + with patch( + "doi2dataset.processing.metadata.APIClient", return_value=mock_client + ): + processor = MetadataProcessor( + doi="10.1038/srep45389", output_path=Path("/tmp/test.json") + ) + processor.console = MagicMock() + + result = processor._fetch_data() + + # Verify we got the expected data structure + assert result == openalex_data + assert "title" in result + assert "authorships" in result + assert "publication_date" in result + def test_partial_data(self): """Test handling of incomplete API responses.""" with patch( diff --git a/tests/test_models.py b/tests/test_models.py index 61e081d..391abc5 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -84,3 +84,81 @@ def test_person_to_dict_with_no_affiliation(): assert result["family_name"] == "Green" assert result["given_name"] == "Alice" assert result["orcid"] == "0000-0002-1111-2222" + + +def test_person_creation_from_real_authorship_data(openalex_data): + """Test Person creation by processing real OpenAlex authorship data.""" + from doi2dataset.utils.validation import split_name + + # Process first authorship like the real application would + first_authorship = openalex_data["authorships"][0] + author_data = first_authorship["author"] + + # Extract display_name and process it like CitationBuilder does + display_name = author_data.get("display_name", "") + given_name, family_name = split_name(display_name) + + # Extract ORCID and clean it like the real application + orcid = author_data.get("orcid") + if orcid and "orcid.org/" in orcid: + orcid = orcid.split("orcid.org/")[-1] + + person = Person( + family_name=family_name, + given_name=given_name, + orcid=orcid, + email=None, + affiliation=None, + ) + + # Verify the processing worked correctly + assert person.family_name != "" + assert person.given_name != "" + if orcid: + assert len(person.orcid) == 19 # ORCID format: 0000-0000-0000-0000 + + +def test_institution_processing_from_real_data(openalex_data): + """Test Institution creation by processing real OpenAlex institution data.""" + # Process first institution like the real application would + first_authorship = openalex_data["authorships"][0] + institution_data = first_authorship["institutions"][0] + + # Extract and process data like CitationBuilder does + display_name = institution_data.get("display_name", "") + ror = institution_data.get("ror", "") + + institution = Institution(display_name=display_name, ror=ror) + + # Test that processing preserves essential functionality + assert len(institution.display_name) > 0 + if ror: + assert ror.startswith("https://ror.org/") + affiliation_field = institution.affiliation_field() + assert affiliation_field.value == ror + assert affiliation_field.expanded_value["termName"] == display_name + + +def test_multiple_institutions_processing(openalex_data): + """Test processing multiple institutions from real authorship data.""" + institutions_created = [] + + # Process all institutions like the real application would + for authorship in openalex_data["authorships"]: + for institution_data in authorship.get("institutions", []): + display_name = institution_data.get("display_name", "") + ror = institution_data.get("ror", "") + + if display_name: # Only create if there's actual data + institution = Institution(display_name=display_name, ror=ror) + institutions_created.append(institution) + + # Verify we processed multiple institutions successfully + assert len(institutions_created) > 0 + + # All should have valid display names + assert all(len(inst.display_name) > 0 for inst in institutions_created) + + # Some should have ROR IDs (based on real data) + ror_institutions = [inst for inst in institutions_created if inst.ror] + assert len(ror_institutions) > 0 diff --git a/tests/test_publication_utils.py b/tests/test_publication_utils.py index 40b506e..d639f80 100644 --- a/tests/test_publication_utils.py +++ b/tests/test_publication_utils.py @@ -29,6 +29,86 @@ def test_get_publication_year_with_date(metadata_processor): assert year == "" +def test_publication_year_processing_logic(openalex_data): + """Test publication year extraction logic with real OpenAlex data structure.""" + doi = openalex_data["doi"].replace("https://doi.org/", "") + processor = MetadataProcessor(doi=doi, upload=False, progress=False) + processor.console = MagicMock() + + # Test the actual processing logic used by the application + year = processor._get_publication_year(openalex_data) + + # Verify the processing logic works (should prefer publication_year field) + assert isinstance(year, int) + assert year > 1900 # Reasonable publication year + assert year <= 2030 # Not future date + + +def test_doi_validation_processing_pipeline(openalex_data): + """Test DOI processing pipeline with real OpenAlex DOI format.""" + from doi2dataset.utils.validation import normalize_doi, validate_doi + + # Extract DOI as the real application would + doi_from_data = openalex_data.get("doi", "") + + # Process DOI through the same pipeline as real application + if doi_from_data.startswith("https://doi.org/"): + clean_doi = doi_from_data.replace("https://doi.org/", "") + else: + clean_doi = doi_from_data + + # Test validation and normalization logic + is_valid = validate_doi(clean_doi) + normalized = normalize_doi(clean_doi) + + assert is_valid is True + assert normalized.startswith("10.") + assert len(normalized.split("/")) == 2 # Should have registrant/suffix format + + +def test_subject_mapping_processing_logic(openalex_data): + """Test subject mapping logic with real OpenAlex topics structure.""" + from doi2dataset import SubjectMapper + + # Process topics exactly as the real application would + topics = openalex_data.get("topics", []) + + # Test SubjectMapper processing logic + subjects = SubjectMapper.get_subjects({"topics": topics}) + + # Verify the mapping logic produces valid results + assert isinstance(subjects, list) + + # If we have topics, we should get subjects + if topics: + assert len(subjects) > 0 + # Each subject should be a string + assert all(isinstance(subj, str) for subj in subjects) + + +def test_abstract_reconstruction_processing(openalex_data): + """Test abstract reconstruction logic with real inverted index data.""" + from doi2dataset.api.client import APIClient + from doi2dataset.api.processors import AbstractProcessor + + # Test the actual reconstruction logic used in the application + processor = AbstractProcessor(APIClient()) + + # Process abstract inverted index as the real application would + reconstructed = processor._get_openalex_abstract(openalex_data) + + if openalex_data.get("abstract_inverted_index"): + # Should successfully reconstruct abstract + assert reconstructed is not None + assert isinstance(reconstructed, str) + assert len(reconstructed) > 0 + # Should contain readable text with spaces + assert " " in reconstructed + else: + # Should handle missing abstract gracefully + assert reconstructed is None + + def test_get_publication_year_with_both_fields(metadata_processor): """Test that _get_publication_year prioritizes publication_year over date""" data = {"publication_year": 2020, "publication_date": "2019-05-15"} diff --git a/tests/test_validation_utils.py b/tests/test_validation_utils.py index cd83064..ffcae4a 100644 --- a/tests/test_validation_utils.py +++ b/tests/test_validation_utils.py @@ -188,6 +188,47 @@ def test_validate_email_validator_error(): assert result is False +@patch("dns.resolver.resolve") +def test_validate_email_dns_exceptions(mock_resolve): + """Test email validation with DNS-related exceptions.""" + # Test with mocked DNS resolver raising various exceptions + with patch("email_validator.validate_email") as mock_validate: + mock_result = Mock() + mock_result.normalized = "test@example.com" + mock_validate.return_value = mock_result + + # Test with NoAnswer exception + mock_resolve.side_effect = dns.resolver.NoAnswer() + result = validate_email_address("test@example.com") + assert result is False + + # Test with NXDOMAIN exception + mock_resolve.side_effect = dns.resolver.NXDOMAIN() + result = validate_email_address("test@example.com") + assert result is False + + +def test_validate_email_validator_exceptions(): + """Test email validation with email_validator exceptions.""" + # Test email validator error + with patch("email_validator.validate_email") as mock_validate: + mock_validate.side_effect = EmailNotValidError("Invalid format") + result = validate_email_address("invalid-email") + assert result is False + + # Test with various malformed emails that should fail validation + invalid_emails = [ + "plainaddress", + "@missingusername.com", + "username@.com", + "username@com", + "username..double.dot@example.com", + ] + + for email in invalid_emails: + assert validate_email_address(email) is False + + # DOI validation edge cases def test_validate_doi_formats(): """Test validation of various valid DOI formats."""