diff --git a/doi2dataset.py b/doi2dataset.py index 6d5d694..7d23409 100755 --- a/doi2dataset.py +++ b/doi2dataset.py @@ -866,6 +866,23 @@ class AbstractProcessor: if not text: return "" + # Handle list tags with sequential processing to avoid duplicate keys + # Process ordered lists first - replace both opening and closing tags + text = text.replace('', "
    ") + # Find and replace closing tags for ordered lists + import re + + # Replace closing tags that follow ordered list openings + # This regex matches that comes after
      tags + pattern = r'(
        .*?)' + text = re.sub(pattern, r'\1
      ', text, flags=re.DOTALL) + + # Process unordered lists second + text = text.replace('', "
        ") + # Replace remaining tags as unordered list closings + text = text.replace('', '
      ') + + # Handle other JATS tags replacements = { "": "", "": "", @@ -885,10 +902,6 @@ class AbstractProcessor: "": "

      ", "": "

      ", "": "

      ", - '': "
        ", - "": "
      ", - '': "
        ", - "": "
      ", "": "
    1. ", "": "
    2. ", "": "
      ",