# Origin: commit 457d4afaece821a5881e91694db78ec6fa310240
# ("feat: menu parsing", Joel Kronqvist, 2026-01-31), file src/menu/parse.py.
"""Parse an RSS-style XML menu into a mapping from dates to menu entries.

The expected document shape is ``<root><channel><item>...</item></channel></root>``
where every ``item`` has a ``title`` holding a Finnish weekday abbreviation
followed by a date (``'ma 26.1.2026'``) and a ``description`` holding the entry.
"""

import datetime
import re
import xml.etree.ElementTree as ET

# Finnish weekday abbreviations accepted as the first word of an item title.
_WEEKDAYS = frozenset({"ma", "ti", "ke", "to", "pe", "la", "su"})

# D.M.YYYY with one- or two-digit day and month; compiled once for all calls.
_DATE_PATTERN = re.compile(r"([0-9]{1,2})\.([0-9]{1,2})\.([0-9]{4})")


def _require_text(node: ET.Element) -> str:
    """Return the text of *node*, raising ``ValueError`` if it has none."""
    text = node.text
    if text is None:
        raise ValueError(f"{node.tag} should contain text")
    return text


def parse(xml_menu: str) -> dict[datetime.date, str]:
    """Parse the given xml menu and return a (date -> entry) map.

    Every ``./channel/item`` element contributes one entry: its ``title`` is
    parsed with `parse_weekday_date` and its ``description`` text is the value.
    If several items share a date, the last one wins.

    Raises:
        xml.etree.ElementTree.ParseError: if *xml_menu* is not well-formed XML.
        ValueError: if an item lacks a ``title`` or ``description`` element,
            if either element has no text, or if the title is not a valid
            weekday-prefixed date.
    """
    # NOTE(review): xml.etree.ElementTree is not hardened against maliciously
    # constructed documents; if the menu comes from an untrusted source,
    # consider a hardened parser.
    root = ET.fromstring(xml_menu)

    menu: dict[datetime.date, str] = {}
    for item in root.findall("./channel/item"):
        title = item.find("title")
        desc = item.find("description")
        if title is None or desc is None:
            raise ValueError("title and description of item should be defined")
        # Resolve the date before the entry text so that a bad title is
        # reported before a missing description text.
        day = parse_weekday_date(_require_text(title))
        menu[day] = _require_text(desc)
    return menu


def parse_from_file(xml_file_path: str) -> dict[datetime.date, str]:
    """Parse the menu from the given xml file and return a (date -> entry) map.

    The file is read as UTF-8 and handed to `parse`.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
        xml.etree.ElementTree.ParseError, ValueError: as raised by `parse`.
    """
    with open(xml_file_path, "r", encoding="utf-8") as f:
        xml_menu = f.read()

    return parse(xml_menu)


def parse_weekday_date(date: str) -> datetime.date:
    """Parse a date of the form 'WD DD.MM.YYYY' and return the date object.

    The weekday must be a Finnish abbreviation (ma, ti, ke, to, pe, la, su).
    It is only checked for being a known abbreviation; it is not verified
    against the actual weekday of the date.

    >>> parse_weekday_date("su 26.1.2026")
    datetime.date(2026, 1, 26)

    Whitespace after the date is ignored.

    >>> parse_weekday_date("ma 26.1.2026 ")
    datetime.date(2026, 1, 26)

    Raises ``ValueError`` if the first word is not a weekday abbreviation
    (this includes empty input).

    >>> parse_weekday_date("mon 16.2.2026")
    Traceback (most recent call last):
        ...
    ValueError: päiväyksen ensimmäinen sanan pitäisi olla viikonpäivän lyhenne

    Parsing the date after the weekday is done as in `parse_date`, so a
    missing or malformed date also raises ``ValueError``.

    >>> parse_weekday_date("ma")
    Traceback (most recent call last):
        ...
    ValueError: päiväyksen tulee olla muotoa PP.KK.VVVV
    """
    parts = date.split(maxsplit=1)

    if not parts or parts[0] not in _WEEKDAYS:
        raise ValueError("päiväyksen ensimmäinen sanan pitäisi olla viikonpäivän lyhenne")

    # A lone weekday leaves no date part; let parse_date report the format
    # error instead of failing on tuple unpacking.
    rest = parts[1].strip() if len(parts) == 2 else ""
    return parse_date(rest)


def parse_date(date: str) -> datetime.date:
    """Parse a date of the form 'DD.MM.YYYY' and return the date object.

    Day and month may have one or two digits; the year must have four.

    >>> parse_date("16.2.2026")
    datetime.date(2026, 2, 16)

    Raises ``ValueError`` if the date is of the wrong format.

    >>> parse_date("bogus")
    Traceback (most recent call last):
        ...
    ValueError: päiväyksen tulee olla muotoa PP.KK.VVVV

    Nonexistent dates are rejected as well:

    >>> parse_date("31.2.2024")
    Traceback (most recent call last):
        ...
    ValueError: päivän tulee olla olemassa ollut tai oleva päivä väliltä 01.01.0001-31.12.9999
    """
    date_match = _DATE_PATTERN.fullmatch(date)
    if date_match is None:
        raise ValueError("päiväyksen tulee olla muotoa PP.KK.VVVV")

    (day, month, year) = date_match.groups()

    try:
        return datetime.date(int(year), int(month), int(day))
    except ValueError as err:
        # datetime.date rejects out-of-range components (day 31 in February,
        # month 13, year 0000); re-raise with the user-facing message.
        raise ValueError(
            f"päivän tulee olla olemassa ollut tai oleva päivä väliltä 01.01.{datetime.MINYEAR:04d}-31.12.{datetime.MAXYEAR}"
        ) from err


if __name__ == "__main__":
    import doctest
    doctest.testmod()