From 73ae6cf8829957b4f6a55b278a9246ec9043fa3e Mon Sep 17 00:00:00 2001
From: Tulir Asokan <tulir@maunium.net>
Date: Wed, 25 Mar 2020 20:38:21 +0200
Subject: [PATCH] Add locale system for new date parser

---
 reminder/bot.py         |  41 +++++++++++-
 reminder/db.py          |  27 +++++++-
 reminder/locale_util.py | 144 ++++++++++++++++++++++++++++++++++++++++
 reminder/locales.py     |  92 +++++++++++++++++++++++++
 reminder/util.py        |  42 +++---------
 5 files changed, 311 insertions(+), 35 deletions(-)
 create mode 100644 reminder/locale_util.py
 create mode 100644 reminder/locales.py

diff --git a/reminder/bot.py b/reminder/bot.py
index e994013..bc6d1ee 100644
--- a/reminder/bot.py
+++ b/reminder/bot.py
@@ -13,7 +13,7 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
-from typing import Type, Tuple
+from typing import Type, Tuple, List
 from datetime import datetime, timedelta
 from html import escape
 import asyncio
@@ -28,6 +28,7 @@ from maubot.handlers import command, event
 
 from .db import ReminderDatabase
 from .util import Config, ReminderInfo, DateArgument, parse_timezone, format_time
+from .locales import locales
 
 
 class ReminderBot(Plugin):
@@ -142,7 +143,9 @@ class ReminderBot(Plugin):
         await evt.reply(f"Maubot [Reminder](https://github.com/maubot/reminder) plugin.\n\n"
                         f"* !{self.base_command} <date> <message> - Add a reminder\n"
                         f"* !{self.base_command} list - Get a list of your reminders\n"
-                        f"* !{self.base_command} tz <timezone> - Set your time zone\n\n"
+                        f"* !{self.base_command} tz <timezone> - Set your time zone\n"
+                        f"* !{self.base_command} locale <locale> - Set your locale\n"
+                        f"* !{self.base_command} locales - List available locales\n\n"
                         "<date> can be a time delta (e.g. `2 days 1.5 hours` or `friday at 15:00`) "
                         "or an absolute date (e.g. `2020-03-27 15:00`)\n\n"
                         "To get mentioned by a reminder added by someone else, upvote the message "
@@ -179,6 +182,40 @@ class ReminderBot(Plugin):
     def format_time(self, evt: MessageEvent, reminder: ReminderInfo) -> str:
         return format_time(reminder.date.astimezone(self.db.get_timezone(evt.sender)))
 
+    @remind.subcommand("locales", help="List available locales")
+    async def locales(self, evt: MessageEvent) -> None:
+        """Reply with a Markdown list of every registered locale and its display name."""
+        lines = []
+        for key, info in locales.items():
+            language, country = key.split("_")  # registry keys look like `en_us`
+            lines.append(f"* `{language.lower()}_{country.upper()}` - {info.name}")
+
+        await evt.reply("Available locales:\n\n" + "\n".join(lines))
+
+    @staticmethod
+    def _fmt_locales(locale_ids: List[str]) -> str:
+        """Join the display names of ``locale_ids`` into prose, or "unset" if none are known."""
+        # Skip IDs missing from the registry (e.g. stale database rows) instead of raising
+        # KeyError; this mirrors how DateArgument.match filters the stored IDs.
+        names = [locales[key].name for key in locale_ids if key in locales]
+        if len(names) <= 1:
+            return names[0] if names else "unset"
+        return ", ".join(names[:-1]) + " and " + names[-1]
+
+    @remind.subcommand("locale", help="Set your locale")
+    @command.argument("locale", required=False, pass_raw=True)
+    async def locale(self, evt: MessageEvent, locale: str) -> None:
+        """Show the sender's locales, or set them from whitespace-separated IDs in ``locale``."""
+        locale_ids = (locale or "").lower().split()  # no-arg split() never yields empty IDs
+        unknown = [key for key in locale_ids if key not in locales]
+        if not locale_ids:
+            await evt.reply(f"Your locale is {self._fmt_locales(self.db.get_locales(evt.sender))}")
+        elif unknown:
+            await evt.reply(f"Locale `{unknown[0]}` is not supported")
+        else:
+            self.db.set_locales(evt.sender, locale_ids)
+            await evt.reply(f"Set your locale to {self._fmt_locales(locale_ids)}")
+
     @remind.subcommand("timezone", help="Set your timezone", aliases=("tz",))
     @command.argument("timezone", parser=parse_timezone, required=False)
     async def timezone(self, evt: MessageEvent, timezone: pytz.timezone) -> None:
diff --git a/reminder/db.py b/reminder/db.py
index 0708a61..a99aa1a 100644
--- a/reminder/db.py
+++ b/reminder/db.py
@@ -13,7 +13,7 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
-from typing import Optional, Iterator, Dict
+from typing import Optional, Iterator, Dict, List
 from datetime import datetime
 
 import pytz
@@ -31,11 +31,13 @@ class ReminderDatabase:
     reminder_target: Table
     timezone: Table
     tz_cache: Dict[UserID, pytz.timezone]
+    locale_cache: Dict[UserID, List[str]]
     db: Engine
 
     def __init__(self, db: Engine) -> None:
         self.db = db
         self.tz_cache = {}
+        self.locale_cache = {}
 
         meta = MetaData()
         meta.bind = db
@@ -55,7 +57,10 @@ class ReminderDatabase:
                                      Column("event_id", String(255), nullable=False))
         self.timezone = Table("timezone", meta,
                               Column("user_id", String(255), primary_key=True),
-                              Column("timezone", String(255), primary_key=True))
+                              Column("timezone", String(255), nullable=False))
+        self.locale = Table("locale", meta,
+                            Column("user_id", String(255), primary_key=True),
+                            Column("locales", String(255), nullable=False))
 
         meta.create_all()
 
@@ -77,6 +82,24 @@ class ReminderDatabase:
                 self.tz_cache[user_id] = pytz.UTC
             return self.tz_cache[user_id]
 
+    def set_locales(self, user_id: UserID, locales: List[str]) -> None:
+        with self.db.begin() as tx:  # replace the user's row: delete, then insert, in one block
+            tx.execute(self.locale.delete().where(self.locale.c.user_id == user_id))
+            tx.execute(self.locale.insert().values(user_id=user_id, locales=",".join(locales)))
+        self.locale_cache[user_id] = locales  # keep the cache in sync with what was just stored
+
+    def get_locales(self, user_id: UserID) -> List[str]:
+        """Return the user's locale IDs, loading and caching the database row on a cache miss."""
+        if user_id not in self.locale_cache:
+            query = select([self.locale.c.locales]).where(self.locale.c.user_id == user_id)
+            rows = self.db.execute(query)
+            try:
+                stored = next(rows)[0].split(",")
+            except (StopIteration, IndexError):
+                stored = ["en_iso"]  # no row stored yet: fall back to the default locale
+            self.locale_cache[user_id] = stored
+        return self.locale_cache[user_id]
+
     def all_for_user(self, user_id: UserID, room_id: Optional[RoomID] = None
                      ) -> Iterator[ReminderInfo]:
         where = [self.reminder.c.id == self.reminder_target.c.reminder_id,
diff --git a/reminder/locale_util.py b/reminder/locale_util.py
new file mode 100644
index 0000000..1d93dcc
--- /dev/null
+++ b/reminder/locale_util.py
@@ -0,0 +1,144 @@
+# reminder - A maubot plugin to remind you about things.
+# Copyright (C) 2020 Tulir Asokan
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from typing import NamedTuple, Union, Pattern, Dict, Type, Optional, TYPE_CHECKING
+from datetime import datetime
+from abc import ABC, abstractmethod
+import re
+
+from dateutil.relativedelta import MO
+
+WeekdayType = type(MO)  # class of dateutil's MO constant, referenced by the annotations below
+
+if TYPE_CHECKING:
+    from typing import TypedDict
+
+    class RelativeDeltaParams(TypedDict, total=False):
+        """Keyword arguments of dateutil's ``relativedelta``; matchers supply only a subset."""
+        year: int
+        month: int
+        day: int
+        hour: int
+        minute: int
+        second: int
+        microsecond: int
+        # Singular names above set absolute values; plural names below are relative offsets.
+        years: Union[int, float]
+        months: Union[int, float]
+        weeks: Union[int, float]
+        days: Union[int, float]
+        hours: Union[int, float]
+        minutes: Union[int, float]
+        seconds: Union[int, float]
+        microseconds: Union[int, float]
+
+        weekday: Union[int, WeekdayType]
+
+        leapdays: int
+        yearday: int
+        nlyearday: int
+
+
+class MatcherReturn(NamedTuple):  # result of a successful Matcher.match() call
+    params: 'RelativeDeltaParams'  # keyword arguments later expanded into relativedelta(**params)
+    end: int  # index in the input string where the match stopped
+
+
+class Matcher(ABC):
+    @abstractmethod
+    def match(self, val: str, start: int = 0) -> Optional[MatcherReturn]:
+        """Try to match ``val`` starting at index ``start``; return None if nothing matches."""
+
+
+class RegexMatcher(Matcher):
+    """Matcher that converts each non-empty named group of a regex with ``value_type``."""
+    regex: Pattern
+    value_type: Type
+
+    def __init__(self, pattern: str, value_type: Type = int) -> None:
+        self.regex = re.compile(pattern, re.IGNORECASE)
+        self.value_type = value_type
+
+    def match(self, val: str, start: int = 0) -> Optional[MatcherReturn]:
+        found = self.regex.match(val, pos=start)
+        if not found or found.end() <= start:  # compare to `start`, not 0: reject empty matches
+            return None
+        groups = found.groupdict().items()
+        return MatcherReturn({key: self.value_type(raw) for key, raw in groups if raw}, found.end())
+
+
+class WeekdayMatcher(Matcher):  # resolves a matched day word to a relativedelta `weekday` value
+    regex: Pattern
+    map: Dict[str, Union[int, WeekdayType]]
+    substr: int
+
+    def __init__(self, pattern: str, map: Dict[str, Union[int, WeekdayType]], substr: int) -> None:
+        self.regex = re.compile(pattern, re.IGNORECASE)
+        self.map = map
+        self.substr = substr
+
+    def match(self, val: str, start: int = 0) -> Optional[MatcherReturn]:
+        found = self.regex.match(val, pos=start)
+        if not found or found.end() <= start:  # an empty match at `start` is not a match
+            return None
+        weekday = self.map[found.group(0)[:self.substr].lower()]  # prefix of the matched text
+        if isinstance(weekday, int):  # ints are day offsets from today (0 = today, 1 = tomorrow)
+            weekday = (datetime.now().weekday() + weekday) % 7
+        return MatcherReturn(params={"weekday": weekday}, end=found.end())
+
+
+class Locale(Matcher):
+    """A named bundle of matchers that together parse one locale's date expressions."""
+    name: str
+    timedelta: Matcher
+    date: Matcher
+    weekday: Matcher
+    time: Matcher
+
+    def __init__(self, name: str, timedelta: Matcher, date: Matcher, weekday: Matcher,
+                 time: Matcher) -> None:
+        self.name = name
+        self.timedelta = timedelta
+        self.date = date
+        self.weekday = weekday
+        self.time = time
+
+    def replace(self, name: str, timedelta: Matcher = None, date: Matcher = None,
+                weekday: Matcher = None, time: Matcher = None) -> 'Locale':
+        """Return a copy called ``name`` in which every matcher not passed here is inherited."""
+        return Locale(name=name,
+                      timedelta=timedelta or self.timedelta, date=date or self.date,
+                      weekday=weekday or self.weekday, time=time or self.time)
+
+    def match(self, val: str, start: int = 0) -> Optional[MatcherReturn]:
+        """Parse ``val`` from ``start`` as a relative delta, or failing that as an optional
+        weekday or date followed by an optional time of day.
+
+        Returns None when no matcher contributed any parameter.
+        """
+        delta = self.timedelta.match(val, start=start)
+        if delta:
+            params, end = delta
+            return MatcherReturn(params, end) if len(params) > 0 else None
+        # Weekday takes precedence over date; `or` only runs the date matcher as a fallback.
+        day = self.weekday.match(val, start=start) or self.date.match(val, start=start)
+        params, end = day if day else ({}, start)
+        clock = self.time.match(val, start=end)
+        if clock:
+            params, end = {**params, **clock.params}, clock.end
+        return MatcherReturn(params, end) if len(params) > 0 else None
+
+
+Locales = Dict[str, Locale]  # type of the locale registry: locale ID -> Locale
diff --git a/reminder/locales.py b/reminder/locales.py
new file mode 100644
index 0000000..989abda
--- /dev/null
+++ b/reminder/locales.py
@@ -0,0 +1,92 @@
+# reminder - A maubot plugin to remind you about things.
+# Copyright (C) 2020 Tulir Asokan
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU
+
+from .locale_util import Locales, Locale, RegexMatcher, WeekdayMatcher
+
+locales: Locales = {}  # registry of supported locales, keyed by lowercase locale ID
+
+td_sep_en = r"(?:[\s,]{1,3}(?:and\s)?)"  # unit separator: 1-3 spaces/commas, then optional "and "
+locales["en_iso"] = Locale(
+    name="English (ISO)",
+    timedelta=RegexMatcher(r"(?:(?:in|after)\s)?"  # optional "in "/"after " prefix
+                           rf"(?:(?P<years>[-+]?\d+)\s?y(?:r|ears?)?{td_sep_en})?"
+                           rf"(?:(?P<months>[-+]?\d+)\s?mo(?:nths?)?{td_sep_en})?"
+                           rf"(?:(?P<weeks>[-+]?\d+)\s?w(?:k|eeks?)?{td_sep_en})?"
+                           rf"(?:(?P<days>[-+]?\d+)\s?d(?:ays?)?{td_sep_en})?"
+                           rf"(?:(?P<hours>[-+]?\d+)\s?h(?:(?:r|our)?s?){td_sep_en})?"
+                           rf"(?:(?P<minutes>[-+]?\d+)\s?m(?:in(?:ute)?s?)?{td_sep_en})?"
+                           r"(?:(?P<seconds>[-+]?\d+)\s?s(?:ec(?:ond)?s?)?)?"),  # no separator after seconds
+    date=RegexMatcher(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})"),  # e.g. 2020-03-27
+    weekday=WeekdayMatcher(pattern=r"today"
+                                   r"|tomorrow"
+                                   r"|mon(?:day)?"
+                                   r"|tues?(?:day)?"
+                                   r"|wed(?:nesday)?"
+                                   r"|thu(?:rs(?:day)?)?"
+                                   r"|fri(?:day)?"
+                                   r"|sat(?:urday)?"
+                                   r"|sun(?:day)?",
+                           map={
+                               "tod": +0, "tom": +1, "mon": MO, "tue": TU, "wed": WE, "thu": TH,
+                               "fri": FR, "sat": SA, "sun": SU,
+                           }, substr=3),  # map keys are the first three letters of the matched word
+    time=RegexMatcher(r"\s?(?:at\s)?"  # optional leading space and "at "
+                      r"(?P<hour>\d{2})"
+                      r"[:.](?P<minute>\d{2})"
+                      r"(?:[:.](?P<second>\d{2}))?")
+)
+
+locales["en_us"] = locales["en_iso"].replace(  # same as en_iso except for the date format
+    name="English (US)",
+    date=RegexMatcher(r"(?P<month>\d{1,2})/(?P<day>\d{1,2})(?:/(?P<year>\d{4}))?"))  # month/day[/year]
+
+locales["en_uk"] = locales["en_iso"].replace(  # same as en_iso except for the date format
+    name="English (UK)",
+    date=RegexMatcher(r"(?P<day>\d{1,2})/(?P<month>\d{1,2})(?:/(?P<year>\d{4}))?"))  # day/month[/year]
+
+td_sep_fi = r"(?:[\s,]{1,3}(?:ja\s)?)"  # unit separator: 1-3 spaces/commas, then optional "ja "
+locales["fi_fi"] = Locale(
+    name="Finnish",
+    timedelta=RegexMatcher(rf"(?:(?P<years>[-+]?\d+)\s?v(?:uo(?:tta|den))?{td_sep_fi})?"
+                           rf"(?:(?P<months>[-+]?\d+)\s?k(?:k|uukau(?:si|tta|den))?{td_sep_fi})?"
+                           rf"(?:(?P<weeks>[-+]?\d+)\s?v(?:k|iikk?o[an]?){td_sep_fi})?"
+                           rf"(?:(?P<days>[-+]?\d+)\s?p(?:v|äivä[än]?){td_sep_fi})?"
+                           rf"(?:(?P<hours>[-+]?\d+)\s?t(?:un(?:nin?|tia))?{td_sep_fi})?"
+                           rf"(?:(?P<minutes>[-+]?\d+)\s?m(?:in(?:uut(?:in?|tia))?)?{td_sep_fi})?"
+                           r"(?:(?P<seconds>[-+]?\d+)\s?s(?:ek(?:un(?:nin?|tia))?)?)?"
+                           r"(?:\s(?:kuluttua|päästä?))?"),
+    date=RegexMatcher(r"(?P<day>\d{1,2})\.(?P<month>\d{1,2})\.(?P<year>\d{4})"),
+    weekday=WeekdayMatcher(pattern=r"(?:tänään"
+                                   r"|(?:yli)?huomen"
+                                   r"|ma(?:anantai)?"  # "maanantai"; the old suffix had an extra "a"
+                                   r"|ti(?:istai)?"  # "tiistai"; the old suffix had an extra "i"
+                                   r"|ke(?:skiviikko)?"
+                                   r"|to(?:rstai)?"
+                                   r"|pe(?:rjantai)?"
+                                   r"|la(?:uantai)?"
+                                   r"|su(?:nnuntai)?)"
+                                   r"(?:na)?",
+                           map={
+                               "tä": +0, "hu": +1, "yl": +2,
+                               "ma": MO, "ti": TU, "ke": WE, "to": TH, "pe": FR, "la": SA,
+                               "su": SU,
+                           }, substr=2),  # map keys are the first two letters of the matched word
+    time=RegexMatcher(r"\s?(?:ke?ll?o\.?\s)?"
+                      r"(?P<hour>\d{2})"
+                      r"[:.](?P<minute>\d{2})"
+                      r"(?:[:.](?P<second>\d{2}))?"),
+)
diff --git a/reminder/util.py b/reminder/util.py
index c9cf2e1..e852fa8 100644
--- a/reminder/util.py
+++ b/reminder/util.py
@@ -26,6 +26,8 @@ from mautrix.util.config import BaseProxyConfig, ConfigUpdateHelper
 from maubot import MessageEvent
 from maubot.handlers.command import Argument, ArgumentSyntaxError
 
+from .locales import locales
+
 if TYPE_CHECKING:
     from .bot import ReminderBot
 
@@ -67,39 +69,17 @@ class DateArgument(Argument):
     def match(self, val: str, evt: MessageEvent = None, instance: 'ReminderBot' = None
               ) -> Tuple[str, Optional[datetime]]:
         tz = pytz.UTC
+        use_locales = [locales["en_iso"]]
         if instance:
             tz = instance.db.get_timezone(evt.sender)
-
-        found_delta = timedelta_regex.match(val)
-        end = 0
-        if found_delta.end() > 0:
-            params = {k: float(v) for k, v in found_delta.groupdict().items() if v}
-            end = found_delta.end()
-        else:
-            params = {}
-            found_day = day_regex.match(val)
-            if found_day:
-                end = found_day.end()
-                params["weekday"] = {
-                    "tod": datetime.now().weekday(), "tom": datetime.now().weekday() + 1,
-                    "mon": 0, "tue": 1, "wed": 2, "thu": 3, "fri": 4, "sat": 5, "sun": 6,
-                }[found_day.string[:3].lower()]
-            else:
-                found_date = date_regex.match(val)
-                if found_date:
-                    end = found_date.end()
-                    params = {k: int(v) for k, v in found_delta.groupdict().items() if v}
-
-            found_time = time_regex.match(val, pos=end)
-            if found_time:
-                params = {
-                    **params,
-                    **{k: int(v) for k, v in found_time.groupdict().items() if v}
-                }
-                end = found_time.end()
-
-        return val[end:], ((datetime.now(tz=tz) + relativedelta(**params))
-                           if len(params) > 0 else None)
+            locale_ids = instance.db.get_locales(evt.sender)
+            use_locales = [locales[id] for id in locale_ids if id in locales]  # drop unknown IDs
+
+        for locale in use_locales:  # try each locale in the order the IDs were stored
+            match = locale.match(val)
+            if match:  # first locale that parses wins; return the unparsed remainder and the date
+                return val[match.end:], datetime.now(tz=tz) + relativedelta(**match.params)
+        return val, None  # no locale matched: leave `val` untouched
 
 
 def parse_timezone(val: str) -> Optional[pytz.timezone]:
-- 
GitLab