diff --git a/python/catalog.py b/python/catalog.py new file mode 100644 index 0000000..41adfae --- /dev/null +++ b/python/catalog.py @@ -0,0 +1,948 @@ +""" + babel.messages.catalog + ~~~~~~~~~~~~~~~~~~~~~~ + + Data structures for message catalogs. + + :copyright: (c) 2013-2023 by the Babel Team. + :license: BSD, see LICENSE for more details. +""" +from __future__ import annotations + +import datetime +import re +from collections import OrderedDict +from collections.abc import Iterable, Iterator +from copy import copy +from difflib import SequenceMatcher +from email import message_from_string +from heapq import nlargest +from typing import TYPE_CHECKING + +from babel import __version__ as VERSION +from babel.core import Locale, UnknownLocaleError +from babel.dates import format_datetime +from babel.messages.plurals import get_plural +from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + _MessageID: TypeAlias = str | tuple[str, ...] | list[str] + +__all__ = ['Message', 'Catalog', 'TranslationError'] + +def get_close_matches(word, possibilities, n=3, cutoff=0.6): + """A modified version of ``difflib.get_close_matches``. + + It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work + around https://github.com/python/cpython/issues/90825. + """ + if not n > 0: # pragma: no cover + raise ValueError(f"n must be > 0: {n!r}") + if not 0.0 <= cutoff <= 1.0: # pragma: no cover + raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}") + result = [] + s = SequenceMatcher(autojunk=False) # only line changed from difflib.py + s.set_seq2(word) + for x in possibilities: + s.set_seq1(x) + if s.real_quick_ratio() >= cutoff and \ + s.quick_ratio() >= cutoff and \ + s.ratio() >= cutoff: + result.append((s.ratio(), x)) + + # Move the best scorers to head of list + result = nlargest(n, result) + # Strip scores for the best n matches + return [x for score, x in result] + + +PYTHON_FORMAT = re.compile(r''' + \% + (?:\(([\w]*)\))? + ( + [-#0\ +]?(?:\*|[\d]+)? + (?:\.(?:\*|[\d]+))? + [hlL]? + ) + ([diouxXeEfFgGcrs%]) +''', re.VERBOSE) + + +def _parse_datetime_header(value: str) -> datetime.datetime: + match = re.match(r'^(?P.*?)(?P[+-]\d{4})?$', value) + + dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M') + + # Separate the offset into a sign component, hours, and # minutes + tzoffset = match.group('tzoffset') + if tzoffset is not None: + plus_minus_s, rest = tzoffset[0], tzoffset[1:] + hours_offset_s, mins_offset_s = rest[:2], rest[2:] + + # Make them all integers + plus_minus = int(f"{plus_minus_s}1") + hours_offset = int(hours_offset_s) + mins_offset = int(mins_offset_s) + + # Calculate net offset + net_mins_offset = hours_offset * 60 + net_mins_offset += mins_offset + net_mins_offset *= plus_minus + + # Create an offset object + tzoffset = FixedOffsetTimezone(net_mins_offset) + + # Store the offset in a datetime object + dt = dt.replace(tzinfo=tzoffset) + + return dt + + +class Message: + """Representation of a single message in a catalog.""" + + def __init__( + self, + id: _MessageID, + string: _MessageID | None = '', + locations: Iterable[tuple[str, int]] = (), + flags: Iterable[str] = (), + auto_comments: Iterable[str] = (), + user_comments: Iterable[str] = (), + previous_id: _MessageID = (), + lineno: int | None = None, + context: str | None = None, + ) -> None: + """Create the message object. + + :param id: the message ID, or a ``(singular, plural)`` tuple for + pluralizable messages + :param string: the translated message string, or a + ``(singular, plural)`` tuple for pluralizable messages + :param locations: a sequence of ``(filename, lineno)`` tuples + :param flags: a set or sequence of flags + :param auto_comments: a sequence of automatic comments for the message + :param user_comments: a sequence of user comments for the message + :param previous_id: the previous message ID, or a ``(singular, plural)`` + tuple for pluralizable messages + :param lineno: the line number on which the msgid line was found in the + PO file, if any + :param context: the message context + """ + self.id = id + if not string and self.pluralizable: + string = ('', '') + self.string = string + self.locations = list(distinct(locations)) + self.flags = set(flags) + if id and self.python_format: + self.flags.add('python-format') + else: + self.flags.discard('python-format') + self.auto_comments = list(distinct(auto_comments)) + self.user_comments = list(distinct(user_comments)) + if isinstance(previous_id, str): + self.previous_id = [previous_id] + else: + self.previous_id = list(previous_id) + self.lineno = lineno + self.context = context + + def __repr__(self) -> str: + return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>" + + def __cmp__(self, other: object) -> int: + """Compare Messages, taking into account plural ids""" + def values_to_compare(obj): + if isinstance(obj, Message) and obj.pluralizable: + return obj.id[0], obj.context or '' + return obj.id, obj.context or '' + return _cmp(values_to_compare(self), values_to_compare(other)) + + def __gt__(self, other: object) -> bool: + return self.__cmp__(other) > 0 + + def __lt__(self, other: object) -> bool: + return self.__cmp__(other) < 0 + + def __ge__(self, other: object) -> bool: + return self.__cmp__(other) >= 0 + + def __le__(self, other: object) -> bool: + return self.__cmp__(other) <= 0 + + def __eq__(self, other: object) -> bool: + return self.__cmp__(other) == 0 + + def __ne__(self, other: object) -> bool: + return self.__cmp__(other) != 0 + + def is_identical(self, other: Message) -> bool: + """Checks whether messages are identical, taking into account all + properties. + """ + assert isinstance(other, Message) + return self.__dict__ == other.__dict__ + + def clone(self) -> Message: + return Message(*map(copy, (self.id, self.string, self.locations, + self.flags, self.auto_comments, + self.user_comments, self.previous_id, + self.lineno, self.context))) + + def check(self, catalog: Catalog | None = None) -> list[TranslationError]: + """Run various validation checks on the message. Some validations + are only performed if the catalog is provided. This method returns + a sequence of `TranslationError` objects. + + :rtype: ``iterator`` + :param catalog: A catalog instance that is passed to the checkers + :see: `Catalog.check` for a way to perform checks for all messages + in a catalog. + """ + from babel.messages.checkers import checkers + errors: list[TranslationError] = [] + for checker in checkers: + try: + checker(catalog, self) + except TranslationError as e: + errors.append(e) + return errors + + @property + def fuzzy(self) -> bool: + """Whether the translation is fuzzy. + + >>> Message('foo').fuzzy + False + >>> msg = Message('foo', 'foo', flags=['fuzzy']) + >>> msg.fuzzy + True + >>> msg + + + :type: `bool`""" + return 'fuzzy' in self.flags + + @property + def pluralizable(self) -> bool: + """Whether the message is plurizable. + + >>> Message('foo').pluralizable + False + >>> Message(('foo', 'bar')).pluralizable + True + + :type: `bool`""" + return isinstance(self.id, (list, tuple)) + + @property + def python_format(self) -> bool: + """Whether the message contains Python-style parameters. + + >>> Message('foo %(name)s bar').python_format + True + >>> Message(('foo %(name)s', 'foo %(name)s')).python_format + True + + :type: `bool`""" + ids = self.id + if not isinstance(ids, (list, tuple)): + ids = [ids] + return any(PYTHON_FORMAT.search(id) for id in ids) + + +class TranslationError(Exception): + """Exception thrown by translation checkers when invalid message + translations are encountered.""" + + +DEFAULT_HEADER = """\ +# Translations template for PROJECT. +# Copyright (C) YEAR ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , YEAR. +#""" + + +def parse_separated_header(value: str) -> dict[str, str]: + # Adapted from https://peps.python.org/pep-0594/#cgi + from email.message import Message + m = Message() + m['content-type'] = value + return dict(m.get_params()) + + +class Catalog: + """Representation of a message catalog.""" + + def __init__( + self, + locale: str | Locale | None = None, + domain: str | None = None, + header_comment: str | None = DEFAULT_HEADER, + project: str | None = None, + version: str | None = None, + copyright_holder: str | None = None, + msgid_bugs_address: str | None = None, + creation_date: datetime.datetime | str | None = None, + revision_date: datetime.datetime | datetime.time | float | str | None = None, + last_translator: str | None = None, + language_team: str | None = None, + charset: str | None = None, + fuzzy: bool = True, + ) -> None: + """Initialize the catalog object. + + :param locale: the locale identifier or `Locale` object, or `None` + if the catalog is not bound to a locale (which basically + means it's a template) + :param domain: the message domain + :param header_comment: the header comment as string, or `None` for the + default header + :param project: the project's name + :param version: the project's version + :param copyright_holder: the copyright holder of the catalog + :param msgid_bugs_address: the email address or URL to submit bug + reports to + :param creation_date: the date the catalog was created + :param revision_date: the date the catalog was revised + :param last_translator: the name and email of the last translator + :param language_team: the name and email of the language team + :param charset: the encoding to use in the output (defaults to utf-8) + :param fuzzy: the fuzzy bit on the catalog header + """ + self.domain = domain + self.locale = locale + self._header_comment = header_comment + self._messages: OrderedDict[str | tuple[str, str], Message] = OrderedDict() + + self.project = project or 'PROJECT' + self.version = version or 'VERSION' + self.copyright_holder = copyright_holder or 'ORGANIZATION' + self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' + + self.last_translator = last_translator or 'FULL NAME ' + """Name and email address of the last translator.""" + self.language_team = language_team or 'LANGUAGE ' + """Name and email address of the language team.""" + + self.charset = charset or 'utf-8' + + if creation_date is None: + creation_date = datetime.datetime.now(LOCALTZ) + elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo: + creation_date = creation_date.replace(tzinfo=LOCALTZ) + self.creation_date = creation_date + if revision_date is None: + revision_date = 'YEAR-MO-DA HO:MI+ZONE' + elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo: + revision_date = revision_date.replace(tzinfo=LOCALTZ) + self.revision_date = revision_date + self.fuzzy = fuzzy + + # Dictionary of obsolete messages + self.obsolete: OrderedDict[str | tuple[str, str], Message] = OrderedDict() + self._num_plurals = None + self._plural_expr = None + + def _set_locale(self, locale: Locale | str | None) -> None: + if locale is None: + self._locale_identifier = None + self._locale = None + return + + if isinstance(locale, Locale): + self._locale_identifier = str(locale) + self._locale = locale + return + + if isinstance(locale, str): + self._locale_identifier = str(locale) + try: + self._locale = Locale.parse(locale) + except UnknownLocaleError: + self._locale = None + return + + raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}") + + def _get_locale(self) -> Locale | None: + return self._locale + + def _get_locale_identifier(self) -> str | None: + return self._locale_identifier + + locale = property(_get_locale, _set_locale) + locale_identifier = property(_get_locale_identifier) + + def _get_header_comment(self) -> str: + comment = self._header_comment + year = datetime.datetime.now(LOCALTZ).strftime('%Y') + if hasattr(self.revision_date, 'strftime'): + year = self.revision_date.strftime('%Y') + comment = comment.replace('PROJECT', self.project) \ + .replace('VERSION', self.version) \ + .replace('YEAR', year) \ + .replace('ORGANIZATION', self.copyright_holder) + locale_name = (self.locale.english_name if self.locale else self.locale_identifier) + if locale_name: + comment = comment.replace("Translations template", f"{locale_name} translations") + return comment + + def _set_header_comment(self, string: str | None) -> None: + self._header_comment = string + + header_comment = property(_get_header_comment, _set_header_comment, doc="""\ + The header comment for the catalog. + + >>> catalog = Catalog(project='Foobar', version='1.0', + ... copyright_holder='Foo Company') + >>> print(catalog.header_comment) #doctest: +ELLIPSIS + # Translations template for Foobar. + # Copyright (C) ... Foo Company + # This file is distributed under the same license as the Foobar project. + # FIRST AUTHOR , .... + # + + The header can also be set from a string. Any known upper-case variables + will be replaced when the header is retrieved again: + + >>> catalog = Catalog(project='Foobar', version='1.0', + ... copyright_holder='Foo Company') + >>> catalog.header_comment = '''\\ + ... # The POT for my really cool PROJECT project. + ... # Copyright (C) 1990-2003 ORGANIZATION + ... # This file is distributed under the same license as the PROJECT + ... # project. + ... #''' + >>> print(catalog.header_comment) + # The POT for my really cool Foobar project. + # Copyright (C) 1990-2003 Foo Company + # This file is distributed under the same license as the Foobar + # project. + # + + :type: `unicode` + """) + + def _get_mime_headers(self) -> list[tuple[str, str]]: + headers: list[tuple[str, str]] = [] + headers.append(("Project-Id-Version", f"{self.project} {self.version}")) + headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address)) + headers.append(('POT-Creation-Date', + format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', + locale='en'))) + if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)): + headers.append(('PO-Revision-Date', + format_datetime(self.revision_date, + 'yyyy-MM-dd HH:mmZ', locale='en'))) + else: + headers.append(('PO-Revision-Date', self.revision_date)) + headers.append(('Last-Translator', self.last_translator)) + if self.locale_identifier: + headers.append(('Language', str(self.locale_identifier))) + if self.locale_identifier and ('LANGUAGE' in self.language_team): + headers.append(('Language-Team', + self.language_team.replace('LANGUAGE', + str(self.locale_identifier)))) + else: + headers.append(('Language-Team', self.language_team)) + if self.locale is not None: + headers.append(('Plural-Forms', self.plural_forms)) + headers.append(('MIME-Version', '1.0')) + headers.append(("Content-Type", f"text/plain; charset={self.charset}")) + headers.append(('Content-Transfer-Encoding', '8bit')) + headers.append(("Generated-By", f"Babel {VERSION}\n")) + return headers + + def _force_text(self, s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str: + if isinstance(s, str): + return s + if isinstance(s, bytes): + return s.decode(encoding, errors) + return str(s) + + def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None: + for name, value in headers: + name = self._force_text(name.lower(), encoding=self.charset) + value = self._force_text(value, encoding=self.charset) + if name == 'project-id-version': + parts = value.split(' ') + self.project = ' '.join(parts[:-1]) + self.version = parts[-1] + elif name == 'report-msgid-bugs-to': + self.msgid_bugs_address = value + elif name == 'last-translator': + self.last_translator = value + elif name == 'language': + value = value.replace('-', '_') + self._set_locale(value) + elif name == 'language-team': + self.language_team = value + elif name == 'content-type': + params = parse_separated_header(value) + if 'charset' in params: + self.charset = params['charset'].lower() + elif name == 'plural-forms': + params = parse_separated_header(f" ;{value}") + self._num_plurals = int(params.get('nplurals', 2)) + self._plural_expr = params.get('plural', '(n != 1)') + elif name == 'pot-creation-date': + self.creation_date = _parse_datetime_header(value) + elif name == 'po-revision-date': + # Keep the value if it's not the default one + if 'YEAR' not in value: + self.revision_date = _parse_datetime_header(value) + + mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ + The MIME headers of the catalog, used for the special ``msgid ""`` entry. + + The behavior of this property changes slightly depending on whether a locale + is set or not, the latter indicating that the catalog is actually a template + for actual translations. + + Here's an example of the output for such a catalog template: + + >>> from babel.dates import UTC + >>> from datetime import datetime + >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) + >>> catalog = Catalog(project='Foobar', version='1.0', + ... creation_date=created) + >>> for name, value in catalog.mime_headers: + ... print('%s: %s' % (name, value)) + Project-Id-Version: Foobar 1.0 + Report-Msgid-Bugs-To: EMAIL@ADDRESS + POT-Creation-Date: 1990-04-01 15:30+0000 + PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE + Last-Translator: FULL NAME + Language-Team: LANGUAGE + MIME-Version: 1.0 + Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: 8bit + Generated-By: Babel ... + + And here's an example of the output when the locale is set: + + >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) + >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', + ... creation_date=created, revision_date=revised, + ... last_translator='John Doe ', + ... language_team='de_DE ') + >>> for name, value in catalog.mime_headers: + ... print('%s: %s' % (name, value)) + Project-Id-Version: Foobar 1.0 + Report-Msgid-Bugs-To: EMAIL@ADDRESS + POT-Creation-Date: 1990-04-01 15:30+0000 + PO-Revision-Date: 1990-08-03 12:00+0000 + Last-Translator: John Doe + Language: de_DE + Language-Team: de_DE + Plural-Forms: nplurals=2; plural=(n != 1); + MIME-Version: 1.0 + Content-Type: text/plain; charset=utf-8 + Content-Transfer-Encoding: 8bit + Generated-By: Babel ... + + :type: `list` + """) + + @property + def num_plurals(self) -> int: + """The number of plurals used by the catalog or locale. + + >>> Catalog(locale='en').num_plurals + 2 + >>> Catalog(locale='ga').num_plurals + 5 + + :type: `int`""" + if self._num_plurals is None: + num = 2 + if self.locale: + num = get_plural(self.locale)[0] + self._num_plurals = num + return self._num_plurals + + @property + def plural_expr(self) -> str: + """The plural expression used by the catalog or locale. + + >>> Catalog(locale='en').plural_expr + '(n != 1)' + >>> Catalog(locale='ga').plural_expr + '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)' + >>> Catalog(locale='ding').plural_expr # unknown locale + '(n != 1)' + + :type: `str`""" + if self._plural_expr is None: + expr = '(n != 1)' + if self.locale: + expr = get_plural(self.locale)[1] + self._plural_expr = expr + return self._plural_expr + + @property + def plural_forms(self) -> str: + """Return the plural forms declaration for the locale. + + >>> Catalog(locale='en').plural_forms + 'nplurals=2; plural=(n != 1);' + >>> Catalog(locale='pt_BR').plural_forms + 'nplurals=2; plural=(n > 1);' + + :type: `str`""" + return f"nplurals={self.num_plurals}; plural={self.plural_expr};" + + def __contains__(self, id: _MessageID) -> bool: + """Return whether the catalog has a message with the specified ID.""" + return self._key_for(id) in self._messages + + def __len__(self) -> int: + """The number of messages in the catalog. + + This does not include the special ``msgid ""`` entry.""" + return len(self._messages) + + def __iter__(self) -> Iterator[Message]: + """Iterates through all the entries in the catalog, in the order they + were added, yielding a `Message` object for every entry. + + :rtype: ``iterator``""" + buf = [] + for name, value in self.mime_headers: + buf.append(f"{name}: {value}") + flags = set() + if self.fuzzy: + flags |= {'fuzzy'} + yield Message('', '\n'.join(buf), flags=flags) + for key in self._messages: + yield self._messages[key] + + def __repr__(self) -> str: + locale = '' + if self.locale: + locale = f" {self.locale}" + return f"<{type(self).__name__} {self.domain!r}{locale}>" + + def __delitem__(self, id: _MessageID) -> None: + """Delete the message with the specified ID.""" + self.delete(id) + + def __getitem__(self, id: _MessageID) -> Message: + """Return the message with the specified ID. + + :param id: the message ID + """ + return self.get(id) + + def __setitem__(self, id: _MessageID, message: Message) -> None: + """Add or update the message with the specified ID. + + >>> catalog = Catalog() + >>> catalog[u'foo'] = Message(u'foo') + >>> catalog[u'foo'] + + + If a message with that ID is already in the catalog, it is updated + to include the locations and flags of the new message. + + >>> catalog = Catalog() + >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) + >>> catalog[u'foo'].locations + [('main.py', 1)] + >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) + >>> catalog[u'foo'].locations + [('main.py', 1), ('utils.py', 5)] + + :param id: the message ID + :param message: the `Message` object + """ + assert isinstance(message, Message), 'expected a Message object' + key = self._key_for(id, message.context) + current = self._messages.get(key) + if current: + if message.pluralizable and not current.pluralizable: + # The new message adds pluralization + current.id = message.id + current.string = message.string + current.locations = list(distinct(current.locations + + message.locations)) + current.auto_comments = list(distinct(current.auto_comments + + message.auto_comments)) + current.user_comments = list(distinct(current.user_comments + + message.user_comments)) + current.flags |= message.flags + message = current + elif id == '': + # special treatment for the header message + self.mime_headers = message_from_string(message.string).items() + self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments]) + self.fuzzy = message.fuzzy + else: + if isinstance(id, (list, tuple)): + assert isinstance(message.string, (list, tuple)), \ + f"Expected sequence but got {type(message.string)}" + self._messages[key] = message + + def add( + self, + id: _MessageID, + string: _MessageID | None = None, + locations: Iterable[tuple[str, int]] = (), + flags: Iterable[str] = (), + auto_comments: Iterable[str] = (), + user_comments: Iterable[str] = (), + previous_id: _MessageID = (), + lineno: int | None = None, + context: str | None = None, + ) -> Message: + """Add or update the message with the specified ID. + + >>> catalog = Catalog() + >>> catalog.add(u'foo') + + >>> catalog[u'foo'] + + + This method simply constructs a `Message` object with the given + arguments and invokes `__setitem__` with that object. + + :param id: the message ID, or a ``(singular, plural)`` tuple for + pluralizable messages + :param string: the translated message string, or a + ``(singular, plural)`` tuple for pluralizable messages + :param locations: a sequence of ``(filename, lineno)`` tuples + :param flags: a set or sequence of flags + :param auto_comments: a sequence of automatic comments + :param user_comments: a sequence of user comments + :param previous_id: the previous message ID, or a ``(singular, plural)`` + tuple for pluralizable messages + :param lineno: the line number on which the msgid line was found in the + PO file, if any + :param context: the message context + """ + message = Message(id, string, list(locations), flags, auto_comments, + user_comments, previous_id, lineno=lineno, + context=context) + self[id] = message + return message + + def check(self) -> Iterable[tuple[Message, list[TranslationError]]]: + """Run various validation checks on the translations in the catalog. + + For every message which fails validation, this method yield a + ``(message, errors)`` tuple, where ``message`` is the `Message` object + and ``errors`` is a sequence of `TranslationError` objects. + + :rtype: ``generator`` of ``(message, errors)`` + """ + for message in self._messages.values(): + errors = message.check(catalog=self) + if errors: + yield message, errors + + def get(self, id: _MessageID, context: str | None = None) -> Message | None: + """Return the message with the specified ID and context. + + :param id: the message ID + :param context: the message context, or ``None`` for no context + """ + return self._messages.get(self._key_for(id, context)) + + def delete(self, id: _MessageID, context: str | None = None) -> None: + """Delete the message with the specified ID and context. + + :param id: the message ID + :param context: the message context, or ``None`` for no context + """ + key = self._key_for(id, context) + if key in self._messages: + del self._messages[key] + + def update( + self, + template: Catalog, + no_fuzzy_matching: bool = False, + update_header_comment: bool = False, + keep_user_comments: bool = True, + update_creation_date: bool = True, + ) -> None: + """Update the catalog based on the given template catalog. + + >>> from babel.messages import Catalog + >>> template = Catalog() + >>> template.add('green', locations=[('main.py', 99)]) + + >>> template.add('blue', locations=[('main.py', 100)]) + + >>> template.add(('salad', 'salads'), locations=[('util.py', 42)]) + + >>> catalog = Catalog(locale='de_DE') + >>> catalog.add('blue', u'blau', locations=[('main.py', 98)]) + + >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) + + >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), + ... locations=[('util.py', 38)]) + + + >>> catalog.update(template) + >>> len(catalog) + 3 + + >>> msg1 = catalog['green'] + >>> msg1.string + >>> msg1.locations + [('main.py', 99)] + + >>> msg2 = catalog['blue'] + >>> msg2.string + u'blau' + >>> msg2.locations + [('main.py', 100)] + + >>> msg3 = catalog['salad'] + >>> msg3.string + (u'Salat', u'Salate') + >>> msg3.locations + [('util.py', 42)] + + Messages that are in the catalog but not in the template are removed + from the main collection, but can still be accessed via the `obsolete` + member: + + >>> 'head' in catalog + False + >>> list(catalog.obsolete.values()) + [] + + :param template: the reference catalog, usually read from a POT file + :param no_fuzzy_matching: whether to use fuzzy matching of message IDs + """ + messages = self._messages + remaining = messages.copy() + self._messages = OrderedDict() + + # Prepare for fuzzy matching + fuzzy_candidates = {} + if not no_fuzzy_matching: + for msgid in messages: + if msgid and messages[msgid].string: + key = self._key_for(msgid) + ctxt = messages[msgid].context + fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt) + fuzzy_matches = set() + + def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: + message = message.clone() + fuzzy = False + if oldkey != newkey: + fuzzy = True + fuzzy_matches.add(oldkey) + oldmsg = messages.get(oldkey) + assert oldmsg is not None + if isinstance(oldmsg.id, str): + message.previous_id = [oldmsg.id] + else: + message.previous_id = list(oldmsg.id) + else: + oldmsg = remaining.pop(oldkey, None) + assert oldmsg is not None + message.string = oldmsg.string + + if keep_user_comments: + message.user_comments = list(distinct(oldmsg.user_comments)) + + if isinstance(message.id, (list, tuple)): + if not isinstance(message.string, (list, tuple)): + fuzzy = True + message.string = tuple( + [message.string] + ([''] * (len(message.id) - 1)), + ) + elif len(message.string) != self.num_plurals: + fuzzy = True + message.string = tuple(message.string[:len(oldmsg.string)]) + elif isinstance(message.string, (list, tuple)): + fuzzy = True + message.string = message.string[0] + message.flags |= oldmsg.flags + if fuzzy: + message.flags |= {'fuzzy'} + self[message.id] = message + + for message in template: + if message.id: + key = self._key_for(message.id, message.context) + if key in messages: + _merge(message, key, key) + else: + if not no_fuzzy_matching: + # do some fuzzy matching with difflib + matches = get_close_matches( + self._to_fuzzy_match_key(key), + fuzzy_candidates.keys(), + 1, + ) + if matches: + modified_key = matches[0] + newkey, newctxt = fuzzy_candidates[modified_key] + if newctxt is not None: + newkey = newkey, newctxt + _merge(message, newkey, key) + continue + + self[message.id] = message + + for msgid in remaining: + if no_fuzzy_matching or msgid not in fuzzy_matches: + self.obsolete[msgid] = remaining[msgid] + + if update_header_comment: + # Allow the updated catalog's header to be rewritten based on the + # template's header + self.header_comment = template.header_comment + + # Make updated catalog's POT-Creation-Date equal to the template + # used to update the catalog + if update_creation_date: + self.creation_date = template.creation_date + + def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str: + """Converts a message key to a string suitable for fuzzy matching.""" + if isinstance(key, tuple): + matchkey = key[0] # just the msgid, no context + else: + matchkey = key + return matchkey.lower().strip() + + def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str: + """The key for a message is just the singular ID even for pluralizable + messages, but is a ``(msgid, msgctxt)`` tuple for context-specific + messages. + """ + key = id + if isinstance(key, (list, tuple)): + key = id[0] + if context is not None: + key = (key, context) + return key + + def is_identical(self, other: Catalog) -> bool: + """Checks if catalogs are identical, taking into account messages and + headers. + """ + assert isinstance(other, Catalog) + for key in self._messages.keys() | other._messages.keys(): + message_1 = self.get(key) + message_2 = other.get(key) + if ( + message_1 is None + or message_2 is None + or not message_1.is_identical(message_2) + ): + return False + return dict(self.mime_headers) == dict(other.mime_headers) diff --git a/python/plurals.py b/python/plurals.py new file mode 100644 index 0000000..fa3f03e --- /dev/null +++ b/python/plurals.py @@ -0,0 +1,257 @@ +""" + babel.messages.plurals + ~~~~~~~~~~~~~~~~~~~~~~ + + Plural form definitions. + + :copyright: (c) 2013-2023 by the Babel Team. + :license: BSD, see LICENSE for more details. +""" +from __future__ import annotations + +from operator import itemgetter + +from babel.core import Locale, default_locale + +# XXX: remove this file, duplication with babel.plural + + +LC_CTYPE: str | None = default_locale('LC_CTYPE') + + +PLURALS: dict[str, tuple[int, str]] = { + # Afar + # 'aa': (), + # Abkhazian + # 'ab': (), + # Avestan + # 'ae': (), + # Afrikaans - From Pootle's PO's + 'af': (2, '(n != 1)'), + # Akan + # 'ak': (), + # Amharic + # 'am': (), + # Aragonese + # 'an': (), + # Arabic - From Pootle's PO's + 'ar': (6, '(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=0 && n%100<=2 ? 4 : 5)'), + # Assamese + # 'as': (), + # Avaric + # 'av': (), + # Aymara + # 'ay': (), + # Azerbaijani + # 'az': (), + # Bashkir + # 'ba': (), + # Belarusian + 'be': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Bulgarian - From Pootle's PO's + 'bg': (2, '(n != 1)'), + # Bihari + # 'bh': (), + # Bislama + # 'bi': (), + # Bambara + # 'bm': (), + # Bengali - From Pootle's PO's + 'bn': (2, '(n != 1)'), + # Tibetan - as discussed in private with Andrew West + 'bo': (1, '0'), + # Breton + 'br': ( + 6, + '(n==1 ? 0 : n%10==1 && n%100!=11 && n%100!=71 && n%100!=91 ? 1 : n%10==2 && n%100!=12 && n%100!=72 && ' + 'n%100!=92 ? 2 : (n%10==3 || n%10==4 || n%10==9) && n%100!=13 && n%100!=14 && n%100!=19 && n%100!=73 && ' + 'n%100!=74 && n%100!=79 && n%100!=93 && n%100!=94 && n%100!=99 ? 3 : n%1000000==0 ? 4 : 5)', + ), + # Bosnian + 'bs': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Catalan - From Pootle's PO's + 'ca': (2, '(n != 1)'), + # Chechen + # 'ce': (), + # Chamorro + # 'ch': (), + # Corsican + # 'co': (), + # Cree + # 'cr': (), + # Czech + 'cs': (3, '((n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2)'), + # Church Slavic + # 'cu': (), + # Chuvash + 'cv': (1, '0'), + # Welsh + 'cy': (5, '(n==1 ? 1 : n==2 ? 2 : n==3 ? 3 : n==6 ? 4 : 0)'), + # Danish + 'da': (2, '(n != 1)'), + # German + 'de': (2, '(n != 1)'), + # Divehi + # 'dv': (), + # Dzongkha + 'dz': (1, '0'), + # Greek + 'el': (2, '(n != 1)'), + # English + 'en': (2, '(n != 1)'), + # Esperanto + 'eo': (2, '(n != 1)'), + # Spanish + 'es': (2, '(n != 1)'), + # Estonian + 'et': (2, '(n != 1)'), + # Basque - From Pootle's PO's + 'eu': (2, '(n != 1)'), + # Persian - From Pootle's PO's + 'fa': (1, '0'), + # Finnish + 'fi': (2, '(n != 1)'), + # French + 'fr': (2, '(n > 1)'), + # Friulian - From Pootle's PO's + 'fur': (2, '(n > 1)'), + # Irish + 'ga': (5, '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'), + # Galician - From Pootle's PO's + 'gl': (2, '(n != 1)'), + # Hausa - From Pootle's PO's + 'ha': (2, '(n != 1)'), + # Hebrew + 'he': (2, '(n != 1)'), + # Hindi - From Pootle's PO's + 'hi': (2, '(n != 1)'), + # Croatian + 'hr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Hungarian + 'hu': (1, '0'), + # Armenian - From Pootle's PO's + 'hy': (1, '0'), + # Icelandic - From Pootle's PO's + 'is': (2, '(n%10==1 && n%100!=11 ? 0 : 1)'), + # Italian + 'it': (2, '(n != 1)'), + # Japanese + 'ja': (1, '0'), + # Georgian - From Pootle's PO's + 'ka': (1, '0'), + # Kongo - From Pootle's PO's + 'kg': (2, '(n != 1)'), + # Khmer - From Pootle's PO's + 'km': (1, '0'), + # Korean + 'ko': (1, '0'), + # Kurdish - From Pootle's PO's + 'ku': (2, '(n != 1)'), + # Lao - Another member of the Tai language family, like Thai. + 'lo': (1, '0'), + # Lithuanian + 'lt': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Latvian + 'lv': (3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'), + # Maltese - From Pootle's PO's + 'mt': (4, '(n==1 ? 0 : n==0 || ( n%100>=1 && n%100<=10) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'), + # Norwegian Bokmål + 'nb': (2, '(n != 1)'), + # Dutch + 'nl': (2, '(n != 1)'), + # Norwegian Nynorsk + 'nn': (2, '(n != 1)'), + # Norwegian + 'no': (2, '(n != 1)'), + # Punjabi - From Pootle's PO's + 'pa': (2, '(n != 1)'), + # Polish + 'pl': (3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Portuguese + 'pt': (2, '(n != 1)'), + # Brazilian + 'pt_BR': (2, '(n > 1)'), + # Romanian - From Pootle's PO's + 'ro': (3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'), + # Russian + 'ru': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Slovak + 'sk': (3, '((n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2)'), + # Slovenian + 'sl': (4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'), + # Serbian - From Pootle's PO's + 'sr': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Southern Sotho - From Pootle's PO's + 'st': (2, '(n != 1)'), + # Swedish + 'sv': (2, '(n != 1)'), + # Thai + 'th': (1, '0'), + # Turkish + 'tr': (1, '0'), + # Ukrainian + 'uk': (3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), + # Venda - From Pootle's PO's + 've': (2, '(n != 1)'), + # Vietnamese - From Pootle's PO's + 'vi': (1, '0'), + # Xhosa - From Pootle's PO's + 'xh': (2, '(n != 1)'), + # Chinese - From Pootle's PO's (modified) + 'zh': (1, '0'), +} + + +DEFAULT_PLURAL: tuple[int, str] = (2, '(n != 1)') + + +class _PluralTuple(tuple): + """A tuple with plural information.""" + + __slots__ = () + num_plurals = property(itemgetter(0), doc=""" + The number of plurals used by the locale.""") + plural_expr = property(itemgetter(1), doc=""" + The plural expression used by the locale.""") + plural_forms = property(lambda x: 'nplurals={}; plural={};'.format(*x), doc=""" + The plural expression used by the catalog or locale.""") + + def __str__(self) -> str: + return self.plural_forms + + +def get_plural(locale: str | None = LC_CTYPE) -> _PluralTuple: + """A tuple with the information catalogs need to perform proper + pluralization. The first item of the tuple is the number of plural + forms, the second the plural expression. + + >>> get_plural(locale='en') + (2, '(n != 1)') + >>> get_plural(locale='ga') + (5, '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)') + + The object returned is a special tuple with additional members: + + >>> tup = get_plural("ja") + >>> tup.num_plurals + 1 + >>> tup.plural_expr + '0' + >>> tup.plural_forms + 'nplurals=1; plural=0;' + + Converting the tuple into a string prints the plural forms for a + gettext catalog: + + >>> str(tup) + 'nplurals=1; plural=0;' + """ + locale = Locale.parse(locale) + try: + tup = PLURALS[str(locale)] + except KeyError: + try: + tup = PLURALS[locale.language] + except KeyError: + tup = DEFAULT_PLURAL + return _PluralTuple(tup)