From 5410d6115d9209130f2ed1bee21c6fb3c37e14e7 Mon Sep 17 00:00:00 2001 From: "Carlos A. Iglesias" Date: Thu, 8 Feb 2024 18:32:18 +0100 Subject: [PATCH] Delete python/catalog.py --- python/catalog.py | 948 ---------------------------------------------- 1 file changed, 948 deletions(-) delete mode 100644 python/catalog.py diff --git a/python/catalog.py b/python/catalog.py deleted file mode 100644 index 41adfae..0000000 --- a/python/catalog.py +++ /dev/null @@ -1,948 +0,0 @@ -""" - babel.messages.catalog - ~~~~~~~~~~~~~~~~~~~~~~ - - Data structures for message catalogs. - - :copyright: (c) 2013-2023 by the Babel Team. - :license: BSD, see LICENSE for more details. -""" -from __future__ import annotations - -import datetime -import re -from collections import OrderedDict -from collections.abc import Iterable, Iterator -from copy import copy -from difflib import SequenceMatcher -from email import message_from_string -from heapq import nlargest -from typing import TYPE_CHECKING - -from babel import __version__ as VERSION -from babel.core import Locale, UnknownLocaleError -from babel.dates import format_datetime -from babel.messages.plurals import get_plural -from babel.util import LOCALTZ, FixedOffsetTimezone, _cmp, distinct - -if TYPE_CHECKING: - from typing_extensions import TypeAlias - - _MessageID: TypeAlias = str | tuple[str, ...] | list[str] - -__all__ = ['Message', 'Catalog', 'TranslationError'] - -def get_close_matches(word, possibilities, n=3, cutoff=0.6): - """A modified version of ``difflib.get_close_matches``. - - It just passes ``autojunk=False`` to the ``SequenceMatcher``, to work - around https://github.com/python/cpython/issues/90825. - """ - if not n > 0: # pragma: no cover - raise ValueError(f"n must be > 0: {n!r}") - if not 0.0 <= cutoff <= 1.0: # pragma: no cover - raise ValueError(f"cutoff must be in [0.0, 1.0]: {cutoff!r}") - result = [] - s = SequenceMatcher(autojunk=False) # only line changed from difflib.py - s.set_seq2(word) - for x in possibilities: - s.set_seq1(x) - if s.real_quick_ratio() >= cutoff and \ - s.quick_ratio() >= cutoff and \ - s.ratio() >= cutoff: - result.append((s.ratio(), x)) - - # Move the best scorers to head of list - result = nlargest(n, result) - # Strip scores for the best n matches - return [x for score, x in result] - - -PYTHON_FORMAT = re.compile(r''' - \% - (?:\(([\w]*)\))? - ( - [-#0\ +]?(?:\*|[\d]+)? - (?:\.(?:\*|[\d]+))? - [hlL]? - ) - ([diouxXeEfFgGcrs%]) -''', re.VERBOSE) - - -def _parse_datetime_header(value: str) -> datetime.datetime: - match = re.match(r'^(?P.*?)(?P[+-]\d{4})?$', value) - - dt = datetime.datetime.strptime(match.group('datetime'), '%Y-%m-%d %H:%M') - - # Separate the offset into a sign component, hours, and # minutes - tzoffset = match.group('tzoffset') - if tzoffset is not None: - plus_minus_s, rest = tzoffset[0], tzoffset[1:] - hours_offset_s, mins_offset_s = rest[:2], rest[2:] - - # Make them all integers - plus_minus = int(f"{plus_minus_s}1") - hours_offset = int(hours_offset_s) - mins_offset = int(mins_offset_s) - - # Calculate net offset - net_mins_offset = hours_offset * 60 - net_mins_offset += mins_offset - net_mins_offset *= plus_minus - - # Create an offset object - tzoffset = FixedOffsetTimezone(net_mins_offset) - - # Store the offset in a datetime object - dt = dt.replace(tzinfo=tzoffset) - - return dt - - -class Message: - """Representation of a single message in a catalog.""" - - def __init__( - self, - id: _MessageID, - string: _MessageID | None = '', - locations: Iterable[tuple[str, int]] = (), - flags: Iterable[str] = (), - auto_comments: Iterable[str] = (), - user_comments: Iterable[str] = (), - previous_id: _MessageID = (), - lineno: int | None = None, - context: str | None = None, - ) -> None: - """Create the message object. - - :param id: the message ID, or a ``(singular, plural)`` tuple for - pluralizable messages - :param string: the translated message string, or a - ``(singular, plural)`` tuple for pluralizable messages - :param locations: a sequence of ``(filename, lineno)`` tuples - :param flags: a set or sequence of flags - :param auto_comments: a sequence of automatic comments for the message - :param user_comments: a sequence of user comments for the message - :param previous_id: the previous message ID, or a ``(singular, plural)`` - tuple for pluralizable messages - :param lineno: the line number on which the msgid line was found in the - PO file, if any - :param context: the message context - """ - self.id = id - if not string and self.pluralizable: - string = ('', '') - self.string = string - self.locations = list(distinct(locations)) - self.flags = set(flags) - if id and self.python_format: - self.flags.add('python-format') - else: - self.flags.discard('python-format') - self.auto_comments = list(distinct(auto_comments)) - self.user_comments = list(distinct(user_comments)) - if isinstance(previous_id, str): - self.previous_id = [previous_id] - else: - self.previous_id = list(previous_id) - self.lineno = lineno - self.context = context - - def __repr__(self) -> str: - return f"<{type(self).__name__} {self.id!r} (flags: {list(self.flags)!r})>" - - def __cmp__(self, other: object) -> int: - """Compare Messages, taking into account plural ids""" - def values_to_compare(obj): - if isinstance(obj, Message) and obj.pluralizable: - return obj.id[0], obj.context or '' - return obj.id, obj.context or '' - return _cmp(values_to_compare(self), values_to_compare(other)) - - def __gt__(self, other: object) -> bool: - return self.__cmp__(other) > 0 - - def __lt__(self, other: object) -> bool: - return self.__cmp__(other) < 0 - - def __ge__(self, other: object) -> bool: - return self.__cmp__(other) >= 0 - - def __le__(self, other: object) -> bool: - return self.__cmp__(other) <= 0 - - def __eq__(self, other: object) -> bool: - return self.__cmp__(other) == 0 - - def __ne__(self, other: object) -> bool: - return self.__cmp__(other) != 0 - - def is_identical(self, other: Message) -> bool: - """Checks whether messages are identical, taking into account all - properties. - """ - assert isinstance(other, Message) - return self.__dict__ == other.__dict__ - - def clone(self) -> Message: - return Message(*map(copy, (self.id, self.string, self.locations, - self.flags, self.auto_comments, - self.user_comments, self.previous_id, - self.lineno, self.context))) - - def check(self, catalog: Catalog | None = None) -> list[TranslationError]: - """Run various validation checks on the message. Some validations - are only performed if the catalog is provided. This method returns - a sequence of `TranslationError` objects. - - :rtype: ``iterator`` - :param catalog: A catalog instance that is passed to the checkers - :see: `Catalog.check` for a way to perform checks for all messages - in a catalog. - """ - from babel.messages.checkers import checkers - errors: list[TranslationError] = [] - for checker in checkers: - try: - checker(catalog, self) - except TranslationError as e: - errors.append(e) - return errors - - @property - def fuzzy(self) -> bool: - """Whether the translation is fuzzy. - - >>> Message('foo').fuzzy - False - >>> msg = Message('foo', 'foo', flags=['fuzzy']) - >>> msg.fuzzy - True - >>> msg - - - :type: `bool`""" - return 'fuzzy' in self.flags - - @property - def pluralizable(self) -> bool: - """Whether the message is plurizable. - - >>> Message('foo').pluralizable - False - >>> Message(('foo', 'bar')).pluralizable - True - - :type: `bool`""" - return isinstance(self.id, (list, tuple)) - - @property - def python_format(self) -> bool: - """Whether the message contains Python-style parameters. - - >>> Message('foo %(name)s bar').python_format - True - >>> Message(('foo %(name)s', 'foo %(name)s')).python_format - True - - :type: `bool`""" - ids = self.id - if not isinstance(ids, (list, tuple)): - ids = [ids] - return any(PYTHON_FORMAT.search(id) for id in ids) - - -class TranslationError(Exception): - """Exception thrown by translation checkers when invalid message - translations are encountered.""" - - -DEFAULT_HEADER = """\ -# Translations template for PROJECT. -# Copyright (C) YEAR ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , YEAR. -#""" - - -def parse_separated_header(value: str) -> dict[str, str]: - # Adapted from https://peps.python.org/pep-0594/#cgi - from email.message import Message - m = Message() - m['content-type'] = value - return dict(m.get_params()) - - -class Catalog: - """Representation of a message catalog.""" - - def __init__( - self, - locale: str | Locale | None = None, - domain: str | None = None, - header_comment: str | None = DEFAULT_HEADER, - project: str | None = None, - version: str | None = None, - copyright_holder: str | None = None, - msgid_bugs_address: str | None = None, - creation_date: datetime.datetime | str | None = None, - revision_date: datetime.datetime | datetime.time | float | str | None = None, - last_translator: str | None = None, - language_team: str | None = None, - charset: str | None = None, - fuzzy: bool = True, - ) -> None: - """Initialize the catalog object. - - :param locale: the locale identifier or `Locale` object, or `None` - if the catalog is not bound to a locale (which basically - means it's a template) - :param domain: the message domain - :param header_comment: the header comment as string, or `None` for the - default header - :param project: the project's name - :param version: the project's version - :param copyright_holder: the copyright holder of the catalog - :param msgid_bugs_address: the email address or URL to submit bug - reports to - :param creation_date: the date the catalog was created - :param revision_date: the date the catalog was revised - :param last_translator: the name and email of the last translator - :param language_team: the name and email of the language team - :param charset: the encoding to use in the output (defaults to utf-8) - :param fuzzy: the fuzzy bit on the catalog header - """ - self.domain = domain - self.locale = locale - self._header_comment = header_comment - self._messages: OrderedDict[str | tuple[str, str], Message] = OrderedDict() - - self.project = project or 'PROJECT' - self.version = version or 'VERSION' - self.copyright_holder = copyright_holder or 'ORGANIZATION' - self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS' - - self.last_translator = last_translator or 'FULL NAME ' - """Name and email address of the last translator.""" - self.language_team = language_team or 'LANGUAGE ' - """Name and email address of the language team.""" - - self.charset = charset or 'utf-8' - - if creation_date is None: - creation_date = datetime.datetime.now(LOCALTZ) - elif isinstance(creation_date, datetime.datetime) and not creation_date.tzinfo: - creation_date = creation_date.replace(tzinfo=LOCALTZ) - self.creation_date = creation_date - if revision_date is None: - revision_date = 'YEAR-MO-DA HO:MI+ZONE' - elif isinstance(revision_date, datetime.datetime) and not revision_date.tzinfo: - revision_date = revision_date.replace(tzinfo=LOCALTZ) - self.revision_date = revision_date - self.fuzzy = fuzzy - - # Dictionary of obsolete messages - self.obsolete: OrderedDict[str | tuple[str, str], Message] = OrderedDict() - self._num_plurals = None - self._plural_expr = None - - def _set_locale(self, locale: Locale | str | None) -> None: - if locale is None: - self._locale_identifier = None - self._locale = None - return - - if isinstance(locale, Locale): - self._locale_identifier = str(locale) - self._locale = locale - return - - if isinstance(locale, str): - self._locale_identifier = str(locale) - try: - self._locale = Locale.parse(locale) - except UnknownLocaleError: - self._locale = None - return - - raise TypeError(f"`locale` must be a Locale, a locale identifier string, or None; got {locale!r}") - - def _get_locale(self) -> Locale | None: - return self._locale - - def _get_locale_identifier(self) -> str | None: - return self._locale_identifier - - locale = property(_get_locale, _set_locale) - locale_identifier = property(_get_locale_identifier) - - def _get_header_comment(self) -> str: - comment = self._header_comment - year = datetime.datetime.now(LOCALTZ).strftime('%Y') - if hasattr(self.revision_date, 'strftime'): - year = self.revision_date.strftime('%Y') - comment = comment.replace('PROJECT', self.project) \ - .replace('VERSION', self.version) \ - .replace('YEAR', year) \ - .replace('ORGANIZATION', self.copyright_holder) - locale_name = (self.locale.english_name if self.locale else self.locale_identifier) - if locale_name: - comment = comment.replace("Translations template", f"{locale_name} translations") - return comment - - def _set_header_comment(self, string: str | None) -> None: - self._header_comment = string - - header_comment = property(_get_header_comment, _set_header_comment, doc="""\ - The header comment for the catalog. - - >>> catalog = Catalog(project='Foobar', version='1.0', - ... copyright_holder='Foo Company') - >>> print(catalog.header_comment) #doctest: +ELLIPSIS - # Translations template for Foobar. - # Copyright (C) ... Foo Company - # This file is distributed under the same license as the Foobar project. - # FIRST AUTHOR , .... - # - - The header can also be set from a string. Any known upper-case variables - will be replaced when the header is retrieved again: - - >>> catalog = Catalog(project='Foobar', version='1.0', - ... copyright_holder='Foo Company') - >>> catalog.header_comment = '''\\ - ... # The POT for my really cool PROJECT project. - ... # Copyright (C) 1990-2003 ORGANIZATION - ... # This file is distributed under the same license as the PROJECT - ... # project. - ... #''' - >>> print(catalog.header_comment) - # The POT for my really cool Foobar project. - # Copyright (C) 1990-2003 Foo Company - # This file is distributed under the same license as the Foobar - # project. - # - - :type: `unicode` - """) - - def _get_mime_headers(self) -> list[tuple[str, str]]: - headers: list[tuple[str, str]] = [] - headers.append(("Project-Id-Version", f"{self.project} {self.version}")) - headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address)) - headers.append(('POT-Creation-Date', - format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ', - locale='en'))) - if isinstance(self.revision_date, (datetime.datetime, datetime.time, int, float)): - headers.append(('PO-Revision-Date', - format_datetime(self.revision_date, - 'yyyy-MM-dd HH:mmZ', locale='en'))) - else: - headers.append(('PO-Revision-Date', self.revision_date)) - headers.append(('Last-Translator', self.last_translator)) - if self.locale_identifier: - headers.append(('Language', str(self.locale_identifier))) - if self.locale_identifier and ('LANGUAGE' in self.language_team): - headers.append(('Language-Team', - self.language_team.replace('LANGUAGE', - str(self.locale_identifier)))) - else: - headers.append(('Language-Team', self.language_team)) - if self.locale is not None: - headers.append(('Plural-Forms', self.plural_forms)) - headers.append(('MIME-Version', '1.0')) - headers.append(("Content-Type", f"text/plain; charset={self.charset}")) - headers.append(('Content-Transfer-Encoding', '8bit')) - headers.append(("Generated-By", f"Babel {VERSION}\n")) - return headers - - def _force_text(self, s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') -> str: - if isinstance(s, str): - return s - if isinstance(s, bytes): - return s.decode(encoding, errors) - return str(s) - - def _set_mime_headers(self, headers: Iterable[tuple[str, str]]) -> None: - for name, value in headers: - name = self._force_text(name.lower(), encoding=self.charset) - value = self._force_text(value, encoding=self.charset) - if name == 'project-id-version': - parts = value.split(' ') - self.project = ' '.join(parts[:-1]) - self.version = parts[-1] - elif name == 'report-msgid-bugs-to': - self.msgid_bugs_address = value - elif name == 'last-translator': - self.last_translator = value - elif name == 'language': - value = value.replace('-', '_') - self._set_locale(value) - elif name == 'language-team': - self.language_team = value - elif name == 'content-type': - params = parse_separated_header(value) - if 'charset' in params: - self.charset = params['charset'].lower() - elif name == 'plural-forms': - params = parse_separated_header(f" ;{value}") - self._num_plurals = int(params.get('nplurals', 2)) - self._plural_expr = params.get('plural', '(n != 1)') - elif name == 'pot-creation-date': - self.creation_date = _parse_datetime_header(value) - elif name == 'po-revision-date': - # Keep the value if it's not the default one - if 'YEAR' not in value: - self.revision_date = _parse_datetime_header(value) - - mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\ - The MIME headers of the catalog, used for the special ``msgid ""`` entry. - - The behavior of this property changes slightly depending on whether a locale - is set or not, the latter indicating that the catalog is actually a template - for actual translations. - - Here's an example of the output for such a catalog template: - - >>> from babel.dates import UTC - >>> from datetime import datetime - >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC) - >>> catalog = Catalog(project='Foobar', version='1.0', - ... creation_date=created) - >>> for name, value in catalog.mime_headers: - ... print('%s: %s' % (name, value)) - Project-Id-Version: Foobar 1.0 - Report-Msgid-Bugs-To: EMAIL@ADDRESS - POT-Creation-Date: 1990-04-01 15:30+0000 - PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE - Last-Translator: FULL NAME - Language-Team: LANGUAGE - MIME-Version: 1.0 - Content-Type: text/plain; charset=utf-8 - Content-Transfer-Encoding: 8bit - Generated-By: Babel ... - - And here's an example of the output when the locale is set: - - >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC) - >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0', - ... creation_date=created, revision_date=revised, - ... last_translator='John Doe ', - ... language_team='de_DE ') - >>> for name, value in catalog.mime_headers: - ... print('%s: %s' % (name, value)) - Project-Id-Version: Foobar 1.0 - Report-Msgid-Bugs-To: EMAIL@ADDRESS - POT-Creation-Date: 1990-04-01 15:30+0000 - PO-Revision-Date: 1990-08-03 12:00+0000 - Last-Translator: John Doe - Language: de_DE - Language-Team: de_DE - Plural-Forms: nplurals=2; plural=(n != 1); - MIME-Version: 1.0 - Content-Type: text/plain; charset=utf-8 - Content-Transfer-Encoding: 8bit - Generated-By: Babel ... - - :type: `list` - """) - - @property - def num_plurals(self) -> int: - """The number of plurals used by the catalog or locale. - - >>> Catalog(locale='en').num_plurals - 2 - >>> Catalog(locale='ga').num_plurals - 5 - - :type: `int`""" - if self._num_plurals is None: - num = 2 - if self.locale: - num = get_plural(self.locale)[0] - self._num_plurals = num - return self._num_plurals - - @property - def plural_expr(self) -> str: - """The plural expression used by the catalog or locale. - - >>> Catalog(locale='en').plural_expr - '(n != 1)' - >>> Catalog(locale='ga').plural_expr - '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)' - >>> Catalog(locale='ding').plural_expr # unknown locale - '(n != 1)' - - :type: `str`""" - if self._plural_expr is None: - expr = '(n != 1)' - if self.locale: - expr = get_plural(self.locale)[1] - self._plural_expr = expr - return self._plural_expr - - @property - def plural_forms(self) -> str: - """Return the plural forms declaration for the locale. - - >>> Catalog(locale='en').plural_forms - 'nplurals=2; plural=(n != 1);' - >>> Catalog(locale='pt_BR').plural_forms - 'nplurals=2; plural=(n > 1);' - - :type: `str`""" - return f"nplurals={self.num_plurals}; plural={self.plural_expr};" - - def __contains__(self, id: _MessageID) -> bool: - """Return whether the catalog has a message with the specified ID.""" - return self._key_for(id) in self._messages - - def __len__(self) -> int: - """The number of messages in the catalog. - - This does not include the special ``msgid ""`` entry.""" - return len(self._messages) - - def __iter__(self) -> Iterator[Message]: - """Iterates through all the entries in the catalog, in the order they - were added, yielding a `Message` object for every entry. - - :rtype: ``iterator``""" - buf = [] - for name, value in self.mime_headers: - buf.append(f"{name}: {value}") - flags = set() - if self.fuzzy: - flags |= {'fuzzy'} - yield Message('', '\n'.join(buf), flags=flags) - for key in self._messages: - yield self._messages[key] - - def __repr__(self) -> str: - locale = '' - if self.locale: - locale = f" {self.locale}" - return f"<{type(self).__name__} {self.domain!r}{locale}>" - - def __delitem__(self, id: _MessageID) -> None: - """Delete the message with the specified ID.""" - self.delete(id) - - def __getitem__(self, id: _MessageID) -> Message: - """Return the message with the specified ID. - - :param id: the message ID - """ - return self.get(id) - - def __setitem__(self, id: _MessageID, message: Message) -> None: - """Add or update the message with the specified ID. - - >>> catalog = Catalog() - >>> catalog[u'foo'] = Message(u'foo') - >>> catalog[u'foo'] - - - If a message with that ID is already in the catalog, it is updated - to include the locations and flags of the new message. - - >>> catalog = Catalog() - >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)]) - >>> catalog[u'foo'].locations - [('main.py', 1)] - >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)]) - >>> catalog[u'foo'].locations - [('main.py', 1), ('utils.py', 5)] - - :param id: the message ID - :param message: the `Message` object - """ - assert isinstance(message, Message), 'expected a Message object' - key = self._key_for(id, message.context) - current = self._messages.get(key) - if current: - if message.pluralizable and not current.pluralizable: - # The new message adds pluralization - current.id = message.id - current.string = message.string - current.locations = list(distinct(current.locations + - message.locations)) - current.auto_comments = list(distinct(current.auto_comments + - message.auto_comments)) - current.user_comments = list(distinct(current.user_comments + - message.user_comments)) - current.flags |= message.flags - message = current - elif id == '': - # special treatment for the header message - self.mime_headers = message_from_string(message.string).items() - self.header_comment = "\n".join([f"# {c}".rstrip() for c in message.user_comments]) - self.fuzzy = message.fuzzy - else: - if isinstance(id, (list, tuple)): - assert isinstance(message.string, (list, tuple)), \ - f"Expected sequence but got {type(message.string)}" - self._messages[key] = message - - def add( - self, - id: _MessageID, - string: _MessageID | None = None, - locations: Iterable[tuple[str, int]] = (), - flags: Iterable[str] = (), - auto_comments: Iterable[str] = (), - user_comments: Iterable[str] = (), - previous_id: _MessageID = (), - lineno: int | None = None, - context: str | None = None, - ) -> Message: - """Add or update the message with the specified ID. - - >>> catalog = Catalog() - >>> catalog.add(u'foo') - - >>> catalog[u'foo'] - - - This method simply constructs a `Message` object with the given - arguments and invokes `__setitem__` with that object. - - :param id: the message ID, or a ``(singular, plural)`` tuple for - pluralizable messages - :param string: the translated message string, or a - ``(singular, plural)`` tuple for pluralizable messages - :param locations: a sequence of ``(filename, lineno)`` tuples - :param flags: a set or sequence of flags - :param auto_comments: a sequence of automatic comments - :param user_comments: a sequence of user comments - :param previous_id: the previous message ID, or a ``(singular, plural)`` - tuple for pluralizable messages - :param lineno: the line number on which the msgid line was found in the - PO file, if any - :param context: the message context - """ - message = Message(id, string, list(locations), flags, auto_comments, - user_comments, previous_id, lineno=lineno, - context=context) - self[id] = message - return message - - def check(self) -> Iterable[tuple[Message, list[TranslationError]]]: - """Run various validation checks on the translations in the catalog. - - For every message which fails validation, this method yield a - ``(message, errors)`` tuple, where ``message`` is the `Message` object - and ``errors`` is a sequence of `TranslationError` objects. - - :rtype: ``generator`` of ``(message, errors)`` - """ - for message in self._messages.values(): - errors = message.check(catalog=self) - if errors: - yield message, errors - - def get(self, id: _MessageID, context: str | None = None) -> Message | None: - """Return the message with the specified ID and context. - - :param id: the message ID - :param context: the message context, or ``None`` for no context - """ - return self._messages.get(self._key_for(id, context)) - - def delete(self, id: _MessageID, context: str | None = None) -> None: - """Delete the message with the specified ID and context. - - :param id: the message ID - :param context: the message context, or ``None`` for no context - """ - key = self._key_for(id, context) - if key in self._messages: - del self._messages[key] - - def update( - self, - template: Catalog, - no_fuzzy_matching: bool = False, - update_header_comment: bool = False, - keep_user_comments: bool = True, - update_creation_date: bool = True, - ) -> None: - """Update the catalog based on the given template catalog. - - >>> from babel.messages import Catalog - >>> template = Catalog() - >>> template.add('green', locations=[('main.py', 99)]) - - >>> template.add('blue', locations=[('main.py', 100)]) - - >>> template.add(('salad', 'salads'), locations=[('util.py', 42)]) - - >>> catalog = Catalog(locale='de_DE') - >>> catalog.add('blue', u'blau', locations=[('main.py', 98)]) - - >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)]) - - >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'), - ... locations=[('util.py', 38)]) - - - >>> catalog.update(template) - >>> len(catalog) - 3 - - >>> msg1 = catalog['green'] - >>> msg1.string - >>> msg1.locations - [('main.py', 99)] - - >>> msg2 = catalog['blue'] - >>> msg2.string - u'blau' - >>> msg2.locations - [('main.py', 100)] - - >>> msg3 = catalog['salad'] - >>> msg3.string - (u'Salat', u'Salate') - >>> msg3.locations - [('util.py', 42)] - - Messages that are in the catalog but not in the template are removed - from the main collection, but can still be accessed via the `obsolete` - member: - - >>> 'head' in catalog - False - >>> list(catalog.obsolete.values()) - [] - - :param template: the reference catalog, usually read from a POT file - :param no_fuzzy_matching: whether to use fuzzy matching of message IDs - """ - messages = self._messages - remaining = messages.copy() - self._messages = OrderedDict() - - # Prepare for fuzzy matching - fuzzy_candidates = {} - if not no_fuzzy_matching: - for msgid in messages: - if msgid and messages[msgid].string: - key = self._key_for(msgid) - ctxt = messages[msgid].context - fuzzy_candidates[self._to_fuzzy_match_key(key)] = (key, ctxt) - fuzzy_matches = set() - - def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: - message = message.clone() - fuzzy = False - if oldkey != newkey: - fuzzy = True - fuzzy_matches.add(oldkey) - oldmsg = messages.get(oldkey) - assert oldmsg is not None - if isinstance(oldmsg.id, str): - message.previous_id = [oldmsg.id] - else: - message.previous_id = list(oldmsg.id) - else: - oldmsg = remaining.pop(oldkey, None) - assert oldmsg is not None - message.string = oldmsg.string - - if keep_user_comments: - message.user_comments = list(distinct(oldmsg.user_comments)) - - if isinstance(message.id, (list, tuple)): - if not isinstance(message.string, (list, tuple)): - fuzzy = True - message.string = tuple( - [message.string] + ([''] * (len(message.id) - 1)), - ) - elif len(message.string) != self.num_plurals: - fuzzy = True - message.string = tuple(message.string[:len(oldmsg.string)]) - elif isinstance(message.string, (list, tuple)): - fuzzy = True - message.string = message.string[0] - message.flags |= oldmsg.flags - if fuzzy: - message.flags |= {'fuzzy'} - self[message.id] = message - - for message in template: - if message.id: - key = self._key_for(message.id, message.context) - if key in messages: - _merge(message, key, key) - else: - if not no_fuzzy_matching: - # do some fuzzy matching with difflib - matches = get_close_matches( - self._to_fuzzy_match_key(key), - fuzzy_candidates.keys(), - 1, - ) - if matches: - modified_key = matches[0] - newkey, newctxt = fuzzy_candidates[modified_key] - if newctxt is not None: - newkey = newkey, newctxt - _merge(message, newkey, key) - continue - - self[message.id] = message - - for msgid in remaining: - if no_fuzzy_matching or msgid not in fuzzy_matches: - self.obsolete[msgid] = remaining[msgid] - - if update_header_comment: - # Allow the updated catalog's header to be rewritten based on the - # template's header - self.header_comment = template.header_comment - - # Make updated catalog's POT-Creation-Date equal to the template - # used to update the catalog - if update_creation_date: - self.creation_date = template.creation_date - - def _to_fuzzy_match_key(self, key: tuple[str, str] | str) -> str: - """Converts a message key to a string suitable for fuzzy matching.""" - if isinstance(key, tuple): - matchkey = key[0] # just the msgid, no context - else: - matchkey = key - return matchkey.lower().strip() - - def _key_for(self, id: _MessageID, context: str | None = None) -> tuple[str, str] | str: - """The key for a message is just the singular ID even for pluralizable - messages, but is a ``(msgid, msgctxt)`` tuple for context-specific - messages. - """ - key = id - if isinstance(key, (list, tuple)): - key = id[0] - if context is not None: - key = (key, context) - return key - - def is_identical(self, other: Catalog) -> bool: - """Checks if catalogs are identical, taking into account messages and - headers. - """ - assert isinstance(other, Catalog) - for key in self._messages.keys() | other._messages.keys(): - message_1 = self.get(key) - message_2 = other.get(key) - if ( - message_1 is None - or message_2 is None - or not message_1.is_identical(message_2) - ): - return False - return dict(self.mime_headers) == dict(other.mime_headers)