1 # -*- coding: utf-8 -*-
6 Data structures for message catalogs.
8 :copyright: (c) 2013 by the Babel Team.
9 :license: BSD, see LICENSE for more details.
15 from cgi import parse_header
16 from datetime import datetime, time as time_
17 from difflib import get_close_matches
18 from email import message_from_string
21 from babel import __version__ as VERSION
22 from babel.core import Locale
23 from babel.dates import format_datetime
24 from babel.messages.plurals import get_plural
25 from babel.util import odict, distinct, LOCALTZ, FixedOffsetTimezone
26 from babel._compat import string_types, number_types, PY2, cmp
28 __all__ = ['Message', 'Catalog', 'TranslationError']
31 PYTHON_FORMAT = re.compile(r'''(?x)
35 [-#0\ +]?(?:\*|[\d]+)?
43 def _parse_datetime_header(value):
44 match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
46 tt = time.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
48 dt = datetime.fromtimestamp(ts)
50 # Separate the offset into a sign component, hours, and # minutes
51 tzoffset = match.group('tzoffset')
52 if tzoffset is not None:
53 plus_minus_s, rest = tzoffset[0], tzoffset[1:]
54 hours_offset_s, mins_offset_s = rest[:2], rest[2:]
56 # Make them all integers
57 plus_minus = int(plus_minus_s + '1')
58 hours_offset = int(hours_offset_s)
59 mins_offset = int(mins_offset_s)
61 # Calculate net offset
62 net_mins_offset = hours_offset * 60
63 net_mins_offset += mins_offset
64 net_mins_offset *= plus_minus
66 # Create an offset object
67 tzoffset = FixedOffsetTimezone(net_mins_offset)
69 # Store the offset in a datetime object
70 dt = dt.replace(tzinfo=tzoffset)
class Message(object):
    """Representation of a single message in a catalog."""

    def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
                 user_comments=(), previous_id=(), lineno=None, context=None):
        """Create the message object.

        :param id: the message ID, or a ``(singular, plural)`` tuple for
                   pluralizable messages
        :param string: the translated message string, or a
                       ``(singular, plural)`` tuple for pluralizable messages
        :param locations: a sequence of ``(filename, lineno)`` tuples
        :param flags: a set or sequence of flags
        :param auto_comments: a sequence of automatic comments for the message
        :param user_comments: a sequence of user comments for the message
        :param previous_id: the previous message ID, or a ``(singular, plural)``
                            tuple for pluralizable messages
        :param lineno: the line number on which the msgid line was found, if
                       known
        :param context: the message context
        """
        self.id = id
        # A pluralizable message without a translation gets one empty slot
        # per ID form so that ``string`` has the same shape as ``id``.
        if not string and self.pluralizable:
            string = (u'', u'')
        self.string = string
        self.locations = list(distinct(locations))
        self.flags = set(flags)
        # The ``python-format`` flag is derived from the ID itself, so any
        # value passed in through ``flags`` is overridden here.
        if id and self.python_format:
            self.flags.add('python-format')
        else:
            self.flags.discard('python-format')
        self.auto_comments = list(distinct(auto_comments))
        self.user_comments = list(distinct(user_comments))
        # ``previous_id`` is always stored as a list, whether a single
        # string or a sequence was given.
        if isinstance(previous_id, string_types):
            self.previous_id = [previous_id]
        else:
            self.previous_id = list(previous_id)
        self.lineno = lineno
        self.context = context

    def __repr__(self):
        return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
                                        list(self.flags))

    def __cmp__(self, obj):
        """Compare Messages, taking into account plural ids.

        When both operands are messages, a pluralizable message is
        represented by its singular ID; any other object is compared through
        its ``id`` attribute as-is.
        """
        if isinstance(obj, Message):
            this = self.id[0] if self.pluralizable else self.id
            other = obj.id[0] if obj.pluralizable else obj.id
        else:
            this, other = self.id, obj.id
        return cmp(this, other)

    # The rich comparisons return ``NotImplemented`` for operands that have
    # no ``id`` attribute (``None``, plain strings, ...) instead of letting
    # ``__cmp__`` fail with an ``AttributeError``.

    def __gt__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __lt__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __ge__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.__cmp__(other) >= 0

    def __le__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.__cmp__(other) <= 0

    def __eq__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.__cmp__(other) != 0

    def clone(self):
        """Return a new `Message` built from shallow copies of every field."""
        return Message(*map(copy, (self.id, self.string, self.locations,
                                   self.flags, self.auto_comments,
                                   self.user_comments, self.previous_id,
                                   self.lineno, self.context)))

    def check(self, catalog=None):
        """Run various validation checks on the message.  Some validations
        are only performed if the catalog is provided.  This method returns
        a sequence of `TranslationError` objects.

        :rtype: ``list``
        :param catalog: A catalog instance that is passed to the checkers
        :see: `Catalog.check` for a way to perform checks for all messages
              in a catalog.
        """
        # Imported here rather than at module level.
        from babel.messages.checkers import checkers
        errors = []
        for checker in checkers:
            try:
                checker(catalog, self)
            except TranslationError as e:
                # Collect the failure and keep running the other checkers.
                errors.append(e)
        return errors

    @property
    def fuzzy(self):
        """Whether the translation is fuzzy.

        >>> Message('foo').fuzzy
        False
        >>> msg = Message('foo', 'foo', flags=['fuzzy'])
        >>> msg.fuzzy
        True
        >>> msg
        <Message 'foo' (flags: ['fuzzy'])>

        :type:  `bool`"""
        return 'fuzzy' in self.flags

    @property
    def pluralizable(self):
        """Whether the message is pluralizable.

        >>> Message('foo').pluralizable
        False
        >>> Message(('foo', 'bar')).pluralizable
        True

        :type:  `bool`"""
        return isinstance(self.id, (list, tuple))

    @property
    def python_format(self):
        """Whether the message contains Python-style parameters.

        >>> Message('foo %(name)s bar').python_format
        True
        >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
        True

        :type:  `bool`"""
        ids = self.id
        if not isinstance(ids, (list, tuple)):
            ids = [ids]
        # A single matching form is enough for the whole message to count.
        return any(PYTHON_FORMAT.search(id) for id in ids)
class TranslationError(Exception):
    """Raised by a translation checker to report an invalid message
    translation."""
226 DEFAULT_HEADER = u"""\
227 # Translations template for PROJECT.
228 # Copyright (C) YEAR ORGANIZATION
229 # This file is distributed under the same license as the PROJECT project.
230 # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
# NOTE(review): the ``PY2`` guard is reconstructed from the comment in the
# original and from ``PY2`` being imported without any other visible use --
# confirm against the full file.
if PY2:
    def _parse_header(header_string):
        """Parse RFC 822 style headers from a unicode string.

        ``message_from_string`` only works for ``str``, not for ``unicode``,
        so the text is encoded to UTF-8 for parsing and every header name
        and value is decoded again before being returned.

        :param header_string: the header block as unicode text
        :return: a dict mapping unicode header names to unicode values
        """
        parsed = message_from_string(header_string.encode('utf8'))
        return dict(
            (key.decode('utf8'), text.decode('utf8'))
            for key, text in parsed.items()
        )

else:
    _parse_header = message_from_string
249 class Catalog(object):
250 """Representation of a message catalog."""
def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
             project=None, version=None, copyright_holder=None,
             msgid_bugs_address=None, creation_date=None,
             revision_date=None, last_translator=None, language_team=None,
             charset=None, fuzzy=True):
    """Initialize the catalog object.

    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param header_comment: the header comment as string, or `None` for the
                           default header
    :param project: the project's name
    :param version: the project's version
    :param copyright_holder: the copyright holder of the catalog
    :param msgid_bugs_address: the email address or URL to submit bug
                               reports to
    :param creation_date: the date the catalog was created
    :param revision_date: the date the catalog was revised
    :param last_translator: the name and email of the last translator
    :param language_team: the name and email of the language team
    :param charset: the encoding to use in the output (defaults to utf-8)
    :param fuzzy: the fuzzy bit on the catalog header
    """
    self.domain = domain
    if locale:
        locale = Locale.parse(locale)
    self.locale = locale
    # An explicit ``None`` selects the default header; storing ``None``
    # would make reading ``header_comment`` fail on ``None.replace``.
    if header_comment is None:
        header_comment = DEFAULT_HEADER
    self._header_comment = header_comment
    self._messages = odict()

    # Unset metadata falls back to the upper-case placeholder values.
    self.project = project or 'PROJECT'
    self.version = version or 'VERSION'
    self.copyright_holder = copyright_holder or 'ORGANIZATION'
    self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'

    # Name and email address of the last translator.
    self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
    # Name and email address of the language team.
    self.language_team = language_team or 'LANGUAGE <LL@li.org>'

    self.charset = charset or 'utf-8'

    # Naive datetimes are tagged with the local timezone.
    if creation_date is None:
        creation_date = datetime.now(LOCALTZ)
    elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
        creation_date = creation_date.replace(tzinfo=LOCALTZ)
    self.creation_date = creation_date
    # A missing revision date is kept as the literal placeholder string.
    if revision_date is None:
        revision_date = 'YEAR-MO-DA HO:MI+ZONE'
    elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
        revision_date = revision_date.replace(tzinfo=LOCALTZ)
    self.revision_date = revision_date
    self.fuzzy = fuzzy

    self.obsolete = odict()  # Dictionary of obsolete messages
    # Filled in lazily, or when a ``Plural-Forms`` header is parsed.
    self._num_plurals = None
    self._plural_expr = None
312 def _get_header_comment(self):
313 comment = self._header_comment
314 year = datetime.now(LOCALTZ).strftime('%Y')
315 if hasattr(self.revision_date, 'strftime'):
316 year = self.revision_date.strftime('%Y')
317 comment = comment.replace('PROJECT', self.project) \
318 .replace('VERSION', self.version) \
319 .replace('YEAR', year) \
320 .replace('ORGANIZATION', self.copyright_holder)
322 comment = comment.replace('Translations template', '%s translations'
323 % self.locale.english_name)
326 def _set_header_comment(self, string):
327 self._header_comment = string
329 header_comment = property(_get_header_comment, _set_header_comment, doc="""\
330 The header comment for the catalog.
332 >>> catalog = Catalog(project='Foobar', version='1.0',
333 ... copyright_holder='Foo Company')
334 >>> print(catalog.header_comment) #doctest: +ELLIPSIS
335 # Translations template for Foobar.
336 # Copyright (C) ... Foo Company
337 # This file is distributed under the same license as the Foobar project.
338 # FIRST AUTHOR <EMAIL@ADDRESS>, ....
341 The header can also be set from a string. Any known upper-case variables
342 will be replaced when the header is retrieved again:
344 >>> catalog = Catalog(project='Foobar', version='1.0',
345 ... copyright_holder='Foo Company')
346 >>> catalog.header_comment = '''\\
347 ... # The POT for my really cool PROJECT project.
348 ... # Copyright (C) 1990-2003 ORGANIZATION
349 ... # This file is distributed under the same license as the PROJECT
352 >>> print(catalog.header_comment)
353 # The POT for my really cool Foobar project.
354 # Copyright (C) 1990-2003 Foo Company
355 # This file is distributed under the same license as the Foobar
def _get_mime_headers(self):
    """Build the catalog's MIME headers as a list of ``(name, value)`` pairs.

    ``Language`` and ``Plural-Forms`` are only emitted when the catalog has
    a locale.  The revision date is formatted when it is a datetime, time
    or number and is passed through verbatim otherwise.
    """
    stamp_format = 'yyyy-MM-dd HH:mmZ'
    headers = [
        ('Project-Id-Version', '%s %s' % (self.project, self.version)),
        ('Report-Msgid-Bugs-To', self.msgid_bugs_address),
        # NOTE(review): ``locale='en'`` mirrors the revision-date call
        # below; that argument is not visible for this call -- confirm.
        ('POT-Creation-Date',
         format_datetime(self.creation_date, stamp_format, locale='en')),
    ]

    revised = self.revision_date
    if isinstance(revised, (datetime, time_) + number_types):
        revised = format_datetime(revised, stamp_format, locale='en')
    headers.append(('PO-Revision-Date', revised))
    headers.append(('Last-Translator', self.last_translator))

    has_locale = self.locale is not None
    if has_locale:
        headers.append(('Language', str(self.locale)))

    team = self.language_team
    if has_locale and 'LANGUAGE' in team:
        # NOTE(review): the replacement text is assumed to be the locale
        # identifier, as used for the ``Language`` header -- confirm.
        team = team.replace('LANGUAGE', str(self.locale))
    headers.append(('Language-Team', team))

    if has_locale:
        headers.append(('Plural-Forms', self.plural_forms))

    headers += [
        ('MIME-Version', '1.0'),
        ('Content-Type', 'text/plain; charset=%s' % self.charset),
        ('Content-Transfer-Encoding', '8bit'),
        ('Generated-By', 'Babel %s\n' % VERSION),
    ]
    return headers
394 def _set_mime_headers(self, headers):
395 for name, value in headers:
397 if name == 'project-id-version':
398 parts = value.split(' ')
399 self.project = u' '.join(parts[:-1])
400 self.version = parts[-1]
401 elif name == 'report-msgid-bugs-to':
402 self.msgid_bugs_address = value
403 elif name == 'last-translator':
404 self.last_translator = value
405 elif name == 'language-team':
406 self.language_team = value
407 elif name == 'content-type':
408 mimetype, params = parse_header(value)
409 if 'charset' in params:
410 self.charset = params['charset'].lower()
411 elif name == 'plural-forms':
412 _, params = parse_header(' ;' + value)
413 self._num_plurals = int(params.get('nplurals', 2))
414 self._plural_expr = params.get('plural', '(n != 1)')
415 elif name == 'pot-creation-date':
416 self.creation_date = _parse_datetime_header(value)
417 elif name == 'po-revision-date':
418 # Keep the value if it's not the default one
419 if 'YEAR' not in value:
420 self.revision_date = _parse_datetime_header(value)
422 mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
423 The MIME headers of the catalog, used for the special ``msgid ""`` entry.
425 The behavior of this property changes slightly depending on whether a locale
426 is set or not, the latter indicating that the catalog is actually a template
427 for actual translations.
429 Here's an example of the output for such a catalog template:
431 >>> from babel.dates import UTC
432 >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
433 >>> catalog = Catalog(project='Foobar', version='1.0',
434 ... creation_date=created)
435 >>> for name, value in catalog.mime_headers:
436 ... print('%s: %s' % (name, value))
437 Project-Id-Version: Foobar 1.0
438 Report-Msgid-Bugs-To: EMAIL@ADDRESS
439 POT-Creation-Date: 1990-04-01 15:30+0000
440 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
441 Last-Translator: FULL NAME <EMAIL@ADDRESS>
442 Language-Team: LANGUAGE <LL@li.org>
444 Content-Type: text/plain; charset=utf-8
445 Content-Transfer-Encoding: 8bit
446 Generated-By: Babel ...
448 And here's an example of the output when the locale is set:
450 >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
451 >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
452 ... creation_date=created, revision_date=revised,
453 ... last_translator='John Doe <jd@example.com>',
454 ... language_team='de_DE <de@example.com>')
455 >>> for name, value in catalog.mime_headers:
456 ... print('%s: %s' % (name, value))
457 Project-Id-Version: Foobar 1.0
458 Report-Msgid-Bugs-To: EMAIL@ADDRESS
459 POT-Creation-Date: 1990-04-01 15:30+0000
460 PO-Revision-Date: 1990-08-03 12:00+0000
461 Last-Translator: John Doe <jd@example.com>
463 Language-Team: de_DE <de@example.com>
464 Plural-Forms: nplurals=2; plural=(n != 1)
466 Content-Type: text/plain; charset=utf-8
467 Content-Transfer-Encoding: 8bit
468 Generated-By: Babel ...
@property
def num_plurals(self):
    """The number of plurals used by the catalog or locale.

    >>> Catalog(locale='en').num_plurals
    2
    >>> Catalog(locale='ga').num_plurals
    3

    :type: `int`"""
    # Computed on first access and cached; without a locale the count is 2.
    if self._num_plurals is None:
        self._num_plurals = get_plural(self.locale)[0] if self.locale else 2
    return self._num_plurals
@property
def plural_expr(self):
    """The plural expression used by the catalog or locale.

    >>> Catalog(locale='en').plural_expr
    '(n != 1)'
    >>> Catalog(locale='ga').plural_expr
    '(n==1 ? 0 : n==2 ? 1 : 2)'

    :type: `string_types`"""
    # Computed on first access and cached; without a locale the
    # expression is '(n != 1)'.
    if self._plural_expr is None:
        if self.locale:
            self._plural_expr = get_plural(self.locale)[1]
        else:
            self._plural_expr = '(n != 1)'
    return self._plural_expr
@property
def plural_forms(self):
    """Return the plural forms declaration for the locale.

    >>> Catalog(locale='en').plural_forms
    'nplurals=2; plural=(n != 1)'
    >>> Catalog(locale='pt_BR').plural_forms
    'nplurals=2; plural=(n > 1)'

    :type: `str`"""
    count, expression = self.num_plurals, self.plural_expr
    return 'nplurals=%s; plural=%s' % (count, expression)
519 def __contains__(self, id):
520 """Return whether the catalog has a message with the specified ID."""
521 return self._key_for(id) in self._messages
524 """The number of messages in the catalog.
526 This does not include the special ``msgid ""`` entry."""
527 return len(self._messages)
def __iter__(self):
    """Iterates through all the entries in the catalog, in the order they
    were added, yielding a `Message` object for every entry.

    The first item is the header entry: an empty message ID whose string
    holds the MIME headers, one ``Name: value`` pair per line.  It is
    flagged fuzzy when the catalog itself is.

    :rtype: ``iterator``"""
    header_lines = ['%s: %s' % (name, value)
                    for name, value in self.mime_headers]
    flags = set(['fuzzy']) if self.fuzzy else set()
    yield Message(u'', '\n'.join(header_lines), flags=flags)
    for key in self._messages:
        yield self._messages[key]
547 locale = ' %s' % self.locale
548 return '<%s %r%s>' % (type(self).__name__, self.domain, locale)
550 def __delitem__(self, id):
551 """Delete the message with the specified ID."""
554 def __getitem__(self, id):
555 """Return the message with the specified ID.
557 :param id: the message ID
def __setitem__(self, id, message):
    """Add or update the message with the specified ID.

    >>> catalog = Catalog()
    >>> catalog[u'foo'] = Message(u'foo')
    >>> catalog[u'foo']
    <Message u'foo' (flags: [])>

    If a message with that ID is already in the catalog, it is updated
    to include the locations and flags of the new message.

    >>> catalog = Catalog()
    >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
    >>> catalog[u'foo'].locations
    [('main.py', 1)]
    >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
    >>> catalog[u'foo'].locations
    [('main.py', 1), ('utils.py', 5)]

    An empty ID addresses the header entry: the message is not stored but
    its string, user comments and fuzzy flag update the catalog's MIME
    headers, header comment and fuzzy bit.

    :param id: the message ID
    :param message: the `Message` object
    """
    assert isinstance(message, Message), 'expected a Message object'
    key = self._key_for(id, message.context)
    existing = self._messages.get(key)

    if existing:
        # Merge into the stored entry instead of replacing it.
        if message.pluralizable and not existing.pluralizable:
            # The new message adds pluralization
            existing.id = message.id
            existing.string = message.string
        existing.locations = list(
            distinct(existing.locations + message.locations))
        existing.auto_comments = list(
            distinct(existing.auto_comments + message.auto_comments))
        existing.user_comments = list(
            distinct(existing.user_comments + message.user_comments))
        existing.flags |= message.flags
        return

    if id == '':
        # special treatment for the header message
        self.mime_headers = _parse_header(message.string).items()
        comment_lines = [('# %s' % c).rstrip() for c in message.user_comments]
        self.header_comment = '\n'.join(comment_lines)
        self.fuzzy = message.fuzzy
        return

    if isinstance(id, (list, tuple)):
        assert isinstance(message.string, (list, tuple)), \
            'Expected sequence but got %s' % type(message.string)
    self._messages[key] = message
def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
        user_comments=(), previous_id=(), lineno=None, context=None):
    """Add or update the message with the specified ID.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo')
    <Message ...>
    >>> catalog[u'foo']
    <Message u'foo' (flags: [])>

    This method simply constructs a `Message` object with the given
    arguments and invokes `__setitem__` with that object.

    :param id: the message ID, or a ``(singular, plural)`` tuple for
               pluralizable messages
    :param string: the translated message string, or a
                   ``(singular, plural)`` tuple for pluralizable messages
    :param locations: a sequence of ``(filename, lineno)`` tuples
    :param flags: a set or sequence of flags
    :param auto_comments: a sequence of automatic comments
    :param user_comments: a sequence of user comments
    :param previous_id: the previous message ID, or a ``(singular, plural)``
                        tuple for pluralizable messages
    :param lineno: the line number on which the msgid line was found, if
                   known
    :param context: the message context
    :return: the newly constructed `Message` object
    """
    entry = Message(id, string, list(locations), flags, auto_comments,
                    user_comments, previous_id, lineno=lineno,
                    context=context)
    self[id] = entry
    return entry
def check(self):
    """Run various validation checks on the translations in the catalog.

    For every message which fails validation, this method yields a
    ``(message, errors)`` tuple, where ``message`` is the `Message` object
    and ``errors`` is a sequence of `TranslationError` objects.

    :rtype: ``iterator``
    """
    for entry in self._messages.values():
        problems = entry.check(catalog=self)
        if not problems:
            continue
        yield entry, problems
def get(self, id, context=None):
    """Return the message with the specified ID and context.

    ``None`` is returned when the catalog has no such message.

    :param id: the message ID
    :param context: the message context, or ``None`` for no context
    """
    key = self._key_for(id, context)
    return self._messages.get(key)
def delete(self, id, context=None):
    """Delete the message with the specified ID and context.

    Nothing happens when the catalog has no such message.

    :param id: the message ID
    :param context: the message context, or ``None`` for no context
    """
    key = self._key_for(id, context)
    if key not in self._messages:
        return
    del self._messages[key]
def update(self, template, no_fuzzy_matching=False, update_header_comment=False):
    """Update the catalog based on the given template catalog.

    >>> from babel.messages import Catalog
    >>> template = Catalog()
    >>> template.add('green', locations=[('main.py', 99)])
    <Message ...>
    >>> template.add('blue', locations=[('main.py', 100)])
    <Message ...>
    >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
    <Message ...>
    >>> catalog = Catalog(locale='de_DE')
    >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
    <Message ...>
    >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
    <Message ...>
    >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
    ...             locations=[('util.py', 38)])
    <Message ...>

    >>> catalog.update(template)
    >>> len(catalog)
    3

    >>> msg1 = catalog['green']
    >>> msg1.string
    >>> msg1.locations
    [('main.py', 99)]

    >>> msg2 = catalog['blue']
    >>> msg2.string
    u'blau'
    >>> msg2.locations
    [('main.py', 100)]

    >>> msg3 = catalog['salad']
    >>> msg3.string
    (u'Salat', u'Salate')
    >>> msg3.locations
    [('util.py', 42)]

    Messages that are in the catalog but not in the template are removed
    from the main collection, but can still be accessed via the `obsolete`
    member:

    >>> 'head' in catalog
    False
    >>> list(catalog.obsolete.values())
    [<Message 'head' (flags: [])>]

    :param template: the reference catalog, usually read from a POT file
    :param no_fuzzy_matching: whether to disable fuzzy matching of message
                              IDs
    :param update_header_comment: whether to replace this catalog's header
                                  comment with the template's
    """
    messages = self._messages
    remaining = messages.copy()
    self._messages = odict()

    # Prepare for fuzzy matching: map the bare msgid of every translated
    # message to its context.
    fuzzy_candidates = {}
    if not no_fuzzy_matching:
        fuzzy_candidates = dict(
            (self._key_for(msgid), messages[msgid].context)
            for msgid in messages if msgid and messages[msgid].string
        )
    fuzzy_matches = set()

    def _merge(message, oldkey, newkey):
        # Carry the translation stored under ``oldkey`` over to a copy of
        # the template ``message`` and add the result to the catalog.
        message = message.clone()
        fuzzy = False
        if oldkey != newkey:
            # Matched by similarity only: remember the old ID and leave
            # the old entry in ``remaining``.
            fuzzy = True
            fuzzy_matches.add(oldkey)
            oldmsg = messages.get(oldkey)
            if isinstance(oldmsg.id, string_types):
                message.previous_id = [oldmsg.id]
            else:
                message.previous_id = list(oldmsg.id)
        else:
            oldmsg = remaining.pop(oldkey, None)
        message.string = oldmsg.string
        # Reconcile the shape of the translation with the new ID; any
        # adjustment marks the result as fuzzy.
        if isinstance(message.id, (list, tuple)):
            if not isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = tuple(
                    [message.string] + ([u''] * (len(message.id) - 1))
                )
            elif len(message.string) != self.num_plurals:
                fuzzy = True
                message.string = tuple(message.string[:len(oldmsg.string)])
        elif isinstance(message.string, (list, tuple)):
            fuzzy = True
            message.string = message.string[0]
        message.flags |= oldmsg.flags
        if fuzzy:
            message.flags |= set([u'fuzzy'])
        self[message.id] = message

    for message in template:
        if not message.id:
            # The header entry of the template is not merged.
            continue
        key = self._key_for(message.id, message.context)
        if key in messages:
            _merge(message, key, key)
            continue

        # Tested by truthiness, like the candidate preparation above, so
        # that falsy values other than ``False`` also enable matching.
        if not no_fuzzy_matching:
            # do some fuzzy matching with difflib
            if isinstance(key, tuple):
                matchkey = key[0]  # just the msgid, no context
            else:
                matchkey = key
            matches = get_close_matches(matchkey.lower().strip(),
                                        fuzzy_candidates.keys(), 1)
            if matches:
                newkey = matches[0]
                newctxt = fuzzy_candidates[newkey]
                if newctxt is not None:
                    newkey = newkey, newctxt
                _merge(message, newkey, key)
                continue

        self[message.id] = message

    # Whatever was neither merged nor used as a fuzzy source is obsolete.
    for msgid in remaining:
        if no_fuzzy_matching or msgid not in fuzzy_matches:
            self.obsolete[msgid] = remaining[msgid]

    if update_header_comment:
        # Allow the updated catalog's header to be rewritten based on the
        # template's header
        self.header_comment = template.header_comment

    # Make updated catalog's POT-Creation-Date equal to the template
    # used to update the catalog
    self.creation_date = template.creation_date
810 def _key_for(self, id, context=None):
811 """The key for a message is just the singular ID even for pluralizable
812 messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
816 if isinstance(key, (list, tuple)):
818 if context is not None: