# -*- coding: utf-8 -*-
"""
    Reading and writing of files in the ``gettext`` PO (portable object)
    format.

    :copyright: (c) 2013 by the Babel Team.
    :license: BSD, see LICENSE for more details.
"""
from __future__ import print_function

import os
import re

from babel.messages.catalog import Catalog, Message
from babel.util import wraptext
from babel._compat import text_type
def unescape(string):
    r"""Reverse `escape` the given string.

    >>> print(unescape('"Say:\\n  \\"hello, world!\\"\\n"'))
    Say:
      "hello, world!"
    <BLANKLINE>

    The first and last character of `string` are assumed to be the
    surrounding double quotes and are dropped without being checked.

    :param string: the quoted, escaped string to unescape
    :return: the unquoted string with ``\\``, ``\t``, ``\r``, ``\n`` and
             ``\"`` escape sequences resolved
    """
    # Escapes whose replacement differs from the escaped character itself.
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def replace_escapes(match):
        escaped = match.group(1)
        # A backslash or a double quote simply stands for itself.
        return control_chars.get(escaped, escaped)

    # string[1:-1] strips the enclosing quotes before resolving escapes.
    return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])
def denormalize(string):
    r"""Reverse the normalization done by the `normalize` function.

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"hello, world!\"\n"'''))
    Say:
      "hello, world!"
    <BLANKLINE>

    >>> print(denormalize(r'''""
    ... "Say:\n"
    ... "  \"Lorem ipsum dolor sit "
    ... "amet, consectetur adipisicing"
    ... " elit, \"\n"'''))
    Say:
      "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    <BLANKLINE>

    :param string: the string to denormalize
    :return: the unescaped string, with the per-line quoted fragments of a
             multi-line value concatenated
    """
    if '\n' not in string:
        # Single quoted fragment: nothing to stitch together.
        return unescape(string)

    escaped_lines = string.splitlines()
    if string.startswith('""'):
        # Multi-line values start with an empty ``""`` marker line that
        # carries no content of its own.
        escaped_lines = escaped_lines[1:]
    return ''.join(unescape(fragment) for fragment in escaped_lines)
def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
    """Read messages from a ``gettext`` PO (portable object) file from the given
    file-like object and return a `Catalog`.

    >>> from datetime import datetime
    >>> from babel._compat import StringIO
    >>> buf = StringIO('''
    ... #: main.py:1
    ... #, fuzzy, python-format
    ... msgid "foo %(name)s"
    ... msgstr "quux %(name)s"
    ...
    ... # A user comment
    ... #. An auto comment
    ... #: main.py:3
    ... msgid "bar"
    ... msgid_plural "baz"
    ... msgstr[0] "bar"
    ... msgstr[1] "baaz"
    ... ''')
    >>> catalog = read_po(buf)
    >>> catalog.revision_date = datetime(2007, 4, 1)

    >>> for message in catalog:
    ...     if message.id:
    ...         print((message.id, message.string))
    ...         print(' ', (message.locations, sorted(list(message.flags))))
    ...         print(' ', (message.user_comments, message.auto_comments))
    (u'foo %(name)s', u'quux %(name)s')
      ([(u'main.py', 1)], [u'fuzzy', u'python-format'])
      ([], [])
    ((u'bar', u'baz'), (u'bar', u'baaz'))
      ([(u'main.py', 3)], [])
      ([u'A user comment'], [u'An auto comment'])

    .. versionadded:: 1.0
       Added support for explicit charset argument.

    :param fileobj: the file-like object to read the PO file from
    :param locale: the locale identifier or `Locale` object, or `None`
                   if the catalog is not bound to a locale (which basically
                   means it's a template)
    :param domain: the message domain
    :param ignore_obsolete: whether to ignore obsolete messages in the input
    :param charset: the character set of the catalog.
    :return: the `Catalog` populated from the file
    """
    catalog = Catalog(locale=locale, domain=domain, charset=charset)

    # Parser state shared with the nested helpers.  Scalars live in
    # one-element lists so the helpers can update them without ``nonlocal``.
    counter = [0]          # number of messages added so far
    offset = [0]           # zero-based line index of the current ``msgid``
    messages = []          # raw msgid (and msgid_plural) fragments
    translations = []      # [plural index, raw msgstr fragment] pairs
    locations = []
    flags = []
    user_comments = []
    auto_comments = []
    obsolete = [False]
    context = []           # raw msgctxt fragments
    in_msgid = [False]
    in_msgstr = [False]
    in_msgctxt = [False]

    def _add_message():
        # Turn the accumulated state into a Message, store it, reset state.
        translations.sort()
        if len(messages) > 1:
            msgid = tuple([denormalize(m) for m in messages])
        else:
            msgid = denormalize(messages[0])
        if isinstance(msgid, (list, tuple)):
            string = []
            for idx in range(catalog.num_plurals):
                try:
                    string.append(translations[idx])
                except IndexError:
                    # Pad plural forms the file does not provide.
                    string.append((idx, ''))
            string = tuple([denormalize(t[1]) for t in string])
        else:
            string = denormalize(translations[0][1])
        if context:
            msgctxt = denormalize('\n'.join(context))
        else:
            msgctxt = None
        message = Message(msgid, string, list(locations), set(flags),
                          auto_comments, user_comments, lineno=offset[0] + 1,
                          context=msgctxt)
        if obsolete[0]:
            if not ignore_obsolete:
                catalog.obsolete[msgid] = message
        else:
            catalog[msgid] = message
        # Clear in place: the helpers hold references to these lists.
        del messages[:]
        del translations[:]
        del context[:]
        del locations[:]
        del flags[:]
        del auto_comments[:]
        del user_comments[:]
        obsolete[0] = False
        counter[0] += 1

    def _process_message_line(lineno, line):
        # Dispatch on the keyword that starts a (non-comment) PO line.
        if line.startswith('msgid_plural'):
            in_msgid[0] = True
            in_msgctxt[0] = False
            msg = line[12:].lstrip()
            messages.append(msg)
        elif line.startswith('msgid'):
            txt = line[5:].lstrip()
            if messages:
                # Flush the previous entry before ``offset`` is overwritten,
                # so it keeps its own line number.
                _add_message()
            in_msgid[0] = True
            in_msgctxt[0] = False
            offset[0] = lineno
            messages.append(txt)
        elif line.startswith('msgstr'):
            in_msgid[0] = False
            in_msgctxt[0] = False
            in_msgstr[0] = True
            msg = line[6:].lstrip()
            if msg.startswith('['):
                # Plural form: ``msgstr[N] "..."``
                idx, msg = msg[1:].split(']', 1)
                translations.append([int(idx), msg.lstrip()])
            else:
                translations.append([0, msg])
        elif line.startswith('msgctxt'):
            if messages:
                _add_message()
            in_msgid[0] = in_msgstr[0] = False
            # Mark the context as open so continuation lines are kept.
            in_msgctxt[0] = True
            context.append(line[7:].lstrip())
        elif line.startswith('"'):
            # Continuation line: belongs to whichever field is open.
            if in_msgid[0]:
                messages[-1] += u'\n' + line.rstrip()
            elif in_msgstr[0]:
                translations[-1][1] += u'\n' + line.rstrip()
            elif in_msgctxt[0]:
                context.append(line.rstrip())

    for lineno, line in enumerate(fileobj.readlines()):
        line = line.strip()
        if not isinstance(line, text_type):
            line = line.decode(catalog.charset)
        if line.startswith('#'):
            in_msgid[0] = in_msgstr[0] = in_msgctxt[0] = False
            if messages and translations:
                # A comment after a complete entry starts the next entry.
                _add_message()
            if line[1:].startswith(':'):
                for location in line[2:].lstrip().split():
                    pos = location.rfind(':')
                    if pos >= 0:
                        try:
                            # Separate name: must not clobber the loop's
                            # ``lineno``.
                            location_lineno = int(location[pos + 1:])
                        except ValueError:
                            continue
                        locations.append((location[:pos], location_lineno))
                    else:
                        locations.append((location, None))
            elif line[1:].startswith(','):
                for flag in line[2:].lstrip().split(','):
                    flags.append(flag.strip())
            elif line[1:].startswith('~'):
                obsolete[0] = True
                _process_message_line(lineno, line[2:].lstrip())
            elif line[1:].startswith('.'):
                # These are called auto-comments
                comment = line[2:].strip()
                if comment:  # Just check that we're not adding empty comments
                    auto_comments.append(comment)
            else:
                # These are called user comments
                user_comments.append(line[1:].strip())
        else:
            _process_message_line(lineno, line)

    if messages:
        _add_message()
    elif not counter[0] and (flags or user_comments or auto_comments):
        # No actual messages found, but there was some info in comments, from
        # which we'll construct an empty header message
        messages.append(u'')
        translations.append([0, u''])
        _add_message()

    return catalog
# Splits text into wrappable chunks.  The whole pattern is one capturing
# group, so ``WORD_SEP.split`` keeps the separators in its result.
WORD_SEP = re.compile('('
                      r'\s+|'                                 # any whitespace
                      r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
                      r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'   # em-dash
                      ')')
def escape(string):
    r"""Escape the given string so that it can be included in double-quoted
    strings in ``PO`` files.

    >>> escape('''Say:
    ...   "hello, world!"
    ... ''')
    '"Say:\\n  \\"hello, world!\\"\\n"'

    :param string: the string to escape
    :return: the escaped string, wrapped in double quotes
    """
    # Backslashes must be doubled first; otherwise the backslashes introduced
    # by the later replacements would themselves get escaped.
    escaped = (string.replace('\\', '\\\\')
                     .replace('\t', '\\t')
                     .replace('\r', '\\r')
                     .replace('\n', '\\n')
                     .replace('\"', '\\"'))
    return '"%s"' % escaped
def normalize(string, prefix='', width=76):
    r"""Convert a string into a format that is appropriate for .po files.

    >>> print(normalize('''Say:
    ...   "hello, world!"
    ... ''', width=None))
    ""
    "Say:\n"
    "  \"hello, world!\"\n"

    >>> print(normalize('''Say:
    ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
    ... ''', width=32))
    ""
    "Say:\n"
    "  \"Lorem ipsum dolor sit "
    "amet, consectetur adipisicing"
    " elit, \"\n"

    :param string: the string to normalize
    :param prefix: a string that should be prepended to every line
    :param width: the maximum line width; use `None`, 0, or a negative number
                  to completely disable line wrapping
    :return: the escaped string; a value spanning several lines starts with
             an empty ``""`` line followed by one quoted fragment per line
    """
    if width and width > 0:
        prefixlen = len(prefix)
        lines = []
        for line in string.splitlines(True):
            if len(escape(line)) + prefixlen > width:
                # Reverse so chunks can be consumed front-to-back via pop().
                chunks = WORD_SEP.split(line)
                chunks.reverse()
                while chunks:
                    buf = []
                    size = 2  # the two enclosing quote characters
                    while chunks:
                        # escape() adds two quotes, hence the "- 2".
                        chunk_len = len(escape(chunks[-1])) - 2 + prefixlen
                        if size + chunk_len < width:
                            buf.append(chunks.pop())
                            size += chunk_len
                        else:
                            if not buf:
                                # handle long chunks by putting them on a
                                # separate line
                                buf.append(chunks.pop())
                            break
                    lines.append(u''.join(buf))
            else:
                lines.append(line)
    else:
        lines = string.splitlines(True)

    if len(lines) <= 1:
        return escape(string)

    # Remove empty trailing line
    if lines and not lines[-1]:
        del lines[-1]
        lines[-1] += '\n'
    return u'""\n' + u'\n'.join([(prefix + escape(line)) for line in lines])
def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
             sort_output=False, sort_by_file=False, ignore_obsolete=False,
             include_previous=False):
    r"""Write a ``gettext`` PO (portable object) template file for a given
    message catalog to the provided file-like object.

    >>> catalog = Catalog()
    >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
    ...             flags=('fuzzy',))
    <Message...>
    >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
    <Message...>
    >>> from babel._compat import BytesIO
    >>> buf = BytesIO()
    >>> write_po(buf, catalog, omit_header=True)
    >>> print(buf.getvalue().decode("utf8"))
    #: main.py:1
    #, fuzzy, python-format
    msgid "foo %(name)s"
    msgstr ""
    <BLANKLINE>
    #: main.py:3
    msgid "bar"
    msgid_plural "baz"
    msgstr[0] ""
    msgstr[1] ""
    <BLANKLINE>
    <BLANKLINE>

    :param fileobj: the file-like object to write to
    :param catalog: the `Catalog` instance
    :param width: the maximum line width for the generated output; use `None`,
                  0, or a negative number to completely disable line wrapping
    :param no_location: do not emit a location comment for every message
    :param omit_header: do not include the ``msgid ""`` entry at the top of the
                        output
    :param sort_output: whether to sort the messages in the output by msgid
    :param sort_by_file: whether to sort the messages in the output by their
                         locations
    :param ignore_obsolete: whether to ignore obsolete messages and not include
                            them in the output; by default they are included as
                            comments
    :param include_previous: include the old msgid as a comment when
                             updating the catalog
    """
    def _normalize(key, prefix=''):
        return normalize(key, prefix=prefix, width=width)

    def _write(text):
        # Text is encoded with the catalog charset before being written.
        if isinstance(text, text_type):
            text = text.encode(catalog.charset, 'backslashreplace')
        fileobj.write(text)

    def _write_comment(comment, prefix=''):
        # xgettext always wraps comments even if --no-wrap is passed;
        # provide the same behaviour
        if width and width > 0:
            _width = width
        else:
            _width = 76
        for line in wraptext(comment, _width):
            _write('#%s %s\n' % (prefix, line.strip()))

    def _write_message(message, prefix=''):
        # The optional context line is emitted the same way for singular and
        # plural messages.
        if message.context:
            _write('%smsgctxt %s\n' % (prefix,
                                       _normalize(message.context, prefix)))

        if isinstance(message.id, (list, tuple)):
            _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
            _write('%smsgid_plural %s\n' % (
                prefix, _normalize(message.id[1], prefix)
            ))

            for idx in range(catalog.num_plurals):
                try:
                    string = message.string[idx]
                except IndexError:
                    # Emit an empty translation for missing plural forms.
                    string = ''
                _write('%smsgstr[%d] %s\n' % (
                    prefix, idx, _normalize(string, prefix)
                ))
        else:
            _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
            _write('%smsgstr %s\n' % (
                prefix, _normalize(message.string or '', prefix)
            ))

    # ``sort_output`` takes precedence over ``sort_by_file``.
    sort_by = None
    if sort_output:
        sort_by = "message"
    elif sort_by_file:
        sort_by = "location"

    for message in _sort_messages(catalog, sort_by=sort_by):
        if not message.id:  # This is the header "message"
            if omit_header:
                continue
            comment_header = catalog.header_comment
            if width and width > 0:
                lines = []
                for line in comment_header.splitlines():
                    lines += wraptext(line, width=width,
                                      subsequent_indent='# ')
                comment_header = u'\n'.join(lines)
            _write(comment_header + u'\n')

        for comment in message.user_comments:
            _write_comment(comment)
        for comment in message.auto_comments:
            _write_comment(comment, prefix='.')

        if not no_location:
            locs = []
            for filename, lineno in sorted(message.locations):
                # Locations always use forward slashes in the output.
                if lineno:
                    locs.append(u'%s:%d' % (filename.replace(os.sep, '/'), lineno))
                else:
                    locs.append(u'%s' % filename.replace(os.sep, '/'))
            _write_comment(' '.join(locs), prefix=':')
        if message.flags:
            # The leading '' makes the joined line start with "#, ".
            _write('#%s\n' % ', '.join([''] + sorted(message.flags)))

        if message.previous_id and include_previous:
            _write_comment('msgid %s' % _normalize(message.previous_id[0]),
                           prefix='|')
            if len(message.previous_id) > 1:
                _write_comment('msgid_plural %s' % _normalize(
                    message.previous_id[1]
                ), prefix='|')

        _write_message(message)
        _write('\n')

    if not ignore_obsolete:
        for message in _sort_messages(
            catalog.obsolete.values(),
            sort_by=sort_by
        ):
            for comment in message.user_comments:
                _write_comment(comment)
            # Obsolete entries are written out as "#~ " comments.
            _write_message(message, prefix='#~ ')
            _write('\n')
def _sort_messages(messages, sort_by):
    """
    Sort the given message iterable by the given criteria.

    Always returns a list.  Any `sort_by` value other than the two options
    below leaves the original order untouched.

    :param messages: An iterable of Messages.
    :param sort_by: Sort by which criteria? Options are `message` and `location`.
    :return: list[Message]
    """
    if sort_by == "message":
        # Relies on the messages' own ordering.
        return sorted(messages)
    if sort_by == "location":
        return sorted(messages, key=lambda m: m.locations)
    return list(messages)