gerrit.onap Code Review - sdc/sdc-distribution-client.git/blob

   1 # -*- coding: utf-8 -*-
   2 """
   3     babel.messages.pofile
   4     ~~~~~~~~~~~~~~~~~~~~~
   5
   6     Reading and writing of files in the ``gettext`` PO (portable object)
   7     format.
   8
   9     :copyright: (c) 2013 by the Babel Team.
  10     :license: BSD, see LICENSE for more details.
  11 """
  12
  13 from __future__ import print_function
  14 import os
  15 import re
  16
  17 from babel.messages.catalog import Catalog, Message
  18 from babel.util import wraptext
  19 from babel._compat import text_type
  20
  21
  22 def unescape(string):
  23     r"""Reverse `escape` the given string.
  24
  25     >>> print(unescape('"Say:\\n  \\"hello, world!\\"\\n"'))
  26     Say:
  27       "hello, world!"
  28     <BLANKLINE>
  29
  30     :param string: the string to unescape
  31     """
  32     def replace_escapes(match):
  33         m = match.group(1)
  34         if m == 'n':
  35             return '\n'
  36         elif m == 't':
  37             return '\t'
  38         elif m == 'r':
  39             return '\r'
  40         # m is \ or "
  41         return m
  42     return re.compile(r'\\([\\trn"])').sub(replace_escapes, string[1:-1])
  43
  44
  45 def denormalize(string):
  46     r"""Reverse the normalization done by the `normalize` function.
  47
  48     >>> print(denormalize(r'''""
  49     ... "Say:\n"
  50     ... "  \"hello, world!\"\n"'''))
  51     Say:
  52       "hello, world!"
  53     <BLANKLINE>
  54
  55     >>> print(denormalize(r'''""
  56     ... "Say:\n"
  57     ... "  \"Lorem ipsum dolor sit "
  58     ... "amet, consectetur adipisicing"
  59     ... " elit, \"\n"'''))
  60     Say:
  61       "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
  62     <BLANKLINE>
  63
  64     :param string: the string to denormalize
  65     """
  66     if '\n' in string:
  67         escaped_lines = string.splitlines()
  68         if string.startswith('""'):
  69             escaped_lines = escaped_lines[1:]
  70         lines = map(unescape, escaped_lines)
  71         return ''.join(lines)
  72     else:
  73         return unescape(string)
  74
  75
  76 def read_po(fileobj, locale=None, domain=None, ignore_obsolete=False, charset=None):
  77     """Read messages from a ``gettext`` PO (portable object) file from the given
  78     file-like object and return a `Catalog`.
  79
  80     >>> from datetime import datetime
  81     >>> from babel._compat import StringIO
  82     >>> buf = StringIO('''
  83     ... #: main.py:1
  84     ... #, fuzzy, python-format
  85     ... msgid "foo %(name)s"
  86     ... msgstr "quux %(name)s"
  87     ...
  88     ... # A user comment
  89     ... #. An auto comment
  90     ... #: main.py:3
  91     ... msgid "bar"
  92     ... msgid_plural "baz"
  93     ... msgstr[0] "bar"
  94     ... msgstr[1] "baaz"
  95     ... ''')
  96     >>> catalog = read_po(buf)
  97     >>> catalog.revision_date = datetime(2007, 4, 1)
  98
  99     >>> for message in catalog:
 100     ...     if message.id:
 101     ...         print((message.id, message.string))
 102     ...         print(' ', (message.locations, sorted(list(message.flags))))
 103     ...         print(' ', (message.user_comments, message.auto_comments))
 104     (u'foo %(name)s', u'quux %(name)s')
 105       ([(u'main.py', 1)], [u'fuzzy', u'python-format'])
 106       ([], [])
 107     ((u'bar', u'baz'), (u'bar', u'baaz'))
 108       ([(u'main.py', 3)], [])
 109       ([u'A user comment'], [u'An auto comment'])
 110
 111     .. versionadded:: 1.0
 112        Added support for explicit charset argument.
 113
 114     :param fileobj: the file-like object to read the PO file from
 115     :param locale: the locale identifier or `Locale` object, or `None`
 116                    if the catalog is not bound to a locale (which basically
 117                    means it's a template)
 118     :param domain: the message domain
 119     :param ignore_obsolete: whether to ignore obsolete messages in the input
 120     :param charset: the character set of the catalog.
 121     """
 122     catalog = Catalog(locale=locale, domain=domain, charset=charset)
 123
 124     counter = [0]
 125     offset = [0]
 126     messages = []
 127     translations = []
 128     locations = []
 129     flags = []
 130     user_comments = []
 131     auto_comments = []
 132     obsolete = [False]
 133     context = []
 134     in_msgid = [False]
 135     in_msgstr = [False]
 136     in_msgctxt = [False]
 137
 138     def _add_message():
 139         translations.sort()
 140         if len(messages) > 1:
 141             msgid = tuple([denormalize(m) for m in messages])
 142         else:
 143             msgid = denormalize(messages[0])
 144         if isinstance(msgid, (list, tuple)):
 145             string = []
 146             for idx in range(catalog.num_plurals):
 147                 try:
 148                     string.append(translations[idx])
 149                 except IndexError:
 150                     string.append((idx, ''))
 151             string = tuple([denormalize(t[1]) for t in string])
 152         else:
 153             string = denormalize(translations[0][1])
 154         if context:
 155             msgctxt = denormalize('\n'.join(context))
 156         else:
 157             msgctxt = None
 158         message = Message(msgid, string, list(locations), set(flags),
 159                           auto_comments, user_comments, lineno=offset[0] + 1,
 160                           context=msgctxt)
 161         if obsolete[0]:
 162             if not ignore_obsolete:
 163                 catalog.obsolete[msgid] = message
 164         else:
 165             catalog[msgid] = message
 166         del messages[:]
 167         del translations[:]
 168         del context[:]
 169         del locations[:]
 170         del flags[:]
 171         del auto_comments[:]
 172         del user_comments[:]
 173         obsolete[0] = False
 174         counter[0] += 1
 175
 176     def _process_message_line(lineno, line):
 177         if line.startswith('msgid_plural'):
 178             in_msgid[0] = True
 179             msg = line[12:].lstrip()
 180             messages.append(msg)
 181         elif line.startswith('msgid'):
 182             in_msgid[0] = True
 183             offset[0] = lineno
 184             txt = line[5:].lstrip()
 185             if messages:
 186                 _add_message()
 187             messages.append(txt)
 188         elif line.startswith('msgstr'):
 189             in_msgid[0] = False
 190             in_msgstr[0] = True
 191             msg = line[6:].lstrip()
 192             if msg.startswith('['):
 193                 idx, msg = msg[1:].split(']', 1)
 194                 translations.append([int(idx), msg.lstrip()])
 195             else:
 196                 translations.append([0, msg])
 197         elif line.startswith('msgctxt'):
 198             if messages:
 199                 _add_message()
 200             in_msgid[0] = in_msgstr[0] = False
 201             context.append(line[7:].lstrip())
 202         elif line.startswith('"'):
 203             if in_msgid[0]:
 204                 messages[-1] += u'\n' + line.rstrip()
 205             elif in_msgstr[0]:
 206                 translations[-1][1] += u'\n' + line.rstrip()
 207             elif in_msgctxt[0]:
 208                 context.append(line.rstrip())
 209
 210     for lineno, line in enumerate(fileobj.readlines()):
 211         line = line.strip()
 212         if not isinstance(line, text_type):
 213             line = line.decode(catalog.charset)
 214         if line.startswith('#'):
 215             in_msgid[0] = in_msgstr[0] = False
 216             if messages and translations:
 217                 _add_message()
 218             if line[1:].startswith(':'):
 219                 for location in line[2:].lstrip().split():
 220                     pos = location.rfind(':')
 221                     if pos >= 0:
 222                         try:
 223                             lineno = int(location[pos + 1:])
 224                         except ValueError:
 225                             continue
 226                         locations.append((location[:pos], lineno))
 227                     else:
 228                         locations.append((location, None))
 229             elif line[1:].startswith(','):
 230                 for flag in line[2:].lstrip().split(','):
 231                     flags.append(flag.strip())
 232             elif line[1:].startswith('~'):
 233                 obsolete[0] = True
 234                 _process_message_line(lineno, line[2:].lstrip())
 235             elif line[1:].startswith('.'):
 236                 # These are called auto-comments
 237                 comment = line[2:].strip()
 238                 if comment:  # Just check that we're not adding empty comments
 239                     auto_comments.append(comment)
 240             else:
 241                 # These are called user comments
 242                 user_comments.append(line[1:].strip())
 243         else:
 244             _process_message_line(lineno, line)
 245
 246     if messages:
 247         _add_message()
 248
 249     # No actual messages found, but there was some info in comments, from which
 250     # we'll construct an empty header message
 251     elif not counter[0] and (flags or user_comments or auto_comments):
 252         messages.append(u'')
 253         translations.append([0, u''])
 254         _add_message()
 255
 256     return catalog
 257
 258
# Pattern used by `normalize` to break a long line into chunks at which it may
# be wrapped.  The whole pattern is one capturing group, so ``WORD_SEP.split``
# returns the separators as chunks too and joining the chunks restores the
# original text.
WORD_SEP = re.compile('('
                      r'\s+|'                                 # any whitespace
                      r'[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|'  # hyphenated words
                      r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w)'   # em-dash
                      ')')
 264
 265
 266 def escape(string):
 267     r"""Escape the given string so that it can be included in double-quoted
 268     strings in ``PO`` files.
 269
 270     >>> escape('''Say:
 271     ...   "hello, world!"
 272     ... ''')
 273     '"Say:\\n  \\"hello, world!\\"\\n"'
 274
 275     :param string: the string to escape
 276     """
 277     return '"%s"' % string.replace('\\', '\\\\') \
 278                           .replace('\t', '\\t') \
 279                           .replace('\r', '\\r') \
 280                           .replace('\n', '\\n') \
 281                           .replace('\"', '\\"')
 282
 283
 284 def normalize(string, prefix='', width=76):
 285     r"""Convert a string into a format that is appropriate for .po files.
 286
 287     >>> print(normalize('''Say:
 288     ...   "hello, world!"
 289     ... ''', width=None))
 290     ""
 291     "Say:\n"
 292     "  \"hello, world!\"\n"
 293
 294     >>> print(normalize('''Say:
 295     ...   "Lorem ipsum dolor sit amet, consectetur adipisicing elit, "
 296     ... ''', width=32))
 297     ""
 298     "Say:\n"
 299     "  \"Lorem ipsum dolor sit "
 300     "amet, consectetur adipisicing"
 301     " elit, \"\n"
 302
 303     :param string: the string to normalize
 304     :param prefix: a string that should be prepended to every line
 305     :param width: the maximum line width; use `None`, 0, or a negative number
 306                   to completely disable line wrapping
 307     """
 308     if width and width > 0:
 309         prefixlen = len(prefix)
 310         lines = []
 311         for line in string.splitlines(True):
 312             if len(escape(line)) + prefixlen > width:
 313                 chunks = WORD_SEP.split(line)
 314                 chunks.reverse()
 315                 while chunks:
 316                     buf = []
 317                     size = 2
 318                     while chunks:
 319                         l = len(escape(chunks[-1])) - 2 + prefixlen
 320                         if size + l < width:
 321                             buf.append(chunks.pop())
 322                             size += l
 323                         else:
 324                             if not buf:
 325                                 # handle long chunks by putting them on a
 326                                 # separate line
 327                                 buf.append(chunks.pop())
 328                             break
 329                     lines.append(u''.join(buf))
 330             else:
 331                 lines.append(line)
 332     else:
 333         lines = string.splitlines(True)
 334
 335     if len(lines) <= 1:
 336         return escape(string)
 337
 338     # Remove empty trailing line
 339     if lines and not lines[-1]:
 340         del lines[-1]
 341         lines[-1] += '\n'
 342     return u'""\n' + u'\n'.join([(prefix + escape(line)) for line in lines])
 343
 344
 345 def write_po(fileobj, catalog, width=76, no_location=False, omit_header=False,
 346              sort_output=False, sort_by_file=False, ignore_obsolete=False,
 347              include_previous=False):
 348     r"""Write a ``gettext`` PO (portable object) template file for a given
 349     message catalog to the provided file-like object.
 350
 351     >>> catalog = Catalog()
 352     >>> catalog.add(u'foo %(name)s', locations=[('main.py', 1)],
 353     ...             flags=('fuzzy',))
 354     <Message...>
 355     >>> catalog.add((u'bar', u'baz'), locations=[('main.py', 3)])
 356     <Message...>
 357     >>> from babel._compat import BytesIO
 358     >>> buf = BytesIO()
 359     >>> write_po(buf, catalog, omit_header=True)
 360     >>> print(buf.getvalue().decode("utf8"))
 361     #: main.py:1
 362     #, fuzzy, python-format
 363     msgid "foo %(name)s"
 364     msgstr ""
 365     <BLANKLINE>
 366     #: main.py:3
 367     msgid "bar"
 368     msgid_plural "baz"
 369     msgstr[0] ""
 370     msgstr[1] ""
 371     <BLANKLINE>
 372     <BLANKLINE>
 373
 374     :param fileobj: the file-like object to write to
 375     :param catalog: the `Catalog` instance
 376     :param width: the maximum line width for the generated output; use `None`,
 377                   0, or a negative number to completely disable line wrapping
 378     :param no_location: do not emit a location comment for every message
 379     :param omit_header: do not include the ``msgid ""`` entry at the top of the
 380                         output
 381     :param sort_output: whether to sort the messages in the output by msgid
 382     :param sort_by_file: whether to sort the messages in the output by their
 383                          locations
 384     :param ignore_obsolete: whether to ignore obsolete messages and not include
 385                             them in the output; by default they are included as
 386                             comments
 387     :param include_previous: include the old msgid as a comment when
 388                              updating the catalog
 389     """
 390     def _normalize(key, prefix=''):
 391         return normalize(key, prefix=prefix, width=width)
 392
 393     def _write(text):
 394         if isinstance(text, text_type):
 395             text = text.encode(catalog.charset, 'backslashreplace')
 396         fileobj.write(text)
 397
 398     def _write_comment(comment, prefix=''):
 399         # xgettext always wraps comments even if --no-wrap is passed;
 400         # provide the same behaviour
 401         if width and width > 0:
 402             _width = width
 403         else:
 404             _width = 76
 405         for line in wraptext(comment, _width):
 406             _write('#%s %s\n' % (prefix, line.strip()))
 407
 408     def _write_message(message, prefix=''):
 409         if isinstance(message.id, (list, tuple)):
 410             if message.context:
 411                 _write('%smsgctxt %s\n' % (prefix,
 412                                            _normalize(message.context, prefix)))
 413             _write('%smsgid %s\n' % (prefix, _normalize(message.id[0], prefix)))
 414             _write('%smsgid_plural %s\n' % (
 415                 prefix, _normalize(message.id[1], prefix)
 416             ))
 417
 418             for idx in range(catalog.num_plurals):
 419                 try:
 420                     string = message.string[idx]
 421                 except IndexError:
 422                     string = ''
 423                 _write('%smsgstr[%d] %s\n' % (
 424                     prefix, idx, _normalize(string, prefix)
 425                 ))
 426         else:
 427             if message.context:
 428                 _write('%smsgctxt %s\n' % (prefix,
 429                                            _normalize(message.context, prefix)))
 430             _write('%smsgid %s\n' % (prefix, _normalize(message.id, prefix)))
 431             _write('%smsgstr %s\n' % (
 432                 prefix, _normalize(message.string or '', prefix)
 433             ))
 434
 435     sort_by = None
 436     if sort_output:
 437         sort_by = "message"
 438     elif sort_by_file:
 439         sort_by = "location"
 440
 441     for message in _sort_messages(catalog, sort_by=sort_by):
 442         if not message.id:  # This is the header "message"
 443             if omit_header:
 444                 continue
 445             comment_header = catalog.header_comment
 446             if width and width > 0:
 447                 lines = []
 448                 for line in comment_header.splitlines():
 449                     lines += wraptext(line, width=width,
 450                                       subsequent_indent='# ')
 451                 comment_header = u'\n'.join(lines)
 452             _write(comment_header + u'\n')
 453
 454         for comment in message.user_comments:
 455             _write_comment(comment)
 456         for comment in message.auto_comments:
 457             _write_comment(comment, prefix='.')
 458
 459         if not no_location:
 460             locs = []
 461             for filename, lineno in sorted(message.locations):
 462                 if lineno:
 463                     locs.append(u'%s:%d' % (filename.replace(os.sep, '/'), lineno))
 464                 else:
 465                     locs.append(u'%s' % filename.replace(os.sep, '/'))
 466             _write_comment(' '.join(locs), prefix=':')
 467         if message.flags:
 468             _write('#%s\n' % ', '.join([''] + sorted(message.flags)))
 469
 470         if message.previous_id and include_previous:
 471             _write_comment('msgid %s' % _normalize(message.previous_id[0]),
 472                            prefix='|')
 473             if len(message.previous_id) > 1:
 474                 _write_comment('msgid_plural %s' % _normalize(
 475                     message.previous_id[1]
 476                 ), prefix='|')
 477
 478         _write_message(message)
 479         _write('\n')
 480
 481     if not ignore_obsolete:
 482         for message in _sort_messages(
 483             catalog.obsolete.values(),
 484             sort_by=sort_by
 485         ):
 486             for comment in message.user_comments:
 487                 _write_comment(comment)
 488             _write_message(message, prefix='#~ ')
 489             _write('\n')
 490
 491
 492 def _sort_messages(messages, sort_by):
 493     """
 494     Sort the given message iterable by the given criteria.
 495
 496     Always returns a list.
 497
 498     :param messages: An iterable of Messages.
 499     :param sort_by: Sort by which criteria? Options are `message` and `location`.
 500     :return: list[Message]
 501     """
 502     messages = list(messages)
 503     if sort_by == "message":
 504         messages.sort()
 505     elif sort_by == "location":
 506         messages.sort(key=lambda m: m.locations)
 507     return messages