1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""The 'grit android2grd' tool."""
7
8
9import getopt
10import os.path
11import StringIO
12from xml.dom import Node
13import xml.dom.minidom
14
15import grit.node.empty
16from grit.node import io
17from grit.node import message
18
19from grit.tool import interface
20
21from grit import grd_reader
22from grit import lazy_re
23from grit import tclib
24from grit import util
25
26
# Matches the name of a string in strings.xml: lowercase letters, digits and
# underscores only.  It is applied with match(), and the trailing \Z requires
# the whole name to consist of legal characters.
_STRING_NAME = lazy_re.compile(r'[a-z0-9_]+\Z')

# Matches a string's character limit annotation, e.g. "[CHAR-LIMIT=32]".
# Group 1 captures the numeric limit.
_CHAR_LIMIT = lazy_re.compile(r'\[CHAR-LIMIT=(\d+)\]')

# Finds String.Format() style format specifiers such as "%-5.2f".
# The fragments are raw string literals so that the regex escapes (\$ and \.)
# are not interpreted as (invalid) Python string escape sequences.
_FORMAT_SPECIFIER = lazy_re.compile(
  r'%'
  r'([1-9][0-9]*\$|<)?'            # argument_index
  r'([-#+ 0,(]*)'                  # flags
  r'([0-9]+)?'                     # width
  r'(\.[0-9]+)?'                   # precision
  r'([bBhHsScCdoxXeEfgGaAtT%n])')  # conversion
41
42
class Android2Grd(interface.Tool):
  """Tool for converting Android string.xml files into chrome Grd files.

Usage: grit [global options] android2grd [OPTIONS] STRINGS_XML

The Android2Grd tool will convert an Android strings.xml file (whose path is
specified by STRINGS_XML) and create a chrome style grd file containing the
relevant information.

Because grd documents are much richer than strings.xml documents we supplement
the information required by grds using OPTIONS with sensible defaults.

OPTIONS may be any of the following:

    --name       FILENAME    Specify the base FILENAME. This should be without
                             any file type suffix. By default
                             "chrome_android_strings" will be used.

    --languages  LANGUAGES   Comma separated list of ISO language codes (e.g.
                             en-US, en-GB, ru, zh-CN). These codes will be used
                             to determine the names of resource and translations
                             files that will be declared by the output grd file.

    --grd-dir    GRD_DIR     Specify where the resultant grd file
                             (FILENAME.grd) should be output. By default this
                             will be the present working directory.

    --header-dir HEADER_DIR  Specify the location of the directory where grit
                             generated C++ headers (whose name will be
                             FILENAME.h) will be placed. Use an empty string to
                             disable header generation. Default: empty.

    --rc-dir     RC_DIR      Specify the directory where resource files will
                             be located relative to grit build's output
                             directory. Use an empty string to disable rc
                             generation. Default: empty.

    --xml-dir    XML_DIR     Specify where to place localized strings.xml files
                             relative to grit build's output directory. For each
                             language xx a values-xx/strings.xml file will be
                             generated. Use an empty string to disable
                             strings.xml generation. Default: '.'.

    --xtb-dir    XTB_DIR     Specify where the xtb files containing translations
                             will be located relative to the grd file. Default:
                             '.'.
"""

  # Names of the long command line options (without the leading '--').
  _NAME_FLAG = 'name'
  _LANGUAGES_FLAG = 'languages'
  _GRD_DIR_FLAG = 'grd-dir'
  _RC_DIR_FLAG = 'rc-dir'
  _HEADER_DIR_FLAG = 'header-dir'
  _XTB_DIR_FLAG = 'xtb-dir'
  _XML_DIR_FLAG = 'xml-dir'

  def __init__(self):
    """Initializes every option to its default value."""
    # Base name (no extension) of the grd, header, rc and xtb files.
    self.name = 'chrome_android_strings'
    # Language codes for which outputs and translation files are declared.
    self.languages = []
    # Directory the resulting grd file is written to.
    self.grd_dir = '.'
    # Directory of the rc outputs; a falsy value disables rc output nodes.
    self.rc_dir = None
    # Directory of the xtb translation files.
    self.xtb_dir = '.'
    # Directory of the localized strings.xml outputs; falsy disables them.
    self.xml_res_dir = '.'
    # Directory of the generated C++ header; falsy disables the header node.
    self.header_dir = None

  def ShortDescription(self):
    """Returns a short description of the Android2Grd tool.

    Overridden from grit.interface.Tool

    Returns:
      A string containing a short description of the android2grd tool.
    """
    return 'Converts Android string.xml files into Chrome grd files.'

  def ParseOptions(self, args):
    """Sets options from args and returns all non-option arguments.

    Args:
      args: List of command line arguments for the tool: long options (see the
          class docstring) followed by positional arguments.

    Returns:
      The list of arguments left over once the options have been consumed.

    Raises:
      getopt.GetoptError: If an unknown option is given or an option is
          missing its value.
    """
    flags = [
        Android2Grd._NAME_FLAG,
        Android2Grd._LANGUAGES_FLAG,
        Android2Grd._GRD_DIR_FLAG,
        Android2Grd._RC_DIR_FLAG,
        Android2Grd._HEADER_DIR_FLAG,
        Android2Grd._XTB_DIR_FLAG,
        Android2Grd._XML_DIR_FLAG, ]
    # The short option spec must be a string: with None, getopt fails with a
    # TypeError on a stray short option instead of raising GetoptError.
    (opts, args) = getopt.getopt(args, '', ['%s=' % o for o in flags])

    for key, val in opts:
      # Get rid of the preceding hyphens.
      k = key[2:]
      if k == Android2Grd._NAME_FLAG:
        self.name = val
      elif k == Android2Grd._LANGUAGES_FLAG:
        # Tolerate whitespace around the commas (as in the documented example
        # "en-US, en-GB") and drop empty entries, so that neither ' en-GB' nor
        # '' ends up being used as a language code.
        self.languages = [
            lang.strip() for lang in val.split(',') if lang.strip()]
      elif k == Android2Grd._GRD_DIR_FLAG:
        self.grd_dir = val
      elif k == Android2Grd._RC_DIR_FLAG:
        self.rc_dir = val
      elif k == Android2Grd._HEADER_DIR_FLAG:
        self.header_dir = val
      elif k == Android2Grd._XTB_DIR_FLAG:
        self.xtb_dir = val
      elif k == Android2Grd._XML_DIR_FLAG:
        self.xml_res_dir = val
    return args

  def Run(self, opts, args):
    """Runs the Android2Grd tool.

    Inherited from grit.interface.Tool.

    Args:
      opts: Options object handed to SetOptions() (presumably grit's global
          options).
      args: List of tool arguments: the options described in the class
          docstring followed by the path of the strings.xml file to convert.

    Returns:
      2 if the number of positional arguments is not exactly one; otherwise
      None, once the grd file has been written.
    """
    args = self.ParseOptions(args)
    if len(args) != 1:
      print('Tool requires one argument, the path to the Android '
            'strings.xml resource file to be converted.')
      return 2
    self.SetOptions(opts)

    android_path = args[0]

    # Read and parse the Android strings.xml file.
    with open(android_path) as android_file:
      android_dom = xml.dom.minidom.parse(android_file)

    # Do the hard work -- convert the Android dom to grd file contents.
    grd_dom = self.AndroidDomToGrdDom(android_dom)
    grd_string = unicode(grd_dom)

    # Write the grd string to a file in grd_dir.  The text is encoded
    # explicitly: writing a unicode object to a byte stream would use the
    # default (ASCII) codec and fail on any non-ASCII message.  UTF-8 matches
    # the encoding declared in the grd skeleton.
    grd_filename = self.name + '.grd'
    grd_path = os.path.join(self.grd_dir, grd_filename)
    with open(grd_path, 'w') as grd_file:
      grd_file.write(grd_string.encode('utf-8'))

  def AndroidDomToGrdDom(self, android_dom):
    """Converts a strings.xml DOM into a DOM representing the contents of
    a grd file.

    Args:
      android_dom: A xml.dom.Document containing the contents of the Android
          string.xml document.
    Returns:
      The DOM for the grd xml document produced by converting the Android DOM.
    """

    # Start with a basic skeleton for the .grd file.
    root = grd_reader.Parse(StringIO.StringIO(
      '''<?xml version="1.0" encoding="UTF-8"?>
         <grit base_dir="." latest_public_release="0"
             current_release="1" source_lang_id="en">
           <outputs />
           <translations />
           <release allow_pseudo="false" seq="1">
             <messages fallback_to_english="true" />
           </release>
         </grit>'''), dir='.')
    # The children appear in the order in which the skeleton declares them.
    outputs = root.children[0]
    translations = root.children[1]
    messages = root.children[2].children[0]
    assert (isinstance(messages, grit.node.empty.MessagesNode) and
            isinstance(translations, grit.node.empty.TranslationsNode) and
            isinstance(outputs, grit.node.empty.OutputsNode))

    if self.header_dir:
      self.__CreateCppHeaderOutputNode(outputs, self.header_dir)
    for lang in self.languages:
      # Create an output element for each language.
      if self.rc_dir:
        self.__CreateRcOutputNode(outputs, lang, self.rc_dir)
      if self.xml_res_dir:
        self.__CreateAndroidXmlOutputNode(outputs, lang, self.xml_res_dir)
      # The skeleton's source language is 'en', so no translation file is
      # declared for it.
      if lang != 'en':
        self.__CreateFileNode(translations, lang)
    # Convert all the strings.xml strings into grd messages.
    self.__CreateMessageNodes(messages, android_dom.documentElement)

    return root

  def __CreateMessageNodes(self, messages, resources):
    """Creates the <message> elements and adds them as children of <messages>.

    Args:
      messages: the <messages> element in the grd dom.
      resources: the <resources> element in the strings.xml dom.
    """
    # <string> elements contain the definition of the resource.
    # The description of a <string> element is contained within the comment
    # node element immediately preceding the string element in question.
    description = ''
    for child in resources.childNodes:
      if child.nodeType == Node.COMMENT_NODE:
        # Remove leading/trailing whitespace; collapse consecutive whitespaces.
        description = ' '.join(child.data.split())
        continue
      if child.nodeType != Node.ELEMENT_NODE:
        continue
      if child.tagName != 'string':
        print('Warning: ignoring unknown tag <%s>' % child.tagName)
        continue

      translatable = self.IsTranslatable(child)
      raw_name = child.getAttribute('name')
      product = child.getAttribute('product') or None
      grd_name = self.__FormatName(raw_name, product)
      # Transform the <string> node contents into a tclib.Message, taking
      # care to handle whitespace transformations and escaped characters,
      # and converting <xliff:g> placeholders into <ph> placeholders.
      msg = self.CreateTclibMessage(child)
      msg_node = self.__CreateMessageNode(messages, grd_name, description,
          msg, translatable)
      messages.AddChild(msg_node)
      # Reset the description once a message has been parsed.
      description = ''

  def __FormatName(self, name, product=None):
    """Formats the message name.

    Names in the strings.xml files should be lowercase with underscores. In grd
    files message names should be mostly uppercase with a IDS prefix. We also
    will annotate names with product information (lowercase) where appropriate.

    A name containing illegal characters is reported on stdout but converted
    anyway.

    Args:
      name: The message name as found in the string.xml file.
      product: An optional product annotation.

    Returns:
      String containing the grd style name that will be used in the translation
      console.
    """
    if not _STRING_NAME.match(name):
      print('Error: string name contains illegal characters: %s' % name)
    grd_name = 'IDS_%s' % name.upper()
    product_suffix = ('_product_%s' % product.lower()) if product else ''
    return grd_name + product_suffix

  def CreateTclibMessage(self, android_string):
    """Transforms a <string/> element from strings.xml into a tclib.Message.

    Interprets whitespace, quotes, and escaped characters in the android_string
    according to Android's formatting and styling rules for strings.  Also
    converts <xliff:g> placeholders into <ph> placeholders, e.g.:

      <xliff:g id="website" example="google.com">%s</xliff:g>
        becomes
      <ph name="website"><ex>google.com</ex>%s</ph>

    Args:
      android_string: The <string> DOM element to convert.

    Returns:
      The tclib.Message.
    """
    msg = tclib.Message()
    current_text = ''  # Accumulated text that hasn't yet been added to msg.
    nodes = android_string.childNodes

    for i, node in enumerate(nodes):
      # Handle text nodes.
      if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        current_text += node.data

      # Handle <xliff:g> and other tags.
      elif node.nodeType == Node.ELEMENT_NODE:
        if node.tagName == 'xliff:g':
          # Element nodes have no 'data' attribute, so the offending tag is
          # reported through its xml serialization.
          assert node.hasAttribute('id'), 'missing id: ' + node.toxml()
          placeholder_id = node.getAttribute('id')
          placeholder_text = self.__FormatPlaceholderText(node)
          placeholder_example = node.getAttribute('example')
          if not placeholder_example:
            print('Info: placeholder does not contain an example: %s' %
                  node.toxml())
            placeholder_example = placeholder_id.upper()
          msg.AppendPlaceholder(tclib.Placeholder(placeholder_id,
              placeholder_text, placeholder_example))
        else:
          print('Warning: removing tag <%s> which must be inside a '
                'placeholder: %s' % (node.tagName, node.toxml()))
          msg.AppendText(self.__FormatPlaceholderText(node))

      # Handle other nodes.
      elif node.nodeType != Node.COMMENT_NODE:
        assert False, 'Unknown node type: %s' % node.nodeType

      # Flush the accumulated text once no more text can follow it directly,
      # i.e. at the end of the string or right before an element.  Comments
      # do not trigger a flush, so text on both sides of a comment is
      # formatted as a single run.
      is_last_node = (i == len(nodes) - 1)
      if (current_text and
          (is_last_node or nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
        # For messages containing just text and comments (no xml tags) Android
        # strips leading and trailing whitespace.  We mimic that behavior.
        if not msg.GetContent() and is_last_node:
          current_text = current_text.strip()
        msg.AppendText(self.__FormatAndroidString(current_text))
        current_text = ''

    return msg

  def __FormatAndroidString(self, android_string, inside_placeholder=False):
    r"""Returns android_string formatted for a .grd file.

      * Collapses consecutive whitespaces, except when inside double-quotes.
      * Replaces \\, \n, \t, \", \' with \, newline, tab, ", '.
      * Removes the (unescaped) double-quotes themselves.

    Unbalanced quotes, a trailing backslash and, unless inside_placeholder is
    true, format specifiers are reported as warnings on stdout.

    Args:
      android_string: The raw text taken from the strings.xml file.
      inside_placeholder: Whether the text is the content of a placeholder
          tag, in which case format specifiers are expected.

    Returns:
      The formatted string.
    """
    backslash_map = {'\\' : '\\', 'n' : '\n', 't' : '\t', '"' : '"', "'" : "'"}
    is_quoted_section = False  # True when we're inside double quotes.
    is_backslash_sequence = False  # True after seeing an unescaped backslash.
    prev_char = ''
    output = []
    for c in android_string:
      if is_backslash_sequence:
        # Unescape \\, \n, \t, \", and \'.
        assert c in backslash_map, 'Illegal escape sequence: \\%s' % c
        output.append(backslash_map[c])
        is_backslash_sequence = False
      elif c == '\\':
        is_backslash_sequence = True
      elif c.isspace() and not is_quoted_section:
        # Turn whitespace into ' ' and collapse consecutive whitespaces.
        if not prev_char.isspace():
          output.append(' ')
      elif c == '"':
        is_quoted_section = not is_quoted_section
      else:
        output.append(c)
      # prev_char tracks the raw input character, not what was emitted.
      prev_char = c
    output = ''.join(output)

    if is_quoted_section:
      print('Warning: unbalanced quotes in string: %s' % android_string)

    if is_backslash_sequence:
      print('Warning: trailing backslash in string: %s' % android_string)

    # Check for format specifiers outside of placeholder tags.
    if not inside_placeholder:
      format_specifier = _FORMAT_SPECIFIER.search(output)
      if format_specifier:
        print('Warning: format specifiers are not inside a placeholder '
              '<xliff:g/> tag: %s' % output)

    return output

  def __FormatPlaceholderText(self, placeholder_node):
    """Returns the text inside of an <xliff:g> placeholder node.

    Only text, CDATA and comment children are allowed; comments are skipped and
    any other child node type trips an assertion.
    """
    text = []
    for childNode in placeholder_node.childNodes:
      if childNode.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        text.append(childNode.data)
      elif childNode.nodeType != Node.COMMENT_NODE:
        assert False, 'Unknown node type in ' + placeholder_node.toxml()
    return self.__FormatAndroidString(''.join(text), inside_placeholder=True)

  def __CreateMessageNode(self, messages_node, grd_name, description, msg,
                          translatable):
    """Creates and initializes a <message> element.

    Message elements correspond to Android <string> elements in that they
    declare a string resource along with a programmatic id.

    Args:
      messages_node: The <messages> node that becomes the parent.
      grd_name: The grd style name of the message.
      description: The description of the message; may contain a
          [CHAR-LIMIT=n] annotation, which is checked against the message.
      msg: The tclib.Message holding the message contents.
      translatable: Whether the message should be translated.

    Returns:
      The node produced by message.MessageNode.Construct().
    """
    if not description:
      print('Warning: no description for %s' % grd_name)
    # Check that we actually fit within the character limit we've specified.
    match = _CHAR_LIMIT.search(description)
    if match:
      char_limit = int(match.group(1))
      msg_content = msg.GetRealContent()
      if len(msg_content) > char_limit:
        print('Warning: char-limit for %s is %d, but length is %d: %s' %
              (grd_name, char_limit, len(msg_content), msg_content))
    return message.MessageNode.Construct(parent=messages_node,
                                         name=grd_name,
                                         message=msg,
                                         desc=description,
                                         translateable=translatable)

  def __CreateFileNode(self, translations_node, lang):
    """Creates and initializes the <file> elements.

    File elements provide information on the location of translation files
    (xtbs).  The file is named NAME_LANG.xtb and located in the xtb directory.

    Args:
      translations_node: The <translations> node the element is added to.
      lang: The language code of the translation file.

    Returns:
      The newly created file node.
    """
    xtb_file = os.path.normpath(os.path.join(
        self.xtb_dir, '%s_%s.xtb' % (self.name, lang)))
    fnode = io.FileNode()
    fnode.StartParsing(u'file', translations_node)
    fnode.HandleAttribute('path', xtb_file)
    fnode.HandleAttribute('lang', lang)
    fnode.EndParsing()
    translations_node.AddChild(fnode)
    return fnode

  def __CreateCppHeaderOutputNode(self, outputs_node, header_dir):
    """Creates the <output> element corresponding to the generated c header.

    Args:
      outputs_node: The <outputs> node the element is added to.
      header_dir: Directory in which the header NAME.h is declared.

    Returns:
      The newly created output node, which has an <emit> child.
    """
    header_file_name = os.path.join(header_dir, self.name + '.h')
    header_node = io.OutputNode()
    header_node.StartParsing(u'output', outputs_node)
    header_node.HandleAttribute('filename', header_file_name)
    header_node.HandleAttribute('type', 'rc_header')
    emit_node = io.EmitNode()
    emit_node.StartParsing(u'emit', header_node)
    emit_node.HandleAttribute('emit_type', 'prepend')
    emit_node.EndParsing()
    header_node.AddChild(emit_node)
    header_node.EndParsing()
    outputs_node.AddChild(header_node)
    return header_node

  def __CreateRcOutputNode(self, outputs_node, lang, rc_dir):
    """Creates the <output> element corresponding to various rc file output.

    Args:
      outputs_node: The <outputs> node the element is added to.
      lang: The language code of the rc file, named NAME_LANG.rc.
      rc_dir: Directory in which the rc file is declared.

    Returns:
      The newly created output node.
    """
    rc_file_name = self.name + '_' + lang + ".rc"
    rc_path = os.path.join(rc_dir, rc_file_name)
    node = io.OutputNode()
    node.StartParsing(u'output', outputs_node)
    node.HandleAttribute('filename', rc_path)
    node.HandleAttribute('lang', lang)
    node.HandleAttribute('type', 'rc_all')
    node.EndParsing()
    outputs_node.AddChild(node)
    return node

  def __CreateAndroidXmlOutputNode(self, outputs_node, locale, xml_res_dir):
    """Creates the <output> element for a localized Android strings.xml file.

    Args:
      outputs_node: The <outputs> node the element is added to.
      locale: The language code (optionally with a region, e.g. en-GB) of the
          output.
      xml_res_dir: Directory below which values[-LOCALE]/strings.xml is
          declared.

    Returns:
      The newly created output node.
    """
    # Need to check to see if the locale has a region, e.g. the GB in en-GB.
    # When a locale has a region Android expects the region to be prefixed
    # with an 'r'. For example for en-GB Android expects a values-en-rGB
    # directory.  Also, Android expects nb, tl, in, iw, ji as the language
    # codes for Norwegian, Tagalog/Filipino, Indonesian, Hebrew, and Yiddish:
    # http://developer.android.com/reference/java/util/Locale.html
    if locale == 'es-419':
      android_locale = 'es-rUS'
    else:
      android_lang, dash, region = locale.partition('-')
      lang_map = {'no': 'nb', 'fil': 'tl', 'id': 'in', 'he': 'iw', 'yi': 'ji'}
      android_lang = lang_map.get(android_lang, android_lang)
      android_locale = android_lang + ('-r' + region if region else '')
    # Plain 'en' goes into the unqualified 'values' directory.
    values = 'values-' + android_locale if android_locale != 'en' else 'values'
    xml_path = os.path.normpath(os.path.join(
        xml_res_dir, values, 'strings.xml'))

    node = io.OutputNode()
    node.StartParsing(u'output', outputs_node)
    node.HandleAttribute('filename', xml_path)
    node.HandleAttribute('lang', locale)
    node.HandleAttribute('type', 'android')
    node.EndParsing()
    outputs_node.AddChild(node)
    return node

  def IsTranslatable(self, android_string):
    """Determines if a <string> element is a candidate for translation.

    A <string> element is by default translatable unless otherwise marked.

    Args:
      android_string: The <string> DOM element to inspect.

    Returns:
      True if the element has no 'translatable' attribute or the attribute is
      'true' (compared case-insensitively); False otherwise.  Values other
      than 'true' and 'false' are reported as a warning on stdout.
    """
    if android_string.hasAttribute('translatable'):
      value = android_string.getAttribute('translatable').lower()
      if value not in ('true', 'false'):
        print('Warning: translatable attribute has invalid value: %s' % value)
      return value == 'true'
    else:
      return True
499
500