#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The 'grit android2grd' tool."""


import getopt
import os.path
import StringIO
from xml.dom import Node
import xml.dom.minidom

import grit.node.empty
from grit.node import io
from grit.node import message

from grit.tool import interface

from grit import grd_reader
from grit import lazy_re
from grit import tclib
from grit import util


# The name of a string in strings.xml
_STRING_NAME = lazy_re.compile(r'[a-z0-9_]+\Z')

# A string's character limit in strings.xml
_CHAR_LIMIT = lazy_re.compile(r'\[CHAR-LIMIT=(\d+)\]')

# Finds String.Format() style format specifiers such as "%-5.2f".
# The fragments are raw strings so that the regex escapes (\$ and \.) are not
# interpreted as (invalid) Python string escapes.
_FORMAT_SPECIFIER = lazy_re.compile(
  r'%'
  r'([1-9][0-9]*\$|<)?'            # argument_index
  r'([-#+ 0,(]*)'                  # flags
  r'([0-9]+)?'                     # width
  r'(\.[0-9]+)?'                   # precision
  r'([bBhHsScCdoxXeEfgGaAtT%n])')  # conversion


class Android2Grd(interface.Tool):
  """Tool for converting Android string.xml files into chrome Grd files.

Usage: grit [global options] android2grd [OPTIONS] STRINGS_XML

The Android2Grd tool will convert an Android strings.xml file (whose path is
specified by STRINGS_XML) and create a chrome style grd file containing the
relevant information.

Because grd documents are much richer than strings.xml documents we supplement
the information required by grds using OPTIONS with sensible defaults.

OPTIONS may be any of the following:

    --name       FILENAME    Specify the base FILENAME. This should be without
                             any file type suffix. By default
                             "chrome_android_strings" will be used.

    --languages  LANGUAGES   Comma separated list of ISO language codes (e.g.
                             en-US, en-GB, ru, zh-CN). These codes will be used
                             to determine the names of resource and translations
                             files that will be declared by the output grd file.

    --grd-dir    GRD_DIR     Specify where the resultant grd file
                             (FILENAME.grd) should be output. By default this
                             will be the present working directory.

    --header-dir HEADER_DIR  Specify the location of the directory where grit
                             generated C++ headers (whose name will be
                             FILENAME.h) will be placed. Use an empty string to
                             disable header generation. Default: empty.

    --rc-dir     RC_DIR      Specify the directory where resource files will
                             be located relative to grit build's output
                             directory. Use an empty string to disable rc
                             generation. Default: empty.

    --xml-dir    XML_DIR     Specify where to place localized strings.xml files
                             relative to grit build's output directory. For each
                             language xx a values-xx/strings.xml file will be
                             generated. Use an empty string to disable
                             strings.xml generation. Default: '.'.

    --xtb-dir    XTB_DIR     Specify where the xtb files containing translations
                             will be located relative to the grd file. Default:
                             '.'.
"""

  _NAME_FLAG = 'name'
  _LANGUAGES_FLAG = 'languages'
  _GRD_DIR_FLAG = 'grd-dir'
  _RC_DIR_FLAG = 'rc-dir'
  _HEADER_DIR_FLAG = 'header-dir'
  _XTB_DIR_FLAG = 'xtb-dir'
  _XML_DIR_FLAG = 'xml-dir'

  def __init__(self):
    # Defaults for every command line option; see the class docstring.
    self.name = 'chrome_android_strings'
    self.languages = []
    self.grd_dir = '.'
    self.rc_dir = None
    self.xtb_dir = '.'
    self.xml_res_dir = '.'
    self.header_dir = None

  def ShortDescription(self):
    """Returns a short description of the Android2Grd tool.

    Overridden from grit.interface.Tool

    Returns:
      A string containing a short description of the android2grd tool.
    """
    return 'Converts Android string.xml files into Chrome grd files.'

  def ParseOptions(self, args):
    """Sets this object's options and returns all non-option arguments.

    Args:
      args: List of command line argument strings (long options followed by
          positional arguments).

    Returns:
      The list of arguments left over once the options have been consumed.

    Raises:
      getopt.GetoptError: if an unrecognized option is supplied.
    """
    flags = [
        Android2Grd._NAME_FLAG,
        Android2Grd._LANGUAGES_FLAG,
        Android2Grd._GRD_DIR_FLAG,
        Android2Grd._RC_DIR_FLAG,
        Android2Grd._HEADER_DIR_FLAG,
        Android2Grd._XTB_DIR_FLAG,
        Android2Grd._XML_DIR_FLAG, ]
    # getopt expects a string of short option letters. An empty string means
    # "no short options", so a stray "-x" is reported as a GetoptError rather
    # than failing with a TypeError inside getopt.
    (opts, args) = getopt.getopt(args, '', ['%s=' % o for o in flags])

    for key, val in opts:
      # Get rid of the preceding hyphens.
      k = key[2:]
      if k == Android2Grd._NAME_FLAG:
        self.name = val
      elif k == Android2Grd._LANGUAGES_FLAG:
        # Tolerate whitespace around the commas (the usage text itself shows
        # "en-US, en-GB, ...") and ignore empty entries.
        self.languages = [lang.strip() for lang in val.split(',')
                          if lang.strip()]
      elif k == Android2Grd._GRD_DIR_FLAG:
        self.grd_dir = val
      elif k == Android2Grd._RC_DIR_FLAG:
        self.rc_dir = val
      elif k == Android2Grd._HEADER_DIR_FLAG:
        self.header_dir = val
      elif k == Android2Grd._XTB_DIR_FLAG:
        self.xtb_dir = val
      elif k == Android2Grd._XML_DIR_FLAG:
        self.xml_res_dir = val
    return args

  def Run(self, opts, args):
    """Runs the Android2Grd tool.

    Inherited from grit.interface.Tool.

    Args:
      opts: The global grit options, forwarded to SetOptions().
      args: List of string arguments: the tool's own options followed by the
          path of the strings.xml file to be converted.

    Returns:
      2 if the wrong number of positional arguments was supplied, otherwise
      None.
    """
    args = self.ParseOptions(args)
    if len(args) != 1:
      print ('Tool requires one argument, the path to the Android '
             'strings.xml resource file to be converted.')
      return 2
    self.SetOptions(opts)

    android_path = args[0]

    # Read and parse the Android strings.xml file.
    with open(android_path) as android_file:
      android_dom = xml.dom.minidom.parse(android_file)

    # Do the hard work -- convert the Android dom to grd file contents.
    grd_dom = self.AndroidDomToGrdDom(android_dom)
    grd_string = unicode(grd_dom)

    # Write the grd string to a file in grd_dir.
    grd_filename = self.name + '.grd'
    grd_path = os.path.join(self.grd_dir, grd_filename)
    with open(grd_path, 'w') as grd_file:
      # Encode explicitly (the grd skeleton declares encoding="UTF-8").
      # Writing the unicode object directly would use the implicit ASCII codec
      # and fail on any non-ASCII character in a message.
      grd_file.write(grd_string.encode('utf-8'))

  def AndroidDomToGrdDom(self, android_dom):
    """Converts a strings.xml DOM into a DOM representing the contents of
    a grd file.

    Args:
      android_dom: A xml.dom.Document containing the contents of the Android
          string.xml document.
    Returns:
      The DOM for the grd xml document produced by converting the Android DOM.
    """

    # Start with a basic skeleton for the .grd file.
    root = grd_reader.Parse(StringIO.StringIO(
      '''<?xml version="1.0" encoding="UTF-8"?>
         <grit base_dir="." latest_public_release="0"
             current_release="1" source_lang_id="en">
           <outputs />
           <translations />
           <release allow_pseudo="false" seq="1">
             <messages fallback_to_english="true" />
           </release>
         </grit>'''), dir='.')
    outputs = root.children[0]
    translations = root.children[1]
    messages = root.children[2].children[0]
    assert (isinstance(messages, grit.node.empty.MessagesNode) and
            isinstance(translations, grit.node.empty.TranslationsNode) and
            isinstance(outputs, grit.node.empty.OutputsNode))

    if self.header_dir:
      self.__CreateCppHeaderOutputNode(outputs, self.header_dir)
    for lang in self.languages:
      # Create an output element for each language.
      if self.rc_dir:
        self.__CreateRcOutputNode(outputs, lang, self.rc_dir)
      if self.xml_res_dir:
        self.__CreateAndroidXmlOutputNode(outputs, lang, self.xml_res_dir)
      # 'en' is the source language (see source_lang_id in the skeleton), so
      # it gets no translation file.
      if lang != 'en':
        self.__CreateFileNode(translations, lang)
    # Convert all the strings.xml strings into grd messages.
    self.__CreateMessageNodes(messages, android_dom.documentElement)

    return root

  def __CreateMessageNodes(self, messages, resources):
    """Creates the <message> elements and adds them as children of <messages>.

    Args:
      messages: the <messages> element in the grd dom.
      resources: the <resources> element in the strings.xml dom.
    """
    # <string> elements contain the definition of the resource.
    # The description of a <string> element is contained within the comment
    # node element immediately preceding the string element in question.
    description = ''
    for child in resources.childNodes:
      if child.nodeType == Node.COMMENT_NODE:
        # Remove leading/trailing whitespace; collapse consecutive whitespaces.
        description = ' '.join(child.data.split())
      elif child.nodeType == Node.ELEMENT_NODE:
        if child.tagName != 'string':
          print('Warning: ignoring unknown tag <%s>' % child.tagName)
        else:
          translatable = self.IsTranslatable(child)
          raw_name = child.getAttribute('name')
          product = child.getAttribute('product') or None
          grd_name = self.__FormatName(raw_name, product)
          # Transform the <string> node contents into a tclib.Message, taking
          # care to handle whitespace transformations and escaped characters,
          # and converting <xliff:g> placeholders into <ph> placeholders.
          msg = self.CreateTclibMessage(child)
          msg_node = self.__CreateMessageNode(messages, grd_name, description,
              msg, translatable)
          messages.AddChild(msg_node)
          # Reset the description once a message has been parsed.
          description = ''

  def __FormatName(self, name, product=None):
    """Formats the message name.

    Names in the strings.xml files should be lowercase with underscores. In grd
    files message names should be mostly uppercase with a IDS prefix. We also
    will annotate names with product information (lowercase) where appropriate.

    Args:
      name: The message name as found in the string.xml file.
      product: An optional product annotation.

    Returns:
      String containing the grd style name that will be used in the translation
      console.
    """
    if not _STRING_NAME.match(name):
      # Only reported; the (upper-cased) name is still used below.
      print('Error: string name contains illegal characters: %s' % name)
    grd_name = 'IDS_%s' % name.upper()
    product_suffix = ('_product_%s' % product.lower()) if product else ''
    return grd_name + product_suffix

  def CreateTclibMessage(self, android_string):
    """Transforms a <string/> element from strings.xml into a tclib.Message.

    Interprets whitespace, quotes, and escaped characters in the android_string
    according to Android's formatting and styling rules for strings. Also
    converts <xliff:g> placeholders into <ph> placeholders, e.g.:

      <xliff:g id="website" example="google.com">%s</xliff:g>
        becomes
      <ph name="website"><ex>google.com</ex>%s</ph>

    Args:
      android_string: The <string> DOM element to convert.

    Returns:
      The tclib.Message.
    """
    msg = tclib.Message()
    current_text = ''  # Accumulated text that hasn't yet been added to msg.
    nodes = android_string.childNodes

    for i, node in enumerate(nodes):
      # Handle text nodes.
      if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        current_text += node.data

      # Handle <xliff:g> and other tags.
      elif node.nodeType == Node.ELEMENT_NODE:
        if node.tagName == 'xliff:g':
          # Element nodes have no 'data' attribute, so report the offending
          # element via toxml().
          assert node.hasAttribute('id'), 'missing id: ' + node.toxml()
          placeholder_id = node.getAttribute('id')
          placeholder_text = self.__FormatPlaceholderText(node)
          placeholder_example = node.getAttribute('example')
          if not placeholder_example:
            print ('Info: placeholder does not contain an example: %s' %
                   node.toxml())
            placeholder_example = placeholder_id.upper()
          msg.AppendPlaceholder(tclib.Placeholder(placeholder_id,
              placeholder_text, placeholder_example))
        else:
          print ('Warning: removing tag <%s> which must be inside a '
                 'placeholder: %s' % (node.tagName, node.toxml()))
          msg.AppendText(self.__FormatPlaceholderText(node))

      # Handle other nodes.
      elif node.nodeType != Node.COMMENT_NODE:
        assert False, 'Unknown node type: %s' % node.nodeType

      # Flush the accumulated text right before the next element (or at the
      # end) so that text and placeholders are appended in document order.
      is_last_node = (i == len(nodes) - 1)
      if (current_text and
          (is_last_node or nodes[i + 1].nodeType == Node.ELEMENT_NODE)):
        # For messages containing just text and comments (no xml tags) Android
        # strips leading and trailing whitespace.  We mimic that behavior.
        if not msg.GetContent() and is_last_node:
          current_text = current_text.strip()
        msg.AppendText(self.__FormatAndroidString(current_text))
        current_text = ''

    return msg

  def __FormatAndroidString(self, android_string, inside_placeholder=False):
    r"""Returns android_string formatted for a .grd file.

      * Collapses consecutive whitespaces, except when inside double-quotes.
      * Replaces \\, \n, \t, \", \' with \, newline, tab, ", '.

    Args:
      android_string: The raw text taken from the strings.xml file.
      inside_placeholder: True if the text comes from inside a placeholder
          tag, in which case format specifiers are expected and not warned
          about.

    Returns:
      The formatted string.
    """
    backslash_map = {'\\' : '\\', 'n' : '\n', 't' : '\t', '"' : '"', "'" : "'"}
    is_quoted_section = False  # True when we're inside double quotes.
    is_backslash_sequence = False  # True after seeing an unescaped backslash.
    prev_char = ''
    output = []
    for c in android_string:
      if is_backslash_sequence:
        # Unescape \\, \n, \t, \", and \'.
        assert c in backslash_map, 'Illegal escape sequence: \\%s' % c
        output.append(backslash_map[c])
        is_backslash_sequence = False
      elif c == '\\':
        is_backslash_sequence = True
      elif c.isspace() and not is_quoted_section:
        # Turn whitespace into ' ' and collapse consecutive whitespaces.
        if not prev_char.isspace():
          output.append(' ')
      elif c == '"':
        # Unescaped double quotes only delimit a quoted section; they are not
        # part of the output.
        is_quoted_section = not is_quoted_section
      else:
        output.append(c)
      prev_char = c
    output = ''.join(output)

    if is_quoted_section:
      print('Warning: unbalanced quotes in string: %s' % android_string)

    if is_backslash_sequence:
      print('Warning: trailing backslash in string: %s' % android_string)

    # Check for format specifiers outside of placeholder tags.
    if not inside_placeholder:
      format_specifier = _FORMAT_SPECIFIER.search(output)
      if format_specifier:
        print ('Warning: format specifiers are not inside a placeholder '
               '<xliff:g/> tag: %s' % output)

    return output

  def __FormatPlaceholderText(self, placeholder_node):
    """Returns the text inside of an <xliff:g> placeholder node.

    Only text, CDATA and comment children are allowed; comments are ignored
    and any other child node type trips an assertion.
    """
    text = []
    for childNode in placeholder_node.childNodes:
      if childNode.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        text.append(childNode.data)
      elif childNode.nodeType != Node.COMMENT_NODE:
        assert False, 'Unknown node type in ' + placeholder_node.toxml()
    return self.__FormatAndroidString(''.join(text), inside_placeholder=True)

  def __CreateMessageNode(self, messages_node, grd_name, description, msg,
                          translatable):
    """Creates and initializes a <message> element.

    Message elements correspond to Android <string> elements in that they
    declare a string resource along with a programmatic id.

    Args:
      messages_node: The <messages> node that will be the parent.
      grd_name: The grd style name of the message.
      description: The description of the message; may contain a
          [CHAR-LIMIT=n] annotation, which is checked against the message.
      msg: The tclib.Message holding the message contents.
      translatable: Whether the message should be translated.

    Returns:
      The newly constructed message node. It is not added to messages_node's
      children here; the caller does that.
    """
    if not description:
      print('Warning: no description for %s' % grd_name)
    # Check that we actually fit within the character limit we've specified.
    match = _CHAR_LIMIT.search(description)
    if match:
      char_limit = int(match.group(1))
      msg_content = msg.GetRealContent()
      if len(msg_content) > char_limit:
        print ('Warning: char-limit for %s is %d, but length is %d: %s' %
               (grd_name, char_limit, len(msg_content), msg_content))
    return message.MessageNode.Construct(parent=messages_node,
                                         name=grd_name,
                                         message=msg,
                                         desc=description,
                                         translateable=translatable)

  def __CreateFileNode(self, translations_node, lang):
    """Creates and initializes the <file> elements.

    File elements provide information on the location of translation files
    (xtbs)

    Args:
      translations_node: The <translations> node the <file> is added to.
      lang: The language code of the translation file.

    Returns:
      The new <file> node (already added to translations_node).
    """
    xtb_file = os.path.normpath(os.path.join(
        self.xtb_dir, '%s_%s.xtb' % (self.name, lang)))
    fnode = io.FileNode()
    fnode.StartParsing(u'file', translations_node)
    fnode.HandleAttribute('path', xtb_file)
    fnode.HandleAttribute('lang', lang)
    fnode.EndParsing()
    translations_node.AddChild(fnode)
    return fnode

  def __CreateCppHeaderOutputNode(self, outputs_node, header_dir):
    """Creates the <output> element corresponding to the generated c header.

    Args:
      outputs_node: The <outputs> node the <output> is added to.
      header_dir: Directory in which FILENAME.h will be placed.

    Returns:
      The new <output> node (already added to outputs_node).
    """
    header_file_name = os.path.join(header_dir, self.name + '.h')
    header_node = io.OutputNode()
    header_node.StartParsing(u'output', outputs_node)
    header_node.HandleAttribute('filename', header_file_name)
    header_node.HandleAttribute('type', 'rc_header')
    emit_node = io.EmitNode()
    emit_node.StartParsing(u'emit', header_node)
    emit_node.HandleAttribute('emit_type', 'prepend')
    emit_node.EndParsing()
    header_node.AddChild(emit_node)
    header_node.EndParsing()
    outputs_node.AddChild(header_node)
    return header_node

  def __CreateRcOutputNode(self, outputs_node, lang, rc_dir):
    """Creates the <output> element corresponding to various rc file output.

    Args:
      outputs_node: The <outputs> node the <output> is added to.
      lang: The language code of the rc file.
      rc_dir: Directory in which FILENAME_LANG.rc will be placed.

    Returns:
      The new <output> node (already added to outputs_node).
    """
    rc_file_name = self.name + '_' + lang + ".rc"
    rc_path = os.path.join(rc_dir, rc_file_name)
    node = io.OutputNode()
    node.StartParsing(u'output', outputs_node)
    node.HandleAttribute('filename', rc_path)
    node.HandleAttribute('lang', lang)
    node.HandleAttribute('type', 'rc_all')
    node.EndParsing()
    outputs_node.AddChild(node)
    return node

  def __CreateAndroidXmlOutputNode(self, outputs_node, locale, xml_res_dir):
    """Creates the <output> element for a localized Android strings.xml file.

    Args:
      outputs_node: The <outputs> node the <output> is added to.
      locale: The locale code (e.g. 'en-GB') of the output file.
      xml_res_dir: Directory under which the values-xx/strings.xml file will
          be placed.

    Returns:
      The new <output> node (already added to outputs_node).
    """
    # Need to check to see if the locale has a region, e.g. the GB in en-GB.
    # When a locale has a region Android expects the region to be prefixed
    # with an 'r'. For example for en-GB Android expects a values-en-rGB
    # directory.  Also, Android expects nb, tl, in, iw, ji as the language
    # codes for Norwegian, Tagalog/Filipino, Indonesian, Hebrew, and Yiddish:
    # http://developer.android.com/reference/java/util/Locale.html
    if locale == 'es-419':
      android_locale = 'es-rUS'
    else:
      android_lang, dash, region = locale.partition('-')
      lang_map = {'no': 'nb', 'fil': 'tl', 'id': 'in', 'he': 'iw', 'yi': 'ji'}
      android_lang = lang_map.get(android_lang, android_lang)
      android_locale = android_lang + ('-r' + region if region else '')
    values = 'values-' + android_locale if android_locale != 'en' else 'values'
    xml_path = os.path.normpath(os.path.join(
        xml_res_dir, values, 'strings.xml'))

    node = io.OutputNode()
    node.StartParsing(u'output', outputs_node)
    node.HandleAttribute('filename', xml_path)
    node.HandleAttribute('lang', locale)
    node.HandleAttribute('type', 'android')
    node.EndParsing()
    outputs_node.AddChild(node)
    return node

  def IsTranslatable(self, android_string):
    """Determines if a <string> element is a candidate for translation.

    A <string> element is by default translatable unless otherwise marked.

    Args:
      android_string: The <string> DOM element to inspect.

    Returns:
      False if the element carries a translatable attribute whose value is not
      'true' (case-insensitively); True otherwise.
    """
    if android_string.hasAttribute('translatable'):
      value = android_string.getAttribute('translatable').lower()
      if value not in ('true', 'false'):
        print('Warning: translatable attribute has invalid value: %s' % value)
      return value == 'true'
    else:
      return True