#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""The 'grit xmb' tool.
"""

import getopt
import os

from xml.sax import saxutils

from grit import grd_reader
from grit import lazy_re
from grit import tclib
from grit import util
from grit.tool import interface


# Used to collapse presentable content to determine if
# xml:space="preserve" is needed.
# (Written as an escaped u'' literal rather than ur'' so the module also
# parses on interpreters without the ur prefix; the pattern is unchanged.)
_WHITESPACES_REGEX = lazy_re.compile(u'\\s\\s*')


# See _XmlEscape below. saxutils.escape() only handles &, < and > by
# default; these extra entries make the result safe inside quoted attribute
# values as well.
_XML_QUOTE_ESCAPES = {
    u"'": u'&apos;',
    u'"': u'&quot;',
}
# Matches every character outside the ranges this tool allows in its XML
# output (tab, LF, CR, U+0020-U+D7FF, U+E000-U+FFFD).
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
                                      u'\u0020-\uD7FF\uE000-\uFFFD]')


def _XmlEscape(s):
  """Returns text escaped for XML in a way compatible with Google's
  internal Translation Console tool.  May be used for attributes as
  well as for contents.

  Args:
    s: value to escape; anything that is not already unicode is converted
       with unicode() first.

  Returns:
    The escaped text, with characters matched by _XML_BAD_CHAR_REGEX
    removed, encoded as a UTF-8 byte string.
  """
  if not isinstance(s, unicode):
    s = unicode(s)
  result = saxutils.escape(s, _XML_QUOTE_ESCAPES)
  return _XML_BAD_CHAR_REGEX.sub(u'', result).encode('utf-8')


def _WriteAttribute(file, name, value):
  """Writes an XML attribute to the specified file.

  Nothing is written when value is empty or None.

  Args:
    file: file to write to
    name: name of the attribute
    value: (unescaped) value of the attribute
  """
  if value:
    file.write(' %s="%s"' % (name, _XmlEscape(value)))


def _WriteMessage(file, message):
  """Writes a single <msg> element for message to file.

  Args:
    file: file to write to
    message: message object providing GetPresentableContent(),
             GetDescription(), GetId(), GetMeaning(), GetContent() and a
             'parts' attribute.
  """
  presentable_content = message.GetPresentableContent()
  # Sanity check: the content is unicode, or the message consists of exactly
  # one placeholder.
  assert (isinstance(presentable_content, unicode) or
          (len(message.parts) == 1 and
           isinstance(message.parts[0], tclib.Placeholder)))
  # Whitespace is significant if stripping the content and collapsing
  # whitespace runs to a single space would change it.
  preserve_space = presentable_content != _WHITESPACES_REGEX.sub(
      u' ', presentable_content.strip())

  file.write('<msg')
  _WriteAttribute(file, 'desc', message.GetDescription())
  _WriteAttribute(file, 'id', message.GetId())
  _WriteAttribute(file, 'meaning', message.GetMeaning())
  if preserve_space:
    _WriteAttribute(file, 'xml:space', 'preserve')
  file.write('>')
  # Only add layout whitespace inside <msg> when it is not significant.
  if not preserve_space:
    file.write('\n ')

  for part in message.GetContent():
    if isinstance(part, tclib.Placeholder):
      file.write('<ph')
      _WriteAttribute(file, 'name', part.GetPresentation())
      file.write('><ex>')
      file.write(_XmlEscape(part.GetExample()))
      file.write('</ex>')
      file.write(_XmlEscape(part.GetOriginal()))
      file.write('</ph>')
    else:
      file.write(_XmlEscape(part))
  if not preserve_space:
    file.write('\n')
  file.write('</msg>\n')


def WriteXmbFile(file, messages):
  """Writes the given grit.tclib.Message items to the specified open
  file-like object in the XMB format.

  Args:
    file: file-like object to write to
    messages: iterable of messages, written in iteration order
  """
  file.write("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE messagebundle [
<!ELEMENT messagebundle (msg)*>
<!ATTLIST messagebundle class CDATA #IMPLIED>

<!ELEMENT msg (#PCDATA|ph|source)*>
<!ATTLIST msg id CDATA #IMPLIED>
<!ATTLIST msg seq CDATA #IMPLIED>
<!ATTLIST msg name CDATA #IMPLIED>
<!ATTLIST msg desc CDATA #IMPLIED>
<!ATTLIST msg meaning CDATA #IMPLIED>
<!ATTLIST msg obsolete (obsolete) #IMPLIED>
<!ATTLIST msg xml:space (default|preserve) "default">
<!ATTLIST msg is_hidden CDATA #IMPLIED>

<!ELEMENT source (#PCDATA)>

<!ELEMENT ph (#PCDATA|ex)*>
<!ATTLIST ph name CDATA #REQUIRED>

<!ELEMENT ex (#PCDATA)>
]>
<messagebundle>
""")
  for message in messages:
    _WriteMessage(file, message)
  file.write('</messagebundle>')


class OutputXmb(interface.Tool):
  """Outputs all translateable messages in the .grd input file to an
.xmb file, which is the format used to give source messages to
Google's internal Translation Console tool. The format could easily
be used for other systems.

Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

OUTPUTPATH is the path you want to output the .xmb file to.

The -l option can be used to output only some of the resources to the .xmb file.
LIMITFILE is the path to a file that is used to limit the items output to the
xmb file. If the filename extension is .grd, the file must be a .grd file
and the tool only outputs the contents of nodes from the input file that also
exist in the limit file (as compared on the 'name' attribute). Otherwise it must
contain a list of the IDs that output should be limited to, one ID per line, and
the tool will only output nodes with 'name' attributes that match one of the
IDs.

The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
file. The "IDs only" file contains the message ID of each message that would
normally be output to the XMB file, one message ID per line. It is designed for
use with the 'grit transl2tc' tool's -l option.

Other options:

  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                    value VAL (defaults to 1) which will be used to control
                    conditional inclusion of resources.

  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

"""
  # The different output formats supported by this tool
  FORMAT_XMB = 0
  FORMAT_IDS_ONLY = 1

  def __init__(self, defines=None):
    super(OutputXmb, self).__init__()
    self.format = self.FORMAT_XMB
    self.defines = defines or {}

  def ShortDescription(self):
    return 'Exports all translateable messages into an XMB file.'

  def Run(self, opts, args):
    """Parses the tool's own options and writes the output file.

    Args:
      opts: global grit options; 'input' and 'extra_verbose' are read here.
      args: command-line arguments following the tool name.

    Returns:
      2 if the number of positional arguments is wrong, otherwise None.
    """
    self.SetOptions(opts)

    limit_file = None
    limit_is_grd = False
    limit_file_dir = None
    # 'E:' must be part of the spec, otherwise getopt rejects the documented
    # -E option before the handler below can ever see it.
    # NOTE(review): -h is accepted but has no handler in this method.
    own_opts, args = getopt.getopt(args, 'l:D:E:ih')
    try:
      for key, val in own_opts:
        if key == '-l':
          if limit_file:
            # -l was given more than once; don't leak the earlier handle.
            limit_file.close()
          limit_file = open(val, 'r')
          limit_file_dir = util.dirname(val)
          if not limit_file_dir:
            limit_file_dir = '.'
          limit_is_grd = os.path.splitext(val)[1] == '.grd'
        elif key == '-i':
          self.format = self.FORMAT_IDS_ONLY
        elif key == '-D':
          name, val = util.ParseDefine(val)
          self.defines[name] = val
        elif key == '-E':
          (env_name, env_value) = val.split('=', 1)
          os.environ[env_name] = env_value
      if len(args) != 1:
        print('grit xmb takes exactly one argument, the path to the XMB file '
              'to output.')
        return 2

      xmb_path = args[0]
      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
      res_tree.SetOutputLanguage('en')
      res_tree.SetDefines(self.defines)
      res_tree.OnlyTheseTranslations([])
      res_tree.RunGatherers()

      with open(xmb_path, 'wb') as output_file:
        self.Process(
            res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
    finally:
      # Close the limit file on every exit path, including the usage error
      # above and exceptions raised while parsing or processing.
      if limit_file:
        limit_file.close()
    print('Wrote %s' % xmb_path)

  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
              dir=None):
    """Writes a document with the contents of res_tree into output_file,
    limiting output to the IDs specified in limit_file, which is a GRD file if
    limit_is_grd is true, otherwise a file with one ID per line.

    The format of the output document depends on this object's format attribute.
    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

    The FORMAT_IDS_ONLY format causes this function to write just a list
    of the IDs of all messages that would have been added to the XMB file, one
    ID per line.

    The FORMAT_XMB format causes this function to output the (default) XMB
    format.

    Args:
      res_tree: base.Node()
      output_file: file open for writing
      limit_file: None or file open for reading
      limit_is_grd: True | False
      dir: Directory of the limit file
    """
    # Names that output is restricted to; only consulted when limit_file is
    # given. A set keeps the per-node membership test O(1).
    limit_names = set()
    if limit_file:
      if limit_is_grd:
        limit_tree = grd_reader.Parse(limit_file,
                                      dir=dir,
                                      debug=self.o.extra_verbose)
        limit_names = set(node.attrs['name'] for node in limit_tree
                          if 'name' in node.attrs)
      else:
        # Not a GRD file, so it's just a file with one ID per line
        limit_names = set(
            item.strip() for item in limit_file.read().split('\n'))

    ids_already_done = set()
    messages = []
    for node in res_tree:
      if (limit_file and
          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
        continue
      if not node.IsTranslateable():
        continue

      for clique in node.GetCliques():
        if not clique.IsTranslateable():
          continue
        if not clique.GetMessage().GetRealContent():
          continue

        # Some explanation is in order here.  Note that we can have
        # many messages with the same ID.
        #
        # The way we work around this is to maintain a list of cliques
        # per message ID (in the UberClique) and select the "best" one
        # (the first one that has a description, or an arbitrary one
        # if there is no description) for inclusion in the XMB file.
        # The translations are all going to be the same for messages
        # with the same ID, although the way we replace placeholders
        # might be slightly different.
        message_id = clique.GetMessage().GetId()
        if message_id in ids_already_done:
          continue
        ids_already_done.add(message_id)

        message = node.UberClique().BestClique(message_id).GetMessage()
        messages.append(message)

    # Ensure a stable order of messages, to help regression testing.
    messages.sort(key=lambda x: x.GetId())

    if self.format == self.FORMAT_IDS_ONLY:
      # We just print the list of IDs to the output file.
      for msg in messages:
        output_file.write(msg.GetId())
        output_file.write('\n')
    else:
      assert self.format == self.FORMAT_XMB
      WriteXmbFile(output_file, messages)