1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""The 'grit xmb' tool.
7"""
8
9import getopt
10import os
11
12from xml.sax import saxutils
13
14from grit import grd_reader
15from grit import lazy_re
16from grit import tclib
17from grit import util
18from grit.tool import interface
19
20
# Matches each run of one or more whitespace characters.  _WriteMessage
# replaces every such run with a single space to collapse the presentable
# content and thereby determine if xml:space="preserve" is needed.
_WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*')
24
25
# Extra replacements handed to saxutils.escape() by _XmlEscape (below) so
# that both quote characters are turned into entity references.  This is what
# allows the escaped text to be placed inside a double-quoted attribute value
# as well as in element content.
_XML_QUOTE_ESCAPES = {
    u"'":  u'&apos;',
    u'"':  u'&quot;',
}
# Matches any single character other than tab, line feed, carriage return and
# the ranges U+0020-U+D7FF and U+E000-U+FFFD.  _XmlEscape removes every match
# from the text it returns.
_XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D'
                                      u'\u0020-\uD7FF\uE000-\uFFFD]')
33
34
def _XmlEscape(s):
  """Escapes a value for inclusion in XML, in a way compatible with
  Google's internal Translation Console tool.

  The result is suitable both for attribute values and for element
  contents.  Values that are not already unicode are converted with
  unicode() first; characters matched by _XML_BAD_CHAR_REGEX are dropped
  after escaping.

  Args:
    s: the value to escape

  Returns:
    The escaped text encoded as a UTF-8 byte string.
  """
  text = s if type(s) == unicode else unicode(s)
  # saxutils.escape handles '&', '<' and '>' itself; the extra mapping adds
  # the two quote characters.
  escaped = saxutils.escape(text, _XML_QUOTE_ESCAPES)
  cleaned = _XML_BAD_CHAR_REGEX.sub(u'', escaped)
  return cleaned.encode('utf-8')
44
45
def _WriteAttribute(file, name, value):
  """Emits ' name="value"' to the given file, escaping the value.

  Nothing at all is written when the value is empty (or otherwise falsy).

    Args:
      file: file to write to
      name: name of the attribute
      value: (unescaped) value of the attribute
    """
  if not value:
    return
  attribute_text = ' %s="%s"' % (name, _XmlEscape(value))
  file.write(attribute_text)
56
57
def _WriteMessage(file, message):
  """Writes one message to the specified file as an XMB <msg> element.

  The element carries the message's description, ID and meaning as
  attributes (each only when non-empty).  Placeholder parts are written as
  <ph name="..."><ex>example</ex>original</ph>; all other parts are written
  as escaped text.

  Args:
    file: file to write to
    message: the message to write; it must provide GetPresentableContent(),
        GetDescription(), GetId(), GetMeaning(), GetContent() and a 'parts'
        attribute
  """
  presentable_content = message.GetPresentableContent()
  # The presentable content must be unicode unless the message consists of a
  # single placeholder.  The isinstance() call has to wrap the part itself;
  # taking type() of a comparison result is always truthy and checks nothing.
  assert (isinstance(presentable_content, unicode) or
          (len(message.parts) == 1 and
           isinstance(message.parts[0], tclib.Placeholder)))
  # If stripping the content and collapsing whitespace runs to single spaces
  # changes it, the whitespace is significant and has to be preserved.
  preserve_space = presentable_content != _WHITESPACES_REGEX.sub(
      u' ', presentable_content.strip())

  file.write('<msg')
  _WriteAttribute(file, 'desc', message.GetDescription())
  _WriteAttribute(file, 'id', message.GetId())
  _WriteAttribute(file, 'meaning', message.GetMeaning())
  if preserve_space:
    _WriteAttribute(file, 'xml:space', 'preserve')
  file.write('>')
  # Pretty-printing whitespace is only added when it is not significant.
  if not preserve_space:
    file.write('\n  ')

  parts = message.GetContent()
  for part in parts:
    if isinstance(part, tclib.Placeholder):
      file.write('<ph')
      _WriteAttribute(file, 'name', part.GetPresentation())
      file.write('><ex>')
      file.write(_XmlEscape(part.GetExample()))
      file.write('</ex>')
      file.write(_XmlEscape(part.GetOriginal()))
      file.write('</ph>')
    else:
      file.write(_XmlEscape(part))
  if not preserve_space:
    file.write('\n')
  file.write('</msg>\n')
91
92
def WriteXmbFile(file, messages):
  """Emits an XMB document containing the given messages.

  The XML declaration, the inline DTD and the opening <messagebundle> tag
  are written first, followed by one <msg> element per message (see
  _WriteMessage) and finally the closing tag, without a trailing newline.

  Args:
    file: open file-like object to write to
    messages: iterable of grit.tclib.Message items
  """
  xmb_prologue = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE messagebundle [
<!ELEMENT messagebundle (msg)*>
<!ATTLIST messagebundle class CDATA #IMPLIED>

<!ELEMENT msg (#PCDATA|ph|source)*>
<!ATTLIST msg id CDATA #IMPLIED>
<!ATTLIST msg seq CDATA #IMPLIED>
<!ATTLIST msg name CDATA #IMPLIED>
<!ATTLIST msg desc CDATA #IMPLIED>
<!ATTLIST msg meaning CDATA #IMPLIED>
<!ATTLIST msg obsolete (obsolete) #IMPLIED>
<!ATTLIST msg xml:space (default|preserve) "default">
<!ATTLIST msg is_hidden CDATA #IMPLIED>

<!ELEMENT source (#PCDATA)>

<!ELEMENT ph (#PCDATA|ex)*>
<!ATTLIST ph name CDATA #REQUIRED>

<!ELEMENT ex (#PCDATA)>
]>
<messagebundle>
"""
  xmb_epilogue = '</messagebundle>'

  file.write(xmb_prologue)
  for msg in messages:
    _WriteMessage(file, msg)
  file.write(xmb_epilogue)
124
125
class OutputXmb(interface.Tool):
  """Outputs all translateable messages in the .grd input file to an
.xmb file, which is the format used to give source messages to
Google's internal Translation Console tool.  The format could easily
be used for other systems.

Usage: grit xmb [-i|-h] [-l LIMITFILE] OUTPUTPATH

OUTPUTPATH is the path you want to output the .xmb file to.

The -l option can be used to output only some of the resources to the .xmb file.
LIMITFILE is the path to a file that is used to limit the items output to the
xmb file.  If the filename extension is .grd, the file must be a .grd file
and the tool only output the contents of nodes from the input file that also
exist in the limit file (as compared on the 'name' attribute). Otherwise it must
contain a list of the IDs that output should be limited to, one ID per line, and
the tool will only output nodes with 'name' attributes that match one of the
IDs.

The -i option causes 'grit xmb' to output an "IDs only" file instead of an XMB
file.  The "IDs only" file contains the message ID of each message that would
normally be output to the XMB file, one message ID per line.  It is designed for
use with the 'grit transl2tc' tool's -l option.

Other options:

  -D NAME[=VAL]     Specify a C-preprocessor-like define NAME with optional
                    value VAL (defaults to 1) which will be used to control
                    conditional inclusion of resources.

  -E NAME=VALUE     Set environment variable NAME to VALUE (within grit).

"""
  # The different output formats supported by this tool
  FORMAT_XMB = 0
  FORMAT_IDS_ONLY = 1

  def __init__(self, defines=None):
    """Creates the tool, with FORMAT_XMB as the initial output format.

    Args:
      defines: optional dict mapping define names to values; defines given
          with -D on the command line are added to it by Run()
    """
    super(OutputXmb, self).__init__()
    self.format = self.FORMAT_XMB
    self.defines = defines or {}

  def ShortDescription(self):
    """Returns a one-line description of this tool."""
    return 'Exports all translateable messages into an XMB file.'

  def Run(self, opts, args):
    """Parses the tool's own arguments and writes the output file.

    Args:
      opts: global options; 'input' and 'extra_verbose' are read from it
      args: list of command-line arguments for this tool

    Returns:
      2 if the number of positional arguments is not exactly one, otherwise
      None.
    """
    self.SetOptions(opts)

    limit_file = None
    limit_is_grd = False
    limit_file_dir = None
    # 'E:' has to be part of the option spec, otherwise getopt rejects the
    # documented -E option before the loop below can handle it.  -h is
    # accepted but has no handler here.
    own_opts, args = getopt.getopt(args, 'l:D:E:ih')
    try:
      for key, val in own_opts:
        if key == '-l':
          if limit_file:
            # A repeated -l replaces the earlier limit file; do not leak it.
            limit_file.close()
          limit_file = open(val, 'r')
          limit_file_dir = util.dirname(val)
          if not limit_file_dir:
            limit_file_dir = '.'
          limit_is_grd = os.path.splitext(val)[1] == '.grd'
        elif key == '-i':
          self.format = self.FORMAT_IDS_ONLY
        elif key == '-D':
          name, val = util.ParseDefine(val)
          self.defines[name] = val
        elif key == '-E':
          (env_name, env_value) = val.split('=', 1)
          os.environ[env_name] = env_value
      if len(args) != 1:
        print ('grit xmb takes exactly one argument, the path to the XMB file '
               'to output.')
        return 2

      xmb_path = args[0]
      res_tree = grd_reader.Parse(opts.input, debug=opts.extra_verbose)
      res_tree.SetOutputLanguage('en')
      res_tree.SetDefines(self.defines)
      res_tree.OnlyTheseTranslations([])
      res_tree.RunGatherers()

      with open(xmb_path, 'wb') as output_file:
        self.Process(
          res_tree, output_file, limit_file, limit_is_grd, limit_file_dir)
    finally:
      # Close the limit file on every exit path, including the early return
      # above and any exception raised while parsing or writing.
      if limit_file:
        limit_file.close()
    print("Wrote %s" % xmb_path)

  def Process(self, res_tree, output_file, limit_file=None, limit_is_grd=False,
              dir=None):
    """Writes a document with the contents of res_tree into output_file,
    limiting output to the IDs specified in limit_file, which is a GRD file if
    limit_is_grd is true, otherwise a file with one ID per line.

    The format of the output document depends on this object's format attribute.
    It can be FORMAT_XMB or FORMAT_IDS_ONLY.

    The FORMAT_IDS_ONLY format causes this function to write just a list
    of the IDs of all messages that would have been added to the XMB file, one
    ID per line.

    The FORMAT_XMB format causes this function to output the (default) XMB
    format.

    Messages are written sorted by ID, and each ID is written at most once.

    Args:
      res_tree: base.Node()
      output_file: file open for writing
      limit_file: None or file open for reading
      limit_is_grd: True | False
      dir: Directory of the limit file
    """
    # Names that output is limited to; None means "no limit".  A set keeps
    # the per-node membership test below cheap.
    limit_names = None
    if limit_file:
      if limit_is_grd:
        limit_tree = grd_reader.Parse(limit_file,
                                      dir=dir,
                                      debug=self.o.extra_verbose)
        limit_names = set(node.attrs['name'] for node in limit_tree
                          if 'name' in node.attrs)
      else:
        # Not a GRD file, so it's just a file with one ID per line
        limit_names = set(item.strip()
                          for item in limit_file.read().split('\n'))

    seen_ids = set()
    messages = []
    for node in res_tree:
      if (limit_names is not None and
          not ('name' in node.attrs and node.attrs['name'] in limit_names)):
        continue
      if not node.IsTranslateable():
        continue

      for clique in node.GetCliques():
        if not clique.IsTranslateable():
          continue
        if not clique.GetMessage().GetRealContent():
          continue

        # Some explanation is in order here.  Note that we can have
        # many messages with the same ID.
        #
        # The way we work around this is to maintain a list of cliques
        # per message ID (in the UberClique) and select the "best" one
        # (the first one that has a description, or an arbitrary one
        # if there is no description) for inclusion in the XMB file.
        # The translations are all going to be the same for messages
        # with the same ID, although the way we replace placeholders
        # might be slightly different.
        message_id = clique.GetMessage().GetId()
        if message_id in seen_ids:
          continue
        seen_ids.add(message_id)

        messages.append(node.UberClique().BestClique(message_id).GetMessage())

    # Ensure a stable order of messages, to help regression testing.
    messages.sort(key=lambda x: x.GetId())

    if self.format == self.FORMAT_IDS_ONLY:
      # We just print the list of IDs to the output file.
      for msg in messages:
        output_file.write(msg.GetId())
        output_file.write('\n')
    else:
      assert self.format == self.FORMAT_XMB
      WriteXmbFile(output_file, messages)
292