1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''The 'grit transl2tc' tool.
7'''
8
9
10from grit import grd_reader
11from grit import util
12from grit.tool import interface
13from grit.tool import rc2grd
14
15from grit.extern import tclib
16
17
class TranslationToTc(interface.Tool):
  '''A tool for importing existing translations in RC format into the
Translation Console.

Usage:

grit -i GRD transl2tc [-l LIMITS] [RCOPTS] SOURCE_RC TRANSLATED_RC OUT_FILE

The tool needs a "source" RC file, i.e. in English, and an RC file that is a
translation of precisely the source RC file (not of an older or newer version).

The tool also requires you to provide a .grd file (input file) e.g. using the
-i global option or the GRIT_INPUT environment variable.  The tool uses
information from your .grd file to correct placeholder names in the
translations and ensure that only translatable items and translations still
being used are output.

This tool will accept all the same RCOPTS as the 'grit rc2grd' tool.  To get
a list of these options, run 'grit help rc2grd'.

Additionally, you can use the -l option (which must be the first option to the
tool) to specify a file containing a list of message IDs to which output should
be limited.  This is only useful if you are limiting the output to your XMB
files using the 'grit xmb' tool's -l option.  See 'grit help xmb' for how to
generate a file containing a list of the message IDs in an XMB file.

The tool will scan through both of the RC files as well as any HTML files they
refer to, and match together the source messages and translated messages.  It
will output a file (OUT_FILE) you can import directly into the TC using the
Bulk Translation Upload tool.
'''

  def ShortDescription(self):
    '''Returns the one-line summary of what this tool does.'''
    return 'Import existing translations in RC format into the TC'

  def Setup(self, globopt, args):
    '''Sets the instance up for use.

    Stores the global options, creates the Rc2Grd helper used to convert the
    RC files, and consumes a leading '-l LIMITS_FILE' option if present.

    Args:
      globopt: Global options object, passed on to SetOptions()
      args: Tool-specific command line arguments

    Return:
      The arguments remaining after the Rc2Grd helper has parsed its own
      options, or an empty list if '-l' was given without a file name.
    '''
    self.SetOptions(globopt)
    self.rc2grd = rc2grd.Rc2Grd()
    self.rc2grd.SetOptions(globopt)
    self.limits = None
    if args and args[0] == '-l':
      if len(args) < 2:
        # Previously this crashed with an IndexError.  Returning no
        # positional arguments makes Run() print its usage message.
        self.Out('The -l option requires a file name argument.\n')
        return []
      # One message ID per line.
      self.limits = util.ReadFile(args[1], util.RAW_TEXT).split('\n')
      args = args[2:]
    return self.rc2grd.ParseOptions(args)

  def Run(self, globopt, args):
    '''Runs the tool: reads both RC files, matches up the translations and
    writes them to the output file.

    Args:
      globopt: Global options object
      args: Tool-specific command line arguments (see the class docstring)

    Return:
      2 if the wrong number of arguments was supplied, otherwise None.
    '''
    args = self.Setup(globopt, args)

    if len(args) != 3:
      self.Out('This tool takes exactly three arguments:\n'
             '  1. The path to the original RC file\n'
             '  2. The path to the translated RC file\n'
             '  3. The output file path.\n')
      return 2

    source_path, transl_path, output_path = args

    grd = grd_reader.Parse(self.o.input, debug=self.o.extra_verbose)
    grd.RunGatherers()

    source_rc = util.ReadFile(source_path, self.rc2grd.input_encoding)
    transl_rc = util.ReadFile(transl_path, self.rc2grd.input_encoding)
    translations = self.ExtractTranslations(grd,
                                            source_rc, source_path,
                                            transl_rc, transl_path)

    with util.WrapOutputStream(open(output_path, 'w')) as output_file:
      self.WriteTranslations(output_file, translations.items())

    self.Out('Wrote output file %s' % output_path)

  def ExtractTranslations(self, current_grd, source_rc, source_path,
                                             transl_rc, transl_path):
    '''Extracts translations from the translated RC file, matching them with
    translations in the source RC file to calculate their ID, and correcting
    placeholders, limiting output to translateables, etc. using the supplied
    .grd file which is the current .grd file for your project.

    If this object's 'limits' attribute is a non-empty list, the output of
    this function will be further limited to include only messages that have
    message IDs in the 'limits' list.

    Args:
      current_grd: grit.node.base.Node child, that has had RunGatherers() run
                   on it
      source_rc: Complete text of source RC file
      source_path: Path to the source RC file
      transl_rc: Complete text of translated RC file
      transl_path: Path to the translated RC file

    Return:
      { id1 : text1, '12345678' : 'Hello USERNAME, howzit?' }
    '''
    source_grd = self.rc2grd.Process(source_rc, source_path)
    self.VerboseOut('Read %s into GRIT format, running gatherers.\n' % source_path)
    source_grd.SetOutputLanguage(current_grd.output_language)
    source_grd.SetDefines(current_grd.defines)
    source_grd.RunGatherers(debug=self.o.extra_verbose)
    transl_grd = self.rc2grd.Process(transl_rc, transl_path)
    transl_grd.SetOutputLanguage(current_grd.output_language)
    transl_grd.SetDefines(current_grd.defines)
    self.VerboseOut('Read %s into GRIT format, running gatherers.\n' % transl_path)
    transl_grd.RunGatherers(debug=self.o.extra_verbose)
    self.VerboseOut('Done running gatherers for %s.\n' % transl_path)

    # Proceed to create a map from ID to translation, getting the ID from the
    # source GRD and the translation from the translated GRD.
    id2transl = {}
    for source_node in source_grd:
      source_cliques = source_node.GetCliques()
      if not source_cliques:
        continue

      assert 'name' in source_node.attrs, 'All nodes with cliques should have an ID'
      node_id = source_node.attrs['name']
      self.ExtraVerboseOut('Processing node %s\n' % node_id)
      transl_node = transl_grd.GetNodeById(node_id)

      if not transl_node:
        self.Out('Warning: No translation for %s, skipping.\n' % node_id)
        continue

      transl_cliques = transl_node.GetCliques()
      if len(transl_cliques) != len(source_cliques):
        self.Out(
          'Warning: Translation for %s has wrong # of cliques, skipping.\n' %
          node_id)
        continue

      if source_node.name == 'message':
        self._FixupMessagePlaceholders(current_grd, node_id,
                                       source_cliques, transl_cliques)

      self._RecordTranslations(id2transl, source_cliques, transl_cliques)

    # Remove translations for messages that do not occur in the current .grd
    # or have been marked as not translateable, or do not occur in the 'limits'
    # list (if it has been set).
    uberclique = current_grd.UberClique()
    current_message_ids = set(uberclique.AllMessageIds())
    limits = set(self.limits) if self.limits else None
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over its live key view raises RuntimeError on Python 3.
    for message_id in list(id2transl):
      if (message_id not in current_message_ids or
          not uberclique.BestClique(message_id).IsTranslateable() or
          (limits is not None and message_id not in limits)):
        del id2transl[message_id]

    return id2transl

  def _FixupMessagePlaceholders(self, current_grd, node_id,
                                source_cliques, transl_cliques):
    '''Replaces 'TODO_XXXX' placeholders in the source and translated message
    of node 'node_id' with the corresponding placeholders from the current
    .grd file, modifying the message contents in place.

    We need to fixup placeholders in the translated message so that it looks
    right, and in the source message so that its calculated ID will match the
    current message.  Nothing is changed if the node is not in the current
    .grd, if the source text has changed, or if the placeholder structure of
    the translation differs from the current message.
    '''
    current_node = current_grd.GetNodeById(node_id)
    if not current_node:
      return

    assert len(source_cliques) == len(current_node.GetCliques()) == 1

    source_msg = source_cliques[0].GetMessage()
    current_msg = current_node.GetCliques()[0].GetMessage()

    # Only do this for messages whose source version has not changed.
    if source_msg.GetRealContent() != current_msg.GetRealContent():
      self.VerboseOut('Info: Message %s has changed; skipping\n' % node_id)
      return

    transl_content = transl_cliques[0].GetMessage().GetContent()
    current_content = current_msg.GetContent()
    source_content = source_msg.GetContent()

    if not self._HasSameStructure(transl_content, current_content):
      self.VerboseOut(
        'Info: Structure of message %s has changed; skipping.\n' % node_id)
      return

    for ix in range(len(transl_content)):
      self._AdoptCurrentPlaceholder(transl_content, current_content, ix)
      self._AdoptCurrentPlaceholder(source_content, current_content, ix)

  @staticmethod
  def _HasSameStructure(transl_content, current_content):
    '''Returns True if both content lists have the same length, have
    placeholders at exactly the same positions, and the placeholders at each
    position have the same original text.
    '''
    if len(transl_content) != len(current_content):
      # message structure of translation is different
      return False
    for transl_part, current_part in zip(transl_content, current_content):
      transl_is_placeholder = isinstance(transl_part, tclib.Placeholder)
      current_is_placeholder = isinstance(current_part, tclib.Placeholder)
      if transl_is_placeholder != current_is_placeholder:
        return False  # structure has changed or placeholders were reordered
      if (transl_is_placeholder and
          transl_part.GetOriginal() != current_part.GetOriginal()):
        return False  # placeholders have likely been reordered
    return True

  @staticmethod
  def _AdoptCurrentPlaceholder(content, current_content, ix):
    '''If content[ix] is a placeholder whose presentation starts with 'TODO_',
    replaces it in place with the placeholder at the same index of the
    current message, to get the placeholder ID and example from there.
    '''
    part = content[ix]
    if (isinstance(part, tclib.Placeholder) and
        part.GetPresentation().startswith('TODO_')):
      assert isinstance(current_content[ix], tclib.Placeholder)
      content[ix] = current_content[ix]

  def _RecordTranslations(self, id2transl, source_cliques, transl_cliques):
    '''Adds the translation of each clique pair to 'id2transl', keyed by the
    ID of the source message.

    Only puts each translation once into the map; warns if translations for
    the same message are different (the first one seen is kept).
    '''
    for source_clique, transl_clique in zip(source_cliques, transl_cliques):
      source_msg = source_clique.GetMessage()
      source_msg.GenerateId()  # needed to refresh ID based on new placeholders
      message_id = source_msg.GetId()
      translated_content = transl_clique.GetMessage().GetPresentableContent()

      if message_id not in id2transl:
        id2transl[message_id] = translated_content
        continue

      existing_translation = id2transl[message_id]
      if existing_translation != translated_content:
        original_text = source_clique.GetMessage().GetPresentableContent()
        self.Out('Warning: Two different translations for "%s":\n'
               '  Translation 1: "%s"\n'
               '  Translation 2: "%s"\n' %
               (original_text, existing_translation, translated_content))

  @staticmethod
  def WriteTranslations(output_file, translations):
    '''Writes the provided list of translations to the provided output file
    in the format used by the TC's Bulk Translation Upload tool.  The file
    must be UTF-8 encoded.

    Each translation is written as one 'ID TEXT' record followed by a newline,
    with '<' and '>' in the text replaced by '&lt;' and '&gt;'.

    Args:
      output_file: util.WrapOutputStream(open('bingo.out', 'w'))
      translations: [ [id1, text1], ['12345678', 'Hello USERNAME, howzit?'] ]

    Return:
      None
    '''
    for message_id, text in translations:
      escaped_text = text.replace('<', '&lt;').replace('>', '&gt;')
      output_file.write(message_id)
      output_file.write(' ')
      output_file.write(escaped_text)
      output_file.write('\n')
252
253