1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''Collections of messages and their translations, called cliques.  Also
7collections of cliques (uber-cliques).
8'''
9
10import re
11import types
12
13from grit import constants
14from grit import exception
15from grit import lazy_re
16from grit import pseudo
17from grit import pseudo_rtl
18from grit import tclib
19
20
class UberClique(object):
  '''A factory (NOT a singleton factory) for making cliques.  It has several
  methods for working with the cliques created using the factory.
  '''

  def __init__(self):
    # A map from message ID to list of cliques whose source messages have
    # that ID.  This will contain all cliques created using this factory.
    # Different messages can have the same ID because they have the
    # same translateable portion and placeholder names, but occur in different
    # places in the resource tree.
    #
    # Each list of cliques is kept sorted by description, to achieve
    # stable results from the BestClique method, see below.
    self.cliques_ = {}

    # A map of clique IDs to a dict (used as a set: language -> 1) of the
    # languages for which we fell back to English.
    self.fallback_translations_ = {}

    # A map of clique IDs to a dict (used as a set: language -> 1) of the
    # languages for which the translation is missing.
    self.missing_translations_ = {}

  def _AddMissingTranslation(self, lang, clique, is_error):
    '''Records that 'clique' has no translation for language 'lang'.

    Args:
      lang: 'fr'
      clique: the clique lacking the translation (only GetId() is used)
      is_error: True to record a missing translation, False to record a
                fallback to English.
    '''
    if is_error:
      table = self.missing_translations_
    else:
      table = self.fallback_translations_
    # The inner dict only serves as a set, so re-adding a language is a no-op.
    table.setdefault(clique.GetId(), {})[lang] = 1

  def HasMissingTranslations(self):
    '''Returns True if at least one missing (not merely fallback) translation
    has been recorded.
    '''
    return bool(self.missing_translations_)

  def MissingTranslationsReport(self):
    '''Returns a string suitable for printing to report missing
    and fallback translations to the user.  The string is empty if there is
    nothing to report.
    '''
    def ReportTranslation(clique, langs):
      text = clique.GetMessage().GetPresentableContent()
      # The text 'error' (usually 'Error:' but we are conservative)
      # can trigger some build environments (Visual Studio, we're
      # looking at you) to consider invocation of grit to have failed,
      # so we make sure never to output that word.
      extract = re.sub('(?i)error', 'REDACTED', text[0:40])[0:40]
      ellipsis = '...' if len(text) > 40 else ''
      # Sorting gives a real list (so it can be sliced on any Python version)
      # and makes the report independent of dict ordering.
      ordered_langs = sorted(langs)
      describe_langs = ','.join(ordered_langs[0:6])
      if len(ordered_langs) > 6:
        describe_langs += " and %d more" % (len(ordered_langs) - 6)
      return "  %s \"%s%s\" %s" % (clique.GetId(), extract, ellipsis,
                                   describe_langs)

    lines = []
    if self.fallback_translations_:
      lines.append(
        "WARNING: Fell back to English for the following translations:")
      for (id, langs) in self.fallback_translations_.items():
        lines.append(ReportTranslation(self.cliques_[id][0], langs))
    if self.missing_translations_:
      lines.append("ERROR: The following translations are MISSING:")
      for (id, langs) in self.missing_translations_.items():
        lines.append(ReportTranslation(self.cliques_[id][0], langs))
    return '\n'.join(lines)

  def MakeClique(self, message, translateable=True):
    '''Create a new clique initialized  with a message, and register it under
    the message's ID.

    Args:
      message: tclib.Message()
      translateable: True | False

    Return:
      The newly created MessageClique.
    '''
    clique = MessageClique(self, message, translateable)

    # Enable others to find this clique by its message ID
    msg_id = message.GetId()
    siblings = self.cliques_.get(msg_id)
    if siblings is None:
      self.cliques_[msg_id] = [clique]
      return clique

    if not message.HasAssignedId():
      # Messages sharing a generated ID are expected to present identically.
      presentable_text = clique.GetMessage().GetPresentableContent()
      for c in siblings:
        assert c.GetMessage().GetPresentableContent() == presentable_text
    siblings.append(clique)
    # We need to keep each list of cliques sorted by description, to
    # achieve stable results from the BestClique method, see below.
    siblings.sort(key=lambda c: c.GetMessage().GetDescription())
    return clique

  def FindCliqueAndAddTranslation(self, translation, language):
    '''Adds the specified translation to the clique with the source message
    it is a translation of.  If several cliques share the ID, the translation
    is added to each of them.

    Args:
      translation: tclib.Translation()
      language: 'en' | 'fr' ...

    Return:
      True if the source message was found, otherwise false.
    '''
    cliques = self.cliques_.get(translation.GetId())
    if cliques is None:
      return False
    for clique in cliques:
      clique.AddTranslation(translation, language)
    return True

  def BestClique(self, id):
    '''Returns the "best" clique from a list of cliques.  All the cliques
    must have the same ID.  The "best" clique is chosen in the following
    order of preference:
    - The first clique that has a non-ID-based description.
    - If no such clique found, the first clique with an ID-based description.
    - Otherwise the first clique.

    This method is stable in terms of always returning a clique with
    an identical description (on different runs of GRIT on the same
    data) because self.cliques_ is sorted by description.

    Raises KeyError if no clique has been created for 'id'.
    '''
    clique_list = self.cliques_[id]
    clique_with_id = None
    for clique in clique_list:
      description = clique.GetMessage().GetDescription()
      if not description:
        continue
      if not description.startswith('ID:'):
        # this is the preferred case so we exit right away
        return clique
      if clique_with_id is None:
        clique_with_id = clique

    if clique_with_id is not None:
      return clique_with_id
    if clique_list:
      return clique_list[0]
    return None

  def BestCliquePerId(self):
    '''Iterates over the list of all cliques and returns the best clique for
    each ID.  This will be the first clique with a source message that has a
    non-empty description, or an arbitrary clique if none of them has a
    description.
    '''
    for id in self.cliques_:
      yield self.BestClique(id)

  def BestCliqueByOriginalText(self, text, meaning):
    '''Looks among the "best" (as in BestClique()) cliques for one whose source
    message has original text 'text' and meaning 'meaning'.  Returns None if
    there is no such clique.

    NOTE(review): despite the method name, the value returned on a match is
    the clique's source message (the result of GetMessage()), not the clique
    itself.  This is kept as-is because existing callers may rely on it.
    '''
    # If needed, this can be optimized by maintaining a map of
    # fingerprints of original text+meaning to cliques.
    for c in self.BestCliquePerId():
      msg = c.GetMessage()
      if msg.GetRealContent() == text and msg.GetMeaning() == meaning:
        return msg
    return None

  def AllMessageIds(self):
    '''Returns a list of all defined message IDs.
    '''
    # list() guarantees an actual list rather than a dict view.
    return list(self.cliques_)

  def AllCliques(self):
    '''Iterates over all cliques.  Note that this can return multiple cliques
    with the same ID.
    '''
    for cliques in self.cliques_.values():
      for c in cliques:
        yield c

  def GenerateXtbParserCallback(self, lang, debug=False):
    '''Creates a callback function as required by grit.xtb_reader.Parse().
    This callback will create Translation objects for each message from
    the XTB that exists in this uberclique, and add them as translations for
    the relevant cliques.  The callback will add translations to the language
    specified by 'lang'

    The callback takes (id, structure), where 'structure' is iterated as
    (is_placeholder, text) pairs.  It raises
    exception.MismatchingPlaceholders if a placeholder in the translation has
    no counterpart in the original message.

    Args:
      lang: 'fr'
      debug: True | False
    '''
    def Callback(id, structure):
      if id not in self.cliques_:
        if debug:
          print("Ignoring translation #%s" % id)
        return

      if debug:
        print("Adding translation #%s" % id)

      # We fetch placeholder information from the original message (the XTB file
      # only contains placeholder names).
      original_msg = self.BestClique(id).GetMessage()

      translation = tclib.Translation(id=id)
      for is_ph, text in structure:
        if not is_ph:
          translation.AppendText(text)
          continue
        for ph in original_msg.GetPlaceholders():
          if ph.GetPresentation() == text:
            translation.AppendPlaceholder(tclib.Placeholder(
              ph.GetPresentation(), ph.GetOriginal(), ph.GetExample()))
            break
        else:
          # No placeholder of the original message has this name.
          raise exception.MismatchingPlaceholders(
            'Translation for message ID %s had <ph name="%s"/>, no match\n'
            'in original message' % (id, text))
      self.FindCliqueAndAddTranslation(translation, lang)
    return Callback
237
238
class CustomType(object):
  '''Base class for custom message-clique types.  Subclass it to provide
  custom validation of messages and translations, and optionally to modify
  translations.'''

  def Validate(self, message):
    '''Subclasses return true when the message (a tclib.Message object) is
    valid and false otherwise.  The base implementation always raises
    NotImplementedError.
    '''
    raise NotImplementedError()

  def ValidateAndModify(self, lang, translation):
    '''Subclasses return true when the translation (a tclib.Translation
    object) for language 'lang' is valid and false otherwise.  An
    implementation is free to modify the translation it is handed.  The base
    implementation always raises NotImplementedError.
    '''
    raise NotImplementedError()

  def ModifyTextPart(self, lang, text):
    '''Hook invoked by ModifyEachTextPart for every text part of a
    translation.  Return the replacement text, or 'text' itself to leave the
    part untouched.  The base implementation always raises
    NotImplementedError.
    '''
    raise NotImplementedError()

  def ModifyEachTextPart(self, lang, translation):
    '''Convenience helper that rewrites the textual parts of a translation in
    place: every string item of translation.GetContent() is replaced by the
    result of ModifyTextPart, while non-string items are left alone.
    '''
    parts = translation.GetContent()
    for index, part in enumerate(parts):
      if isinstance(part, types.StringTypes):
        parts[index] = self.ModifyTextPart(lang, part)
273
274
class OneOffCustomType(CustomType):
  '''A minimal custom type whose validation is a single Python expression,
  applied to every language including the source language.

  The expression is evaluated with three names available: 'lang' (the
  language of 'msg'), 'msg' (the message or translation being validated) and
  'text()' (shorthand that returns the real contents of 'msg').
  '''

  def __init__(self, expression):
    # The expression is stored as given and evaluated on each validation.
    self.expr = expression

  def Validate(self, message):
    '''Validates the source message by treating it as a translation into
    MessageClique.source_language.
    '''
    return self.ValidateAndModify(MessageClique.source_language, message)

  def ValidateAndModify(self, lang, msg):
    '''Evaluates the stored expression against 'lang' and 'msg' and returns
    its result.
    '''
    # NOTE(review): eval() executes whatever expression was configured, so
    # the expression must only ever come from a trusted source.
    scope = {
      'lang' : lang,
      'text' : lambda: msg.GetRealContent(),
      'msg' : msg,
    }
    return eval(self.expr, {}, scope)
294
295
class MessageClique(object):
  '''A message along with all of its translations.  Also code to bring
  translations together with their original message.'''

  # change this to the language code of Messages you add to cliques_.
  # TODO(joi) Actually change this based on the <grit> node's source language
  source_language = 'en'

  # A constant translation we use when asked for a translation into the
  # special language constants.CONSTANT_LANGUAGE.
  CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT')

  # A pattern to match messages that are empty or whitespace only.  The
  # backslash is escaped explicitly so the literal does not depend on Python
  # passing an unrecognized string escape through unchanged.
  WHITESPACE_MESSAGE = lazy_re.compile(u'^\\s*$')

  def __init__(self, uber_clique, message, translateable=True, custom_type=None):
    '''Create a new clique initialized with just a message.

    Note that messages with a body comprised only of whitespace will implicitly
    be marked non-translatable.

    Args:
      uber_clique: Our uber-clique (collection of cliques)
      message: tclib.Message()
      translateable: True | False
      custom_type: instance of clique.CustomType interface

    Throws:
      grit.exception.InvalidMessage if custom_type is given and rejects the
      message.
    '''
    # Our parent
    self.uber_clique = uber_clique
    # If not translateable, we only store the original message.
    self.translateable = translateable

    # We implicitly mark messages that have a whitespace-only body as
    # non-translateable.
    if MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent()):
      self.translateable = False

    # A mapping of language identifiers to tclib.BaseMessage and its
    # subclasses (i.e. tclib.Message and tclib.Translation).
    self.clique = { MessageClique.source_language : message }
    # A list of the "shortcut groups" this clique is
    # part of.  Within any given shortcut group, no shortcut key (e.g. &J)
    # must appear more than once in each language for all cliques that
    # belong to the group.
    self.shortcut_groups = []
    # An instance of the CustomType interface, or None.  If this is set, it will
    # be used to validate the original message and translations thereof, and
    # will also get a chance to modify translations of the message.
    self.SetCustomType(custom_type)

  def GetMessage(self):
    '''Retrieves the tclib.Message that is the source for this clique.'''
    return self.clique[MessageClique.source_language]

  def GetId(self):
    '''Retrieves the message ID of the messages in this clique.'''
    return self.GetMessage().GetId()

  def IsTranslateable(self):
    '''Returns whether translations are stored for this clique.'''
    return self.translateable

  def AddToShortcutGroup(self, group):
    '''Appends 'group' to the list of shortcut groups this clique is part of.'''
    self.shortcut_groups.append(group)

  def SetCustomType(self, custom_type):
    '''Makes this clique use custom_type for validating messages and
    translations, and optionally modifying translations.

    Throws:
      grit.exception.InvalidMessage if custom_type is set and its Validate()
      rejects the source message.
    '''
    self.custom_type = custom_type
    if custom_type and not custom_type.Validate(self.GetMessage()):
      raise exception.InvalidMessage(self.GetMessage().GetRealContent())

  def MessageForLanguage(self, lang, pseudo_if_no_match=True, fallback_to_english=False):
    '''Returns the message/translation for the specified language, providing
    a pseudotranslation if there is no available translation and a pseudo-
    translation is requested.

    The translation of any message whatsoever in the special language
    'x_constant' is the message "TTTTTT".

    Args:
      lang: 'en'
      pseudo_if_no_match: True
      fallback_to_english: False

    Return:
      tclib.BaseMessage
    '''
    if not self.translateable:
      return self.GetMessage()

    if lang == constants.CONSTANT_LANGUAGE:
      return self.CONSTANT_TRANSLATION

    # Direct lookup; no need to scan every language in the clique.
    if lang in self.clique:
      return self.clique[lang]

    if lang == constants.FAKE_BIDI:
      return pseudo_rtl.PseudoRTLMessage(self.GetMessage())

    if fallback_to_english:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=False)
      return self.GetMessage()

    # If we're not supposed to generate pseudotranslations, we add an error
    # report to a list of errors, then fail at a higher level, so that we
    # get a list of all messages that are missing translations.
    if not pseudo_if_no_match:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=True)

    # A pseudotranslation is returned even when the miss was recorded above.
    return pseudo.PseudoMessage(self.GetMessage())

  def AllMessagesThatMatch(self, lang_re, include_pseudo = True):
    '''Returns a map of all messages that match 'lang', including the pseudo
    translation if requested.

    NOTE(review): for a non-translateable clique the return value is a
    one-element list holding the source message rather than a map.  This is
    kept as-is because existing callers may rely on it.

    Args:
      lang_re: re.compile('fr|en')
      include_pseudo: True

    Return:
      { 'en' : tclib.Message,
        'fr' : tclib.Translation,
        pseudo.PSEUDO_LANG : tclib.Translation }
    '''
    if not self.translateable:
      return [self.GetMessage()]

    matches = {}
    for msglang, msg in self.clique.items():
      if lang_re.match(msglang):
        matches[msglang] = msg

    if include_pseudo:
      matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage())

    return matches

  def AddTranslation(self, translation, language):
    '''Add a translation to this clique.  The translation must have the same
    ID as the message that is the source for this clique.

    If this clique is not translateable, the function just returns.

    Args:
      translation: tclib.Translation()
      language: 'en'

    Throws:
      grit.exception.InvalidTranslation if the translation you're trying to add
      doesn't have the same message ID as the source message of this clique.
      AssertionError if the translation does not have the same number of
      placeholders as the source message.
    '''
    if not self.translateable:
      return
    if translation.GetId() != self.GetId():
      raise exception.InvalidTranslation(
        'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId()))

    assert language not in self.clique

    # Because two messages can differ in the original content of their
    # placeholders yet share the same ID (because they are otherwise the
    # same), the translation we are getting may have different original
    # content for placeholders than our message, yet it is still the right
    # translation for our message (because it is for the same ID).  We must
    # therefore fetch the original content of placeholders from our original
    # English message.
    #
    # See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques
    # for a concrete explanation of why this is necessary.

    original = self.MessageForLanguage(self.source_language, False)
    if len(original.GetPlaceholders()) != len(translation.GetPlaceholders()):
      mismatch = ("ERROR: '%s' translation of message id %s does not match" %
                  (language, translation.GetId()))
      print(mismatch)
      # Raised explicitly instead of via 'assert False': assert statements are
      # removed when Python runs with -O, which would let the mismatching
      # translation be stored silently.
      raise AssertionError(mismatch)

    transl_msg = tclib.Translation(id=self.GetId(),
                                   text=translation.GetPresentableContent(),
                                   placeholders=original.GetPlaceholders())

    # A failed custom validation is only reported; the translation is still
    # stored below.
    if self.custom_type and not self.custom_type.ValidateAndModify(language, transl_msg):
      print("WARNING: %s translation failed validation: %s" % (
        language, transl_msg.GetId()))

    self.clique[language] = transl_msg
483
484