1#!/usr/bin/env python
2# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6'''Pseudotranslation support.  Our pseudotranslations are based on the
7P-language, which is a simple vowel-extending language.  Examples of P:
8  - "hello" becomes "hepellopo"
9  - "howdie" becomes "hopowdiepie"
  - "because" becomes "bepecaupause" (but our implementation does not handle
    the silent e at the end, so it would actually return "bepecaupausepe")
12
13The P-language has the excellent quality of increasing the length of text
14by around 30-50% which is great for pseudotranslations, to stress test any
15GUI layouts etc.
16
17To make the pseudotranslations more obviously "not a translation" and to make
18them exercise any code that deals with encodings, we also transform all English
vowels into equivalent vowels with diacritics on them (rings, acutes,
diaeresis, and circumflex), and we write the "p" in the P-language as a Hebrew
21character Qof.  It looks sort of like a latin character "p" but it is outside
22the latin-1 character set which will stress character encoding bugs.
23'''
24
25from grit import lazy_re
26from grit import tclib
27
28
# An RFC language code for the P pseudolanguage.
PSEUDO_LANG = 'x-P-pseudo'

# Hebrew character Qof.  It looks kind of like a 'p' but is outside
# the latin-1 character set which is good for our purposes.
# TODO(joi) For now using P instead of Qof, because of some bugs it caused.
# Find a better solution, i.e. one that introduces a non-latin1 character into
# the pseudotranslation.
#_QOF = u'\u05e7'
_QOF = u'P'

# How we map each vowel.  Keys are the plain English vowels (lower and upper
# case, with 'y' counted as a vowel); values are the accented replacements.
_VOWELS = {
  u'a' : u'\u00e5',  # a with ring
  u'e' : u'\u00e9',  # e acute
  u'i' : u'\u00ef',  # i diaeresis
  u'o' : u'\u00f4',  # o circumflex
  u'u' : u'\u00fc',  # u diaeresis
  u'y' : u'\u00fd',  # y acute
  u'A' : u'\u00c5',  # A with ring
  u'E' : u'\u00c9',  # E acute
  u'I' : u'\u00cf',  # I diaeresis
  u'O' : u'\u00d4',  # O circumflex
  u'U' : u'\u00dc',  # U diaeresis
  u'Y' : u'\u00dd',  # Y acute
}
55
# Matches any single vowel listed in _VOWELS, or a capital P.  The keys are
# materialized with list() so that the concatenation also works on Python 3,
# where dict.keys() returns a view that does not support '+'.
_PSUB_RE = lazy_re.compile("(%s)" % '|'.join(list(_VOWELS) + ['P']))
58
59
# Pseudotranslations previously created: maps each source string passed to
# PseudoString() to the pseudotranslation that was computed for it.  This is
# important for performance reasons, especially since we routinely
# pseudotranslate the whole project several or many different times for each
# build.
_existing_translations = {}
64
65
def MapVowels(str, also_p=False):
  '''Returns a copy of 'str' where characters that exist as keys in _VOWELS
  have been replaced with the corresponding value.  If also_p is true, this
  function will also change capital P characters into _QOF (intended to be the
  Hebrew character Qof); otherwise capital P characters are left unchanged.

  Args:
    str: The text to transform.
    also_p: Whether capital P characters should be replaced with _QOF.

  Return:
    The transformed text.
  '''
  def Repl(match):
    char = match.group()
    if char in _VOWELS:
      return _VOWELS[char]
    # The only non-vowel that _PSUB_RE matches is the letter P.  It must not be
    # looked up in _VOWELS (it is not a key there, which used to raise
    # KeyError); either swap it for _QOF or hand it back untouched.
    if also_p:
      return _QOF
    return char
  return _PSUB_RE.sub(Repl, str)
80
81
def PseudoString(str):
  '''Returns a pseudotranslation of the provided string, in our enhanced
  P-language.

  Every run of consecutive vowels is replaced by its accented form (see
  MapVowels), followed by _QOF, followed by the accented form again.  All other
  characters are copied through unchanged.  Results are remembered in
  _existing_translations so each distinct string is only translated once.

  Args:
    str: The text to pseudotranslate.

  Return:
    The pseudotranslated text.
  '''
  cached = _existing_translations.get(str)
  if cached is not None:
    return cached

  pieces = []
  length = len(str)
  ix = 0
  while ix < length:
    # We want to treat consecutive vowels as one composite vowel.  This is not
    # always accurate e.g. in composite words but good enough.
    run_start = ix
    while ix < length and str[ix] in _VOWELS:
      ix += 1
    if ix > run_start:
      changed_vowels = MapVowels(str[run_start:ix])
      pieces.extend((changed_vowels, _QOF, changed_vowels))
    else:
      # Not a vowel: copy the character through as-is.
      pieces.append(str[ix])
      ix += 1

  # Join once at the end instead of growing the string piece by piece.
  outstr = u''.join(pieces)
  _existing_translations[str] = outstr
  return outstr
108
109
def PseudoMessage(message):
  '''Builds the pseudotranslation of a whole message.

  Placeholders in the message content are carried over to the translation
  untouched; every other content part is run through PseudoString().

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  result = tclib.Translation()
  for piece in message.GetContent():
    if not isinstance(piece, tclib.Placeholder):
      result.AppendText(PseudoString(piece))
      continue
    # Placeholders must survive translation exactly as they are.
    result.AppendPlaceholder(piece)
  return result
128
129