195640e3a20adea634b4df4ccf8c93f411184c438joi@chromium.org#!/usr/bin/env python
295640e3a20adea634b4df4ccf8c93f411184c438joi@chromium.org# Copyright (c) 2012 The Chromium Authors. All rights reserved.
301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org# Use of this source code is governed by a BSD-style license that can be
401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org# found in the LICENSE file.
501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org'''Pseudotranslation support.  Our pseudotranslations are based on the
701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgP-language, which is a simple vowel-extending language.  Examples of P:
801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org  - "hello" becomes "hepellopo"
901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org  - "howdie" becomes "hopowdiepie"
  - "because" becomes "bepecaupause" (but in our implementation we don't
    handle the silent e at the end so it actually would return "bepecaupausepe")
1201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
1301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgThe P-language has the excellent quality of increasing the length of text
1401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgby around 30-50% which is great for pseudotranslations, to stress test any
1501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgGUI layouts etc.
1601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
1701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgTo make the pseudotranslations more obviously "not a translation" and to make
1801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgthem exercise any code that deals with encodings, we also transform all English
vowels into equivalent vowels with diacriticals on them (rings, acutes,
diaeresis, and circumflex), and we write the "p" in the P-language as a Hebrew
2101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgcharacter Qof.  It looks sort of like a latin character "p" but it is outside
2201b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgthe latin-1 character set which will stress character encoding bugs.
2301b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org'''
2401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
2501fadb72b6e94e6511eaffd1874a8cc095f098a7joi@chromium.orgfrom grit import lazy_re
2601b3bc768461bd303bff39f8cd1663682254e407joi@chromium.orgfrom grit import tclib
2701b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
2801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
# An RFC language code for the P pseudolanguage.
PSEUDO_LANG = 'x-P-pseudo'

# Hebrew character Qof.  It looks kind of like a 'p' but is outside
# the latin-1 character set which is good for our purposes.
# TODO(joi) For now using P instead of Qof, because of some bugs it caused.
# Find a better solution, i.e. one that introduces a non-latin1 character into
# the pseudotranslation.
#_QOF = u'\u05e7'
_QOF = u'P'

# How we map each vowel: every key is a plain English vowel (both cases, with
# 'y' counted as a vowel) and every value is the same letter carrying a
# diacritical mark.
_VOWELS = {
  u'a' : u'\u00e5',  # a with ring
  u'e' : u'\u00e9',  # e acute
  u'i' : u'\u00ef',  # i diaeresis
  u'o' : u'\u00f4',  # o circumflex
  u'u' : u'\u00fc',  # u diaeresis
  u'y' : u'\u00fd',  # y acute
  u'A' : u'\u00c5',  # A with ring
  u'E' : u'\u00c9',  # E acute
  u'I' : u'\u00cf',  # I diaeresis
  u'O' : u'\u00d4',  # O circumflex
  u'U' : u'\u00dc',  # U diaeresis
  u'Y' : u'\u00dd',  # Y acute
}

# Matches vowels and P.  sorted() is used instead of _VOWELS.keys() because
# it returns a real list on both Python 2 and Python 3 (a Python 3 dict view
# cannot be concatenated with a list), and it also makes the generated pattern
# independent of dict ordering.
_PSUB_RE = lazy_re.compile("(%s)" % '|'.join(sorted(_VOWELS) + ['P']))


# Pseudotranslations previously created, keyed by the source string.  This is
# important for performance reasons, especially since we routinely
# pseudotranslate the whole project several or many different times for each
# build.
_existing_translations = {}
6401b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
6501b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
def MapVowels(str, also_p = False):
  '''Returns a copy of 'str' where characters that exist as keys in _VOWELS
  have been replaced with the corresponding value.  If also_p is true, this
  function will also change capital P characters into _QOF (intended to be the
  Hebrew character Qof).

  Args:
    str: The text to transform.
    also_p: Whether capital 'P' characters are replaced with _QOF.  When
      false they are left untouched.

  Return:
    The transformed text.
  '''
  def Repl(match):
    found = match.group()
    # _PSUB_RE matches the vowels plus a *capital* 'P'.  The comparison has to
    # use the same case; comparing against lowercase 'p' sent every 'P' to the
    # _VOWELS lookup below, which raised KeyError.
    if found == 'P':
      if also_p:
        return _QOF
      return found
    return _VOWELS[found]
  return _PSUB_RE.sub(Repl, str)
8001b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
8101b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
def PseudoString(str):
  '''Returns a pseudotranslation of the provided string, in our enhanced
  P-language.

  Every run of consecutive vowels is mapped through MapVowels() and emitted
  twice with _QOF in between; all other characters are copied unchanged.
  Results are memoized in _existing_translations.

  Args:
    str: The text to pseudotranslate.

  Return:
    The pseudotranslated text.
  '''
  if str in _existing_translations:
    return _existing_translations[str]

  # Collect the output pieces and join once at the end instead of growing a
  # string with repeated concatenation.
  pieces = []
  length = len(str)
  ix = 0
  while ix < length:
    # Membership is tested on the dict itself; going through .keys() would
    # build and scan a list for every character on Python 2.
    if str[ix] not in _VOWELS:
      pieces.append(str[ix])
      ix += 1
      continue

    # We want to treat consecutive vowels as one composite vowel.  This is not
    # always accurate e.g. in composite words but good enough.
    start = ix
    while ix < length and str[ix] in _VOWELS:
      ix += 1
    changed_vowels = MapVowels(str[start:ix])
    pieces.extend((changed_vowels, _QOF, changed_vowels))

  outstr = u''.join(pieces)
  _existing_translations[str] = outstr
  return outstr
10801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
10901b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
def PseudoMessage(message):
  '''Builds the pseudotranslation of a whole message.

  Placeholder parts of the message are copied into the result as they are;
  every other part is run through PseudoString() and appended as text.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  pseudo = tclib.Translation()

  for piece in message.GetContent():
    if not isinstance(piece, tclib.Placeholder):
      pseudo.AppendText(PseudoString(piece))
      continue
    pseudo.AppendPlaceholder(piece)

  return pseudo
12801b3bc768461bd303bff39f8cd1663682254e407joi@chromium.org
129