#!/usr/bin/env python
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''Pseudo RTL, (aka Fake Bidi) support. It simply wraps each word with
Unicode RTL overrides.
More info at https://sites.google.com/a/chromium.org/dev/Home/fake-bidi
'''

import re

from grit import lazy_re
from grit import tclib

# Maps a plain character to an accented (or otherwise visually similar)
# replacement character.  The entry order is kept stable because the keys are
# concatenated, in iteration order, into the TO_ACCENT pattern below.
ACCENTED_STRINGS = {
  'a': u'\u00e5',
  'e': u'\u00e9',
  'i': u'\u00ee',
  'o': u'\u00f6',
  'u': u'\u00fb',
  'A': u'\u00c5',
  'E': u'\u00c9',
  'I': u'\u00ce',
  'O': u'\u00d6',
  'U': u'\u00db',
  'c': u'\u00e7',
  'd': u'\u00f0',
  'n': u'\u00f1',
  'p': u'\u00fe',
  'y': u'\u00fd',
  'C': u'\u00c7',
  'D': u'\u00d0',
  'N': u'\u00d1',
  'P': u'\u00de',
  'Y': u'\u00dd',
  'f': u'\u0192',
  's': u'\u0161',
  'S': u'\u0160',
  'z': u'\u017e',
  'Z': u'\u017d',
  'g': u'\u011d',
  'G': u'\u011c',
  'h': u'\u0125',
  'H': u'\u0124',
  'j': u'\u0135',
  'J': u'\u0134',
  'k': u'\u0137',
  'K': u'\u0136',
  'l': u'\u013c',
  'L': u'\u013b',
  't': u'\u0163',
  'T': u'\u0162',
  'w': u'\u0175',
  'W': u'\u0174',
  '$': u'\u20ac',
  '?': u'\u00bf',
  'R': u'\u00ae',
  '!': u'\u00a1',
}

# Matches either a single character that is a key of ACCENTED_STRINGS, or a
# two-character escape sequence (a backslash followed by a lowercase letter or
# by another backslash).
# Characters inside an escape sequence such as "\n" must not be accented, so
# every character that follows a backslash is treated as escaped.  Matching
# "\\" as one unit also covers input like "\\n": that is a literal backslash
# followed by the letter "n", and the "n" is still eligible for accenting.
TO_ACCENT = lazy_re.compile(
    r'[%s]|\\[a-z\\]' % ''.join(ACCENTED_STRINGS))

# Lex text so that we don't interfere with html tokens and entities.
# This lexing scheme handles all well formed tags and entities, html or
# xhtml.  It does not handle comments, CDATA sections, or the unescaping tags:
# script, style, xmp or listing.  If any of those appear in messages,
# something is wrong.
_TOKEN_PATTERNS = (
  # a run of non html special characters
  r'[^<&]+',
  # a tag
  (r'</?[a-z]\w*'  # beginning of tag
   r'(?:\s+\w+(?:\s*=\s*'  # attribute start
   r'(?:[^\s"\'>]+|"[^\"]*"|\'[^\']*\'))?'  # attribute value
   r')*\s*/?>'),
  # an entity
  r'&(?:[a-z]\w+|#\d+|#x[\da-f]+);',
  # an html special character not part of a special sequence
  r'.',
)

# One compiled lexer per pattern, tried in the order listed above.  Each is
# anchored with '^' so it only matches at the beginning of the input, and is
# compiled with re.I (html tokens are case-insensitive) and re.S (so the
# final catch-all '.' also consumes a newline).
TOKENS = [
  lazy_re.compile('^%s' % pattern, re.I | re.S)
  for pattern in _TOKEN_PATTERNS
]

# A maximal run of word characters that are neither digits nor underscores.
ALPHABETIC_RUN = lazy_re.compile(r'([^\W0-9_]+)')

RLO = u'\u202e'  # U+202E RIGHT-TO-LEFT OVERRIDE
PDF = u'\u202c'  # U+202C POP DIRECTIONAL FORMATTING


def _WrapRunInOverride(match):
  '''Returns the matched alphabetic run enclosed in RLO ... PDF.'''
  return RLO + match.group() + PDF


def PseudoRTLString(text):
  '''Returns a fake bidirectional version of the source string.

  The text is consumed from the front, one token at a time, using the first
  entry of TOKENS that matches.  Tokens that start with '<' or '&' (tags,
  entities and stray special characters) are copied through unchanged; in any
  other token every alphabetic run is wrapped in RLO ... PDF.

  Args:
    text: the string to convert

  Return:
    the converted string
  '''
  pieces = []
  rest = text
  while rest:
    for lexer in TOKENS:
      hit = lexer.search(rest)
      if not hit:
        continue
      piece = hit.group(0)
      rest = rest[len(piece):]
      if piece[0] in ('<', '&'):
        # Markup: leave it exactly as written.
        pieces.append(piece)
      else:
        # Plain text: force each word to render right-to-left.
        pieces.append(ALPHABETIC_RUN.sub(_WrapRunInOverride, piece))
      # Restart from the first lexer on whatever input is left.
      break
  return ''.join(pieces)


def PseudoRTLMessage(message):
  '''Returns a pseudo-RTL (aka Fake-Bidi) translation of the provided message.

  Placeholders in the message content are appended to the translation as-is;
  every other content part is passed through PseudoRTLString first.

  Args:
    message: tclib.Message()

  Return:
    tclib.Translation()
  '''
  translation = tclib.Translation()
  for item in message.GetContent():
    if not isinstance(item, tclib.Placeholder):
      translation.AppendText(PseudoRTLString(item))
      continue
    translation.AppendPlaceholder(item)

  return translation