1393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski/* 2393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * Copyright (C) 2015 The Android Open Source Project 3393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * 4393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * Licensed under the Apache License, Version 2.0 (the "License"); 5393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * you may not use this file except in compliance with the License. 6393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * You may obtain a copy of the License at 7393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * 8393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * http://www.apache.org/licenses/LICENSE-2.0 9393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * 10393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * Unless required by applicable law or agreed to in writing, software 11393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * distributed under the License is distributed on an "AS IS" BASIS, 12393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * See the License for the specific language governing permissions and 14393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * limitations under the License. 15393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski */ 16393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 17393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski#include "compile/Pseudolocalizer.h" 18393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski#include "util/Util.h" 19393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 20393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskinamespace aapt { 21393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 22393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski// String basis to generate expansion 23393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const std::u16string k_expansion_string = u"one two three " 24393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski "four five six seven eight nine ten eleven twelve thirteen " 25393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski "fourteen fiveteen sixteen seventeen nineteen twenty"; 26393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 27393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski// Special unicode characters to override directionality of the words 28393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const std::u16string k_rlm = u"\u200f"; 29393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const std::u16string k_rlo = u"\u202e"; 30393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const std::u16string k_pdf = u"\u202c"; 31393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 32393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski// Placeholder marks 33393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const std::u16string k_placeholder_open = u"\u00bb"; 34393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const std::u16string k_placeholder_close = u"\u00ab"; 35393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 36393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const char16_t k_arg_start = u'{'; 37393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const char16_t k_arg_end = u'}'; 38393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 39393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskiclass PseudoMethodNone : public PseudoMethodImpl { 40393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskipublic: 41393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string text(const StringPiece16& text) override { return text.toString(); } 42393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string placeholder(const StringPiece16& text) override { return text.toString(); } 43393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}; 44393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 45393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskiclass PseudoMethodBidi : public PseudoMethodImpl { 46393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskipublic: 47393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string text(const StringPiece16& text) override; 48393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string placeholder(const StringPiece16& text) override; 49393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}; 50393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 51393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskiclass PseudoMethodAccent : public PseudoMethodImpl { 52393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskipublic: 53393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski PseudoMethodAccent() : mDepth(0), mWordCount(0), mLength(0) {} 54393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string start() override; 55393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string end() override; 56393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string text(const StringPiece16& text) override; 57393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string placeholder(const StringPiece16& text) override; 58393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskiprivate: 59393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t mDepth; 60393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t mWordCount; 61393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t mLength; 62393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski}; 63393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 64393b5f0d6130d3848dd82075986a5cf40c09ce44Adam LesinskiPseudolocalizer::Pseudolocalizer(Method method) : mLastDepth(0) { 65393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski setMethod(method); 66393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 67393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 68393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskivoid Pseudolocalizer::setMethod(Method method) { 69393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski switch (method) { 70393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case Method::kNone: 71393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mImpl = util::make_unique<PseudoMethodNone>(); 72393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski break; 73393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case Method::kAccent: 74393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mImpl = util::make_unique<PseudoMethodAccent>(); 75393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski break; 76393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case Method::kBidi: 77393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mImpl = util::make_unique<PseudoMethodBidi>(); 78393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski break; 79393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 80393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 81393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 82393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string Pseudolocalizer::text(const StringPiece16& text) { 83393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string out; 84393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t depth = mLastDepth; 85393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t lastpos, pos; 86393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const size_t length = text.size(); 87393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const char16_t* str = text.data(); 88393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool escaped = false; 89393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski for (lastpos = pos = 0; pos < length; pos++) { 90393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski char16_t c = str[pos]; 91393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (escaped) { 92393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski escaped = false; 93393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski continue; 94393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 95393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (c == '\'') { 96393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski escaped = true; 97393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski continue; 98393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 99393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 100393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (c == k_arg_start) { 101393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski depth++; 102393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else if (c == k_arg_end && depth) { 103393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski depth--; 104393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 105393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 106393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (mLastDepth != depth || pos == length - 1) { 107393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool pseudo = ((mLastDepth % 2) == 0); 108393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t nextpos = pos; 109393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (!pseudo || depth == mLastDepth) { 110393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski nextpos++; 111393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 112393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t size = nextpos - lastpos; 113393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (size) { 114393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string chunk = text.substr(lastpos, size).toString(); 115393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (pseudo) { 116393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski chunk = mImpl->text(chunk); 117393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else if (str[lastpos] == k_arg_start && str[nextpos - 1] == k_arg_end) { 118393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski chunk = mImpl->placeholder(chunk); 119393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 120393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski out.append(chunk); 121393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 122393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (pseudo && depth < mLastDepth) { // End of message 123393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski out.append(mImpl->end()); 124393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else if (!pseudo && depth > mLastDepth) { // Start of message 125393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski out.append(mImpl->start()); 126393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 127393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski lastpos = nextpos; 128393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mLastDepth = depth; 129393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 130393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 131393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return out; 132393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 133393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 134393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic const char16_t* pseudolocalizeChar(const char16_t c) { 135393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski switch (c) { 136393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'a': return u"\u00e5"; 137393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'b': return u"\u0253"; 138393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'c': return u"\u00e7"; 139393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'd': return u"\u00f0"; 140393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'e': return u"\u00e9"; 141393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'f': return u"\u0192"; 142393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'g': return u"\u011d"; 143393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'h': return u"\u0125"; 144393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'i': return u"\u00ee"; 145393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'j': return u"\u0135"; 146393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'k': return u"\u0137"; 147393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'l': return u"\u013c"; 148393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'm': return u"\u1e3f"; 149393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'n': return u"\u00f1"; 150393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'o': return u"\u00f6"; 151393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'p': return u"\u00fe"; 152393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'q': return u"\u0051"; 153393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'r': return u"\u0155"; 154393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 's': return u"\u0161"; 155393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 't': return u"\u0163"; 156393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'u': return u"\u00fb"; 157393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'v': return u"\u0056"; 158393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'w': return u"\u0175"; 159393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'x': return u"\u0445"; 160393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'y': return u"\u00fd"; 161393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'z': return u"\u017e"; 162393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'A': return u"\u00c5"; 163393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'B': return u"\u03b2"; 164393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'C': return u"\u00c7"; 165393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'D': return u"\u00d0"; 166393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'E': return u"\u00c9"; 167393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'G': return u"\u011c"; 168393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'H': return u"\u0124"; 169393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'I': return u"\u00ce"; 170393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'J': return u"\u0134"; 171393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'K': return u"\u0136"; 172393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'L': return u"\u013b"; 173393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'M': return u"\u1e3e"; 174393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'N': return u"\u00d1"; 175393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'O': return u"\u00d6"; 176393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'P': return u"\u00de"; 177393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'Q': return u"\u0071"; 178393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'R': return u"\u0154"; 179393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'S': return u"\u0160"; 180393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'T': return u"\u0162"; 181393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'U': return u"\u00db"; 182393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'V': return u"\u03bd"; 183393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'W': return u"\u0174"; 184393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'X': return u"\u00d7"; 185393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'Y': return u"\u00dd"; 186393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'Z': return u"\u017d"; 187393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case '!': return u"\u00a1"; 188393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case '?': return u"\u00bf"; 189393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case '$': return u"\u20ac"; 190393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski default: return NULL; 191393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 192393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 193393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 194393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic bool isPossibleNormalPlaceholderEnd(const char16_t c) { 195393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski switch (c) { 196393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 's': return true; 197393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'S': return true; 198393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'c': return true; 199393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'C': return true; 200393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'd': return true; 201393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'o': return true; 202393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'x': return true; 203393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'X': return true; 204393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'f': return true; 205393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'e': return true; 206393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'E': return true; 207393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'g': return true; 208393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'G': return true; 209393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'a': return true; 210393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'A': return true; 211393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'b': return true; 212393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'B': return true; 213393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'h': return true; 214393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'H': return true; 215393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case '%': return true; 216393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski case 'n': return true; 217393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski default: return false; 218393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 219393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 220393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 221393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistatic std::u16string pseudoGenerateExpansion(const unsigned int length) { 222393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string result = k_expansion_string; 223393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const char16_t* s = result.data(); 224393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (result.size() < length) { 225393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += u" "; 226393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += pseudoGenerateExpansion(length - result.size()); 227393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else { 228393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski int ext = 0; 229393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Should contain only whole words, so looking for a space 230393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski for (unsigned int i = length + 1; i < result.size(); ++i) { 231393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski ++ext; 232393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (s[i] == ' ') { 233393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski break; 234393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 235393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 236393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result = result.substr(0, length + ext); 237393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 238393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return result; 239393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 240393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 241393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string PseudoMethodAccent::start() { 242393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string result; 243393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (mDepth == 0) { 244393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result = u"["; 245393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 246393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mWordCount = mLength = 0; 247393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mDepth++; 248393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return result; 249393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 250393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 251393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string PseudoMethodAccent::end() { 252393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string result; 253393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (mLength) { 254393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += u" "; 255393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += pseudoGenerateExpansion(mWordCount > 3 ? mLength : mLength / 2); 256393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 257393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mWordCount = mLength = 0; 258393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mDepth--; 259393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (mDepth == 0) { 260393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += u"]"; 261393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 262393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return result; 263393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 264393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 265393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski/** 266393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * Converts characters so they look like they've been localized. 267393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * 268393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski * Note: This leaves placeholder syntax untouched. 269393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski */ 270393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string PseudoMethodAccent::text(const StringPiece16& source) 271393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski{ 272393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const char16_t* s = source.data(); 273393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string result; 274393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const size_t I = source.size(); 275393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool lastspace = true; 276393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski for (size_t i = 0; i < I; i++) { 277393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski char16_t c = s[i]; 278393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (c == '%') { 279393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Placeholder syntax, no need to pseudolocalize 280393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string chunk; 281393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool end = false; 282393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski chunk.append(&c, 1); 283393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski while (!end && i < I) { 284393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski ++i; 285393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski c = s[i]; 286393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski chunk.append(&c, 1); 287393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (isPossibleNormalPlaceholderEnd(c)) { 288393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski end = true; 289393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else if (c == 't') { 290393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski ++i; 291393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski c = s[i]; 292393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski chunk.append(&c, 1); 293393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski end = true; 294393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 295393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 296393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Treat chunk as a placeholder unless it ends with %. 297393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += ((c == '%') ? chunk : placeholder(chunk)); 298393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else if (c == '<' || c == '&') { 299393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // html syntax, no need to pseudolocalize 300393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool tag_closed = false; 301393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski while (!tag_closed && i < I) { 302393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (c == '&') { 303393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string escapeText; 304393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski escapeText.append(&c, 1); 305393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool end = false; 306393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski size_t htmlCodePos = i; 307393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski while (!end && htmlCodePos < I) { 308393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski ++htmlCodePos; 309393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski c = s[htmlCodePos]; 310393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski escapeText.append(&c, 1); 311393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Valid html code 312393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (c == ';') { 313393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski end = true; 314393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski i = htmlCodePos; 315393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 316393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Wrong html code 317393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski else if (!((c == '#' || 318393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski (c >= 'a' && c <= 'z') || 319393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski (c >= 'A' && c <= 'Z') || 320393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski (c >= '0' && c <= '9')))) { 321393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski end = true; 322393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 323393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 324393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += escapeText; 325393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (escapeText != u"<") { 326393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski tag_closed = true; 327393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 328393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski continue; 329393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 330393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (c == '>') { 331393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski tag_closed = true; 332393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result.append(&c, 1); 333393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski continue; 334393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 335393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result.append(&c, 1); 336393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski i++; 337393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski c = s[i]; 338393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 339393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else { 340393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // This is a pure text that should be pseudolocalized 341393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const char16_t* p = pseudolocalizeChar(c); 342393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (p != nullptr) { 343393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += p; 344393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else { 345393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool space = util::isspace16(c); 346393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (lastspace && !space) { 347393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mWordCount++; 348393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 349393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski lastspace = space; 350393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result.append(&c, 1); 351393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 352393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Count only pseudolocalizable chars and delimiters 353393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski mLength++; 354393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 355393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 356393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return result; 357393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 358393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 359393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string PseudoMethodAccent::placeholder(const StringPiece16& source) { 360393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Surround a placeholder with brackets 361393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return k_placeholder_open + source.toString() + k_placeholder_close; 362393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 363393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 364393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string PseudoMethodBidi::text(const StringPiece16& source) { 365393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski const char16_t* s = source.data(); 366393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski std::u16string result; 367393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool lastspace = true; 368393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski bool space = true; 369393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski for (size_t i = 0; i < source.size(); i++) { 370393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski char16_t c = s[i]; 371393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski space = util::isspace16(c); 372393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (lastspace && !space) { 373393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Word start 374393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += k_rlm + k_rlo; 375393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } else if (!lastspace && space) { 376393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Word end 377393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += k_pdf + k_rlm; 378393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 379393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski lastspace = space; 380393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result.append(&c, 1); 381393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 382393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski if (!lastspace) { 383393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // End of last word 384393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski result += k_pdf + k_rlm; 385393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski } 386393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return result; 387393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 388393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 389393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinskistd::u16string PseudoMethodBidi::placeholder(const StringPiece16& source) { 390393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski // Surround a placeholder with directionality change sequence 391393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski return k_rlm + k_rlo + source.toString() + k_pdf + k_rlm; 392393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} 393393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski 394393b5f0d6130d3848dd82075986a5cf40c09ce44Adam Lesinski} // namespace aapt 395