pseudolocalize.cpp revision 0a1eed394fa287523b105aa569ed6348f4986483
1#include "pseudolocalize.h"
2
3using namespace std;
4
5// String basis to generate expansion
6static const String16 k_expansion_string = String16("one two three "
7    "four five six seven eight nine ten eleven twelve thirteen "
8    "fourteen fiveteen sixteen seventeen nineteen twenty");
9
10// Special unicode characters to override directionality of the words
11static const String16 k_rlm = String16("\xe2\x80\x8f");
12static const String16 k_rlo = String16("\xE2\x80\xae");
13static const String16 k_pdf = String16("\xE2\x80\xac");
14
15// Placeholder marks
16static const String16 k_placeholder_open = String16("\xc2\xbb");
17static const String16 k_placeholder_close = String16("\xc2\xab");
18
// Maps an ASCII letter or one of '!', '?', '$' to the UTF-8 encoded
// replacement used for pseudolocalization.
//
// Returns a pointer to a static, NUL-terminated UTF-8 string, or a null
// pointer when `c` has no replacement (this includes 'F', which has no
// entry) and must be copied through unchanged by the caller.
static const char*
pseudolocalize_char(const char16_t c)
{
    // Replacements for 'a'..'z', indexed by (c - 'a').
    static const char* const k_lower[26] = {
        "\xc3\xa5",      // a
        "\xc9\x93",      // b
        "\xc3\xa7",      // c
        "\xc3\xb0",      // d
        "\xc3\xa9",      // e
        "\xc6\x92",      // f
        "\xc4\x9d",      // g
        "\xc4\xa5",      // h
        "\xc3\xae",      // i
        "\xc4\xb5",      // j
        "\xc4\xb7",      // k
        "\xc4\xbc",      // l
        "\xe1\xb8\xbf",  // m
        "\xc3\xb1",      // n
        "\xc3\xb6",      // o
        "\xc3\xbe",      // p
        "\x51",          // q
        "\xc5\x95",      // r
        "\xc5\xa1",      // s
        "\xc5\xa3",      // t
        "\xc3\xbb",      // u
        "\x56",          // v
        "\xc5\xb5",      // w
        "\xd1\x85",      // x
        "\xc3\xbd",      // y
        "\xc5\xbe",      // z
    };

    // Replacements for 'A'..'Z', indexed by (c - 'A').
    static const char* const k_upper[26] = {
        "\xc3\x85",      // A
        "\xce\xb2",      // B
        "\xc3\x87",      // C
        "\xc3\x90",      // D
        "\xc3\x89",      // E
        nullptr,         // F has no replacement
        "\xc4\x9c",      // G
        "\xc4\xa4",      // H
        "\xc3\x8e",      // I
        "\xc4\xb4",      // J
        "\xc4\xb6",      // K
        "\xc4\xbb",      // L
        "\xe1\xb8\xbe",  // M
        "\xc3\x91",      // N
        "\xc3\x96",      // O
        "\xc3\x9e",      // P
        "\x71",          // Q
        "\xc5\x94",      // R
        "\xc5\xa0",      // S
        "\xc5\xa2",      // T
        "\xc3\x9b",      // U
        "\xce\xbd",      // V
        "\xc5\xb4",      // W
        "\xc3\x97",      // X
        "\xc3\x9d",      // Y
        "\xc5\xbd",      // Z
    };

    if (c >= 'a' && c <= 'z') {
        return k_lower[c - 'a'];
    }
    if (c >= 'A' && c <= 'Z') {
        return k_upper[c - 'A'];
    }
    if (c == '!') {
        return "\xc2\xa1";
    }
    if (c == '?') {
        return "\xc2\xbf";
    }
    if (c == '$') {
        return "\xe2\x82\xac";
    }
    return nullptr;
}
80
// Returns true when `c` is one of the conversion characters that
// pseudolocalize_string() accepts as the final character of a '%'
// placeholder. ('t' is not listed here; the caller handles it separately
// because it is followed by one more character.)
static bool
is_possible_normal_placeholder_end(const char16_t c) {
    static const char k_end_chars[] = "sScCdoxXfeEgGaAbBhH%n";
    // Stop at the terminator so that c == 0 never matches.
    for (const char* candidate = k_end_chars; *candidate != '\0'; ++candidate) {
        if (c == static_cast<char16_t>(*candidate)) {
            return true;
        }
    }
    return false;
}
108
109String16
110pseudo_generate_expansion(const unsigned int length) {
111    String16 result = k_expansion_string;
112    const char16_t* s = result.string();
113    if (result.size() < length) {
114        result += String16(" ");
115        result += pseudo_generate_expansion(length - result.size());
116    } else {
117        int ext = 0;
118        // Should contain only whole words, so looking for a space
119        for (unsigned int i = length + 1; i < result.size(); ++i) {
120          ++ext;
121          if (s[i] == ' ') {
122            break;
123          }
124        }
125        result.remove(length + ext, 0);
126    }
127    return result;
128}
129
130/**
131 * Converts characters so they look like they've been localized.
132 *
133 * Note: This leaves escape sequences untouched so they can later be
134 * processed by ResTable::collectString in the normal way.
135 */
136String16
137pseudolocalize_string(const String16& source)
138{
139    const char16_t* s = source.string();
140    String16 result;
141    const size_t I = source.size();
142    for (size_t i=0; i<I; i++) {
143        char16_t c = s[i];
144        if (c == '\\') {
145            // Escape syntax, no need to pseudolocalize
146            if (i<I-1) {
147                result += String16("\\");
148                i++;
149                c = s[i];
150                switch (c) {
151                    case 'u':
152                        // this one takes up 5 chars
153                        result += String16(s+i, 5);
154                        i += 4;
155                        break;
156                    case 't':
157                    case 'n':
158                    case '#':
159                    case '@':
160                    case '?':
161                    case '"':
162                    case '\'':
163                    case '\\':
164                    default:
165                        result.append(&c, 1);
166                        break;
167                }
168            } else {
169                result.append(&c, 1);
170            }
171        } else if (c == '%') {
172            // Placeholder syntax, no need to pseudolocalize
173            result += k_placeholder_open;
174            bool end = false;
175            result.append(&c, 1);
176            while (!end && i < I) {
177                ++i;
178                c = s[i];
179                result.append(&c, 1);
180                if (is_possible_normal_placeholder_end(c)) {
181                    end = true;
182                } else if (c == 't') {
183                    ++i;
184                    c = s[i];
185                    result.append(&c, 1);
186                    end = true;
187                }
188            }
189            result += k_placeholder_close;
190        } else if (c == '<' || c == '&') {
191            // html syntax, no need to pseudolocalize
192            bool tag_closed = false;
193            while (!tag_closed && i < I) {
194                if (c == '&') {
195                    String16 escape_text;
196                    escape_text.append(&c, 1);
197                    bool end = false;
198                    size_t htmlCodePos = i;
199                    while (!end && htmlCodePos < I) {
200                        ++htmlCodePos;
201                        c = s[htmlCodePos];
202                        escape_text.append(&c, 1);
203                        // Valid html code
204                        if (c == ';') {
205                            end = true;
206                            i = htmlCodePos;
207                        }
208                        // Wrong html code
209                        else if (!((c == '#' ||
210                                 (c >= 'a' && c <= 'z') ||
211                                 (c >= 'A' && c <= 'Z') ||
212                                 (c >= '0' && c <= '9')))) {
213                            end = true;
214                        }
215                    }
216                    result += escape_text;
217                    if (escape_text != String16("&lt;")) {
218                        tag_closed = true;
219                    }
220                    continue;
221                }
222                if (c == '>') {
223                    tag_closed = true;
224                    result.append(&c, 1);
225                    continue;
226                }
227                result.append(&c, 1);
228                i++;
229                c = s[i];
230            }
231        } else {
232            // This is a pure text that should be pseudolocalized
233            const char* p = pseudolocalize_char(c);
234            if (p != NULL) {
235                result += String16(p);
236            } else {
237                result.append(&c, 1);
238            }
239        }
240    }
241    return result;
242}
243
244String16
245pseudobidi_string(const String16& source)
246{
247    const char16_t* s = source.string();
248    String16 result;
249    result += k_rlm;
250    result += k_rlo;
251    for (size_t i=0; i<source.size(); i++) {
252        char16_t c = s[i];
253        switch(c) {
254            case ' ': result += k_pdf;
255                      result += k_rlm;
256                      result.append(&c, 1);
257                      result += k_rlm;
258                      result += k_rlo;
259                      break;
260            default: result.append(&c, 1);
261                     break;
262        }
263    }
264    result += k_pdf;
265    result += k_rlm;
266    return result;
267}
268
269