rtl.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/logging.h"
9#include "base/string_util.h"
10#include "base/utf_string_conversions.h"
11#include "base/sys_string_conversions.h"
12#include "unicode/coll.h"
13#include "unicode/locid.h"
14#include "unicode/uchar.h"
15#include "unicode/uscript.h"
16
17#if defined(TOOLKIT_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace {
22
23// Extract language, country and variant, but ignore keywords.  For example,
24// en-US, ca@valencia, ca-ES@valencia.
25std::string GetLocaleString(const icu::Locale& locale) {
26  const char* language = locale.getLanguage();
27  const char* country = locale.getCountry();
28  const char* variant = locale.getVariant();
29
30  std::string result =
31      (language != NULL && *language != '\0') ? language : "und";
32
33  if (country != NULL && *country != '\0') {
34    result += '-';
35    result += country;
36  }
37
38  if (variant != NULL && *variant != '\0') {
39    std::string variant_str(variant);
40    StringToLowerASCII(&variant_str);
41    result += '@' + variant_str;
42  }
43
44  return result;
45}
46
47}  // namespace
48
49namespace base {
50namespace i18n {
51
52// Represents the locale-specific ICU text direction.
53static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
54
55// Convert the ICU default locale to a string.
56std::string GetConfiguredLocale() {
57  return GetLocaleString(icu::Locale::getDefault());
58}
59
60// Convert the ICU canonicalized locale to a string.
61std::string GetCanonicalLocale(const char* locale) {
62  return GetLocaleString(icu::Locale::createCanonical(locale));
63}
64
65// Convert Chrome locale name to ICU locale name
66std::string ICULocaleName(const std::string& locale_string) {
67  // If not Spanish, just return it.
68  if (locale_string.substr(0, 2) != "es")
69    return locale_string;
70  // Expand es to es-ES.
71  if (LowerCaseEqualsASCII(locale_string, "es"))
72    return "es-ES";
73  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
74  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
75  // to es-MX (the most populous in Spanish-speaking Latin America).
76  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
77    const icu::Locale& locale = icu::Locale::getDefault();
78    std::string language = locale.getLanguage();
79    const char* country = locale.getCountry();
80    if (LowerCaseEqualsASCII(language, "es") &&
81      !LowerCaseEqualsASCII(country, "es")) {
82        language += '-';
83        language += country;
84        return language;
85    }
86    return "es-MX";
87  }
88  // Currently, Chrome has only "es" and "es-419", but later we may have
89  // more specific "es-RR".
90  return locale_string;
91}
92
93void SetICUDefaultLocale(const std::string& locale_string) {
94  icu::Locale locale(ICULocaleName(locale_string).c_str());
95  UErrorCode error_code = U_ZERO_ERROR;
96  icu::Locale::setDefault(locale, error_code);
97  // This return value is actually bogus because Locale object is
98  // an ID and setDefault seems to always succeed (regardless of the
99  // presence of actual locale data). However,
100  // it does not hurt to have it as a sanity check.
101  DCHECK(U_SUCCESS(error_code));
102  g_icu_text_direction = UNKNOWN_DIRECTION;
103}
104
105bool IsRTL() {
106#if defined(TOOLKIT_GTK)
107  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
108  return gtk_dir == GTK_TEXT_DIR_RTL;
109#else
110  return ICUIsRTL();
111#endif
112}
113
114bool ICUIsRTL() {
115  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
116    const icu::Locale& locale = icu::Locale::getDefault();
117    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
118  }
119  return g_icu_text_direction == RIGHT_TO_LEFT;
120}
121
122TextDirection GetTextDirectionForLocale(const char* locale_name) {
123  UErrorCode status = U_ZERO_ERROR;
124  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
125  DCHECK(U_SUCCESS(status));
126  // Treat anything other than RTL as LTR.
127  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
128}
129
130TextDirection GetFirstStrongCharacterDirection(const string16& text) {
131  const UChar* string = text.c_str();
132  size_t length = text.length();
133  size_t position = 0;
134  while (position < length) {
135    UChar32 character;
136    size_t next_position = position;
137    U16_NEXT(string, next_position, length, character);
138
139    // Now that we have the character, we use ICU in order to query for the
140    // appropriate Unicode BiDi character type.
141    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
142    if ((property == U_RIGHT_TO_LEFT) ||
143        (property == U_RIGHT_TO_LEFT_ARABIC) ||
144        (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
145        (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
146      return RIGHT_TO_LEFT;
147    } else if ((property == U_LEFT_TO_RIGHT) ||
148               (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
149               (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
150      return LEFT_TO_RIGHT;
151    }
152
153    position = next_position;
154  }
155
156  return LEFT_TO_RIGHT;
157}
158
159#if defined(OS_WIN)
160bool AdjustStringForLocaleDirection(string16* text) {
161  if (!IsRTL() || text->empty())
162    return false;
163
164  // Marking the string as LTR if the locale is RTL and the string does not
165  // contain strong RTL characters. Otherwise, mark the string as RTL.
166  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
167  if (!has_rtl_chars)
168    WrapStringWithLTRFormatting(text);
169  else
170    WrapStringWithRTLFormatting(text);
171
172  return true;
173}
174
175bool UnadjustStringForLocaleDirection(string16* text) {
176  if (!IsRTL() || text->empty())
177    return false;
178
179  *text = StripWrappingBidiControlCharacters(*text);
180  return true;
181}
182#else
183bool AdjustStringForLocaleDirection(string16* text) {
184  // On OS X & GTK the directionality of a label is determined by the first
185  // strongly directional character.
186  // However, we want to make sure that in an LTR-language-UI all strings are
187  // left aligned and vice versa.
188  // A problem can arise if we display a string which starts with user input.
189  // User input may be of the opposite directionality to the UI. So the whole
190  // string will be displayed in the opposite directionality, e.g. if we want to
191  // display in an LTR UI [such as US English]:
192  //
193  // EMAN_NOISNETXE is now installed.
194  //
195  // Since EXTENSION_NAME begins with a strong RTL char, the label's
196  // directionality will be set to RTL and the string will be displayed visually
197  // as:
198  //
199  // .is now installed EMAN_NOISNETXE
200  //
201  // In order to solve this issue, we prepend an LRM to the string. An LRM is a
202  // strongly directional LTR char.
203  // We also append an LRM at the end, which ensures that we're in an LTR
204  // context.
205
206  // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
207  // box so there is no issue with displaying zero-width bidi control characters
208  // on any system.  Thus no need for the !IsRTL() check here.
209  if (text->empty())
210    return false;
211
212  bool ui_direction_is_rtl = IsRTL();
213
214  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
215  if (!ui_direction_is_rtl && has_rtl_chars) {
216    WrapStringWithRTLFormatting(text);
217    text->insert(0U, 1U, kLeftToRightMark);
218    text->push_back(kLeftToRightMark);
219  } else if (ui_direction_is_rtl && has_rtl_chars) {
220    WrapStringWithRTLFormatting(text);
221    text->insert(0U, 1U, kRightToLeftMark);
222    text->push_back(kRightToLeftMark);
223  } else if (ui_direction_is_rtl) {
224    WrapStringWithLTRFormatting(text);
225    text->insert(0U, 1U, kRightToLeftMark);
226    text->push_back(kRightToLeftMark);
227  } else {
228    return false;
229  }
230
231  return true;
232}
233
234bool UnadjustStringForLocaleDirection(string16* text) {
235  if (text->empty())
236    return false;
237
238  size_t begin_index = 0;
239  char16 begin = text->at(begin_index);
240  if (begin == kLeftToRightMark ||
241      begin == kRightToLeftMark) {
242    ++begin_index;
243  }
244
245  size_t end_index = text->length() - 1;
246  char16 end = text->at(end_index);
247  if (end == kLeftToRightMark ||
248      end == kRightToLeftMark) {
249    --end_index;
250  }
251
252  string16 unmarked_text =
253      text->substr(begin_index, end_index - begin_index + 1);
254  *text = StripWrappingBidiControlCharacters(unmarked_text);
255  return true;
256}
257
258#endif  // !OS_WIN
259
260bool StringContainsStrongRTLChars(const string16& text) {
261  const UChar* string = text.c_str();
262  size_t length = text.length();
263  size_t position = 0;
264  while (position < length) {
265    UChar32 character;
266    size_t next_position = position;
267    U16_NEXT(string, next_position, length, character);
268
269    // Now that we have the character, we use ICU in order to query for the
270    // appropriate Unicode BiDi character type.
271    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
272    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
273      return true;
274
275    position = next_position;
276  }
277
278  return false;
279}
280
281void WrapStringWithLTRFormatting(string16* text) {
282  if (text->empty())
283    return;
284
285  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
286  text->insert(0U, 1U, kLeftToRightEmbeddingMark);
287
288  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
289  text->push_back(kPopDirectionalFormatting);
290}
291
292void WrapStringWithRTLFormatting(string16* text) {
293  if (text->empty())
294    return;
295
296  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
297  text->insert(0U, 1U, kRightToLeftEmbeddingMark);
298
299  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
300  text->push_back(kPopDirectionalFormatting);
301}
302
303void WrapPathWithLTRFormatting(const FilePath& path,
304                               string16* rtl_safe_path) {
305  // Wrap the overall path with LRE-PDF pair which essentialy marks the
306  // string as a Left-To-Right string.
307  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
308  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
309#if defined(OS_MACOSX)
310    rtl_safe_path->append(UTF8ToUTF16(path.value()));
311#elif defined(OS_WIN)
312    rtl_safe_path->append(path.value());
313#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
314    std::wstring wide_path = base::SysNativeMBToWide(path.value());
315    rtl_safe_path->append(WideToUTF16(wide_path));
316#endif
317  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
318  rtl_safe_path->push_back(kPopDirectionalFormatting);
319}
320
321string16 GetDisplayStringInLTRDirectionality(const string16& text) {
322  // Always wrap the string in RTL UI (it may be appended to RTL string).
323  // Also wrap strings with an RTL first strong character direction in LTR UI.
324  if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
325    string16 text_mutable(text);
326    WrapStringWithLTRFormatting(&text_mutable);
327    return text_mutable;
328  }
329  return text;
330}
331
332string16 StripWrappingBidiControlCharacters(const string16& text) {
333  if (text.empty())
334    return text;
335  size_t begin_index = 0;
336  char16 begin = text[begin_index];
337  if (begin == kLeftToRightEmbeddingMark ||
338      begin == kRightToLeftEmbeddingMark ||
339      begin == kLeftToRightOverride ||
340      begin == kRightToLeftOverride)
341    ++begin_index;
342  size_t end_index = text.length() - 1;
343  if (text[end_index] == kPopDirectionalFormatting)
344    --end_index;
345  return text.substr(begin_index, end_index - begin_index + 1);
346}
347
348}  // namespace i18n
349}  // namespace base
350