rtl.cc revision 21d179b334e59e9a3bfcaed4c4430bef1bc5759d
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/logging.h"
9#include "base/string_util.h"
10#include "base/utf_string_conversions.h"
11#include "base/sys_string_conversions.h"
12#include "unicode/coll.h"
13#include "unicode/locid.h"
14#include "unicode/uchar.h"
15#include "unicode/uscript.h"
16
17#if defined(TOOLKIT_USES_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace {
22
23// Extract language and country, ignore keywords, concatenate using dash.
24std::string GetLocaleString(const icu::Locale& locale) {
25  const char* language = locale.getLanguage();
26  const char* country = locale.getCountry();
27
28  std::string result =
29      (language != NULL && *language != '\0') ? language : "und";
30
31  if (country != NULL && *country != '\0') {
32    result += '-';
33    result += country;
34  }
35
36  return result;
37}
38
39}  // namespace
40
41namespace base {
42namespace i18n {
43
// Cached text direction of the current ICU default locale. It starts out as
// UNKNOWN_DIRECTION, is reset to UNKNOWN_DIRECTION by SetICUDefaultLocale(),
// and is filled in on demand by ICUIsRTL().
static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
46
47// Convert the ICU default locale to a string.
48std::string GetConfiguredLocale() {
49  return GetLocaleString(icu::Locale::getDefault());
50}
51
52// Convert the ICU canonicalized locale to a string.
53std::string GetCanonicalLocale(const char* locale) {
54  return GetLocaleString(icu::Locale::createCanonical(locale));
55}
56
57// Convert Chrome locale name to ICU locale name
58std::string ICULocaleName(const std::string& locale_string) {
59  // If not Spanish, just return it.
60  if (locale_string.substr(0, 2) != "es")
61    return locale_string;
62  // Expand es to es-ES.
63  if (LowerCaseEqualsASCII(locale_string, "es"))
64    return "es-ES";
65  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
66  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
67  // to es-MX (the most populous in Spanish-speaking Latin America).
68  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
69    const icu::Locale& locale = icu::Locale::getDefault();
70    std::string language = locale.getLanguage();
71    const char* country = locale.getCountry();
72    if (LowerCaseEqualsASCII(language, "es") &&
73      !LowerCaseEqualsASCII(country, "es")) {
74        language += '-';
75        language += country;
76        return language;
77    }
78    return "es-MX";
79  }
80  // Currently, Chrome has only "es" and "es-419", but later we may have
81  // more specific "es-RR".
82  return locale_string;
83}
84
85void SetICUDefaultLocale(const std::string& locale_string) {
86  icu::Locale locale(ICULocaleName(locale_string).c_str());
87  UErrorCode error_code = U_ZERO_ERROR;
88  icu::Locale::setDefault(locale, error_code);
89  // This return value is actually bogus because Locale object is
90  // an ID and setDefault seems to always succeed (regardless of the
91  // presence of actual locale data). However,
92  // it does not hurt to have it as a sanity check.
93  DCHECK(U_SUCCESS(error_code));
94  g_icu_text_direction = UNKNOWN_DIRECTION;
95
96  // If we use Views toolkit on top of GtkWidget, then we need to keep
97  // GtkWidget's default text direction consistent with ICU's text direction.
98  // Because in this case ICU's text direction will be used instead.
99  // See IsRTL() function below.
100#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
101  gtk_widget_set_default_direction(
102      ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
103#endif
104}
105
106bool IsRTL() {
107#if defined(TOOLKIT_GTK)
108  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
109  return (gtk_dir == GTK_TEXT_DIR_RTL);
110#else
111  return ICUIsRTL();
112#endif
113}
114
115bool ICUIsRTL() {
116  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
117    const icu::Locale& locale = icu::Locale::getDefault();
118    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
119  }
120  return g_icu_text_direction == RIGHT_TO_LEFT;
121}
122
123TextDirection GetTextDirectionForLocale(const char* locale_name) {
124  UErrorCode status = U_ZERO_ERROR;
125  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
126  DCHECK(U_SUCCESS(status));
127  // Treat anything other than RTL as LTR.
128  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
129}
130
131TextDirection GetFirstStrongCharacterDirection(const string16& text) {
132  const UChar* string = text.c_str();
133  size_t length = text.length();
134  size_t position = 0;
135  while (position < length) {
136    UChar32 character;
137    size_t next_position = position;
138    U16_NEXT(string, next_position, length, character);
139
140    // Now that we have the character, we use ICU in order to query for the
141    // appropriate Unicode BiDi character type.
142    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
143    if ((property == U_RIGHT_TO_LEFT) ||
144        (property == U_RIGHT_TO_LEFT_ARABIC) ||
145        (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
146        (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
147      return RIGHT_TO_LEFT;
148    } else if ((property == U_LEFT_TO_RIGHT) ||
149               (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
150               (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
151      return LEFT_TO_RIGHT;
152    }
153
154    position = next_position;
155  }
156
157  return LEFT_TO_RIGHT;
158}
159
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: converts |text| to UTF-16 and defers to the string16
// version.
TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return GetFirstStrongCharacterDirection(utf16_text);
}
#endif
165
166#if defined(OS_WIN)
167bool AdjustStringForLocaleDirection(string16* text) {
168  if (!IsRTL() || text->empty())
169    return false;
170
171  // Marking the string as LTR if the locale is RTL and the string does not
172  // contain strong RTL characters. Otherwise, mark the string as RTL.
173  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
174  if (!has_rtl_chars)
175    WrapStringWithLTRFormatting(text);
176  else
177    WrapStringWithRTLFormatting(text);
178
179  return true;
180}
181#else
182bool AdjustStringForLocaleDirection(string16* text) {
183  // On OS X & GTK the directionality of a label is determined by the first
184  // strongly directional character.
185  // However, we want to make sure that in an LTR-language-UI all strings are
186  // left aligned and vice versa.
187  // A problem can arise if we display a string which starts with user input.
188  // User input may be of the opposite directionality to the UI. So the whole
189  // string will be displayed in the opposite directionality, e.g. if we want to
190  // display in an LTR UI [such as US English]:
191  //
192  // EMAN_NOISNETXE is now installed.
193  //
194  // Since EXTENSION_NAME begins with a strong RTL char, the label's
195  // directionality will be set to RTL and the string will be displayed visually
196  // as:
197  //
198  // .is now installed EMAN_NOISNETXE
199  //
200  // In order to solve this issue, we prepend an LRM to the string. An LRM is a
201  // strongly directional LTR char.
202  // We also append an LRM at the end, which ensures that we're in an LTR
203  // context.
204
205  // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
206  // box so there is no issue with displaying zero-width bidi control characters
207  // on any system.  Thus no need for the !IsRTL() check here.
208  if (text->empty())
209    return false;
210
211  bool ui_direction_is_rtl = IsRTL();
212
213  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
214  if (!ui_direction_is_rtl && has_rtl_chars) {
215    WrapStringWithRTLFormatting(text);
216    text->insert(0, 1, kLeftToRightMark);
217    text->push_back(kLeftToRightMark);
218  } else if (ui_direction_is_rtl && has_rtl_chars) {
219    WrapStringWithRTLFormatting(text);
220    text->insert(0, 1, kRightToLeftMark);
221    text->push_back(kRightToLeftMark);
222  } else if (ui_direction_is_rtl) {
223    WrapStringWithLTRFormatting(text);
224    text->insert(0, 1, kRightToLeftMark);
225    text->push_back(kRightToLeftMark);
226  }
227
228  return true;
229}
230
231#endif  // !OS_WIN
232
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: runs the string16 version on a UTF-16 copy of |text|
// and writes the result back only when that call returns true.
bool AdjustStringForLocaleDirection(std::wstring* text) {
  string16 utf16_text = WideToUTF16(*text);
  if (!AdjustStringForLocaleDirection(&utf16_text))
    return false;  // Leave the caller's string untouched on failure.

  *text = UTF16ToWide(utf16_text);
  return true;
}
#endif
244
245bool StringContainsStrongRTLChars(const string16& text) {
246  const UChar* string = text.c_str();
247  size_t length = text.length();
248  size_t position = 0;
249  while (position < length) {
250    UChar32 character;
251    size_t next_position = position;
252    U16_NEXT(string, next_position, length, character);
253
254    // Now that we have the character, we use ICU in order to query for the
255    // appropriate Unicode BiDi character type.
256    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
257    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
258      return true;
259
260    position = next_position;
261  }
262
263  return false;
264}
265
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: converts |text| to UTF-16 and defers to the string16
// version.
bool StringContainsStrongRTLChars(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return StringContainsStrongRTLChars(utf16_text);
}
#endif
271
272void WrapStringWithLTRFormatting(string16* text) {
273  if (text->empty())
274    return;
275
276  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
277  text->insert(0, 1, kLeftToRightEmbeddingMark);
278
279  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
280  text->push_back(kPopDirectionalFormatting);
281}
282
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: surrounds a non-empty |text| with an LRE
// (Left-To-Right Embedding) mark and a PDF (Pop Directional Formatting) mark.
// An empty string is left unchanged.
void WrapStringWithLTRFormatting(std::wstring* text) {
  if (!text->empty()) {
    const wchar_t leading_lre = static_cast<wchar_t>(kLeftToRightEmbeddingMark);
    const wchar_t trailing_pdf =
        static_cast<wchar_t>(kPopDirectionalFormatting);
    text->insert(0, 1, leading_lre);
    text->push_back(trailing_pdf);
  }
}
#endif
295
296void WrapStringWithRTLFormatting(string16* text) {
297  if (text->empty())
298    return;
299
300  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
301  text->insert(0, 1, kRightToLeftEmbeddingMark);
302
303  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
304  text->push_back(kPopDirectionalFormatting);
305}
306
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: surrounds a non-empty |text| with an RLE
// (Right-To-Left Embedding) mark and a PDF (Pop Directional Formatting) mark.
// An empty string is left unchanged.
void WrapStringWithRTLFormatting(std::wstring* text) {
  if (!text->empty()) {
    const wchar_t leading_rle = static_cast<wchar_t>(kRightToLeftEmbeddingMark);
    const wchar_t trailing_pdf =
        static_cast<wchar_t>(kPopDirectionalFormatting);
    text->insert(0, 1, leading_rle);
    text->push_back(trailing_pdf);
  }
}
#endif
319
320void WrapPathWithLTRFormatting(const FilePath& path,
321                               string16* rtl_safe_path) {
322  // Wrap the overall path with LRE-PDF pair which essentialy marks the
323  // string as a Left-To-Right string.
324  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
325  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
326#if defined(OS_MACOSX)
327    rtl_safe_path->append(UTF8ToUTF16(path.value()));
328#elif defined(OS_WIN)
329    rtl_safe_path->append(path.value());
330#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
331    std::wstring wide_path = base::SysNativeMBToWide(path.value());
332    rtl_safe_path->append(WideToUTF16(wide_path));
333#endif
334  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
335  rtl_safe_path->push_back(kPopDirectionalFormatting);
336}
337
338string16 GetDisplayStringInLTRDirectionality(const string16& text) {
339  if (!IsRTL())
340    return text;
341  string16 text_mutable(text);
342  WrapStringWithLTRFormatting(&text_mutable);
343  return text_mutable;
344}
345
346const string16 StripWrappingBidiControlCharacters(const string16& text) {
347  if (text.empty())
348    return text;
349  size_t begin_index = 0;
350  char16 begin = text[begin_index];
351  if (begin == kLeftToRightEmbeddingMark ||
352      begin == kRightToLeftEmbeddingMark ||
353      begin == kLeftToRightOverride ||
354      begin == kRightToLeftOverride)
355    ++begin_index;
356  size_t end_index = text.length() - 1;
357  if (text[end_index] == kPopDirectionalFormatting)
358    --end_index;
359  return text.substr(begin_index, end_index - begin_index + 1);
360}
361
362}  // namespace i18n
363}  // namespace base
364