rtl.cc revision 201ade2fbba22bfb27ae029f4d23fca6ded109a0
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/logging.h"
9#include "base/string_util.h"
10#include "base/utf_string_conversions.h"
11#include "base/sys_string_conversions.h"
12#include "unicode/coll.h"
13#include "unicode/locid.h"
14#include "unicode/uchar.h"
15#include "unicode/uscript.h"
16
17#if defined(TOOLKIT_USES_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace {
22
23// Extract language and country, ignore keywords, concatenate using dash.
24std::string GetLocaleString(const icu::Locale& locale) {
25  const char* language = locale.getLanguage();
26  const char* country = locale.getCountry();
27
28  std::string result =
29      (language != NULL && *language != '\0') ? language : "und";
30
31  if (country != NULL && *country != '\0') {
32    result += '-';
33    result += country;
34  }
35
36  return result;
37}
38
39}  // namespace
40
41namespace base {
42namespace i18n {
43
// Represents the locale-specific ICU text direction.
// Acts as a cache: SetICUDefaultLocale() resets it to UNKNOWN_DIRECTION and
// ICUIsRTL() recomputes it from the ICU default locale the next time it is
// queried while still UNKNOWN_DIRECTION.
static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
46
47// Convert the ICU default locale to a string.
48std::string GetConfiguredLocale() {
49  return GetLocaleString(icu::Locale::getDefault());
50}
51
52// Convert the ICU canonicalized locale to a string.
53std::string GetCanonicalLocale(const char* locale) {
54  return GetLocaleString(icu::Locale::createCanonical(locale));
55}
56
57// Convert Chrome locale name to ICU locale name
58std::string ICULocaleName(const std::string& locale_string) {
59  // If not Spanish, just return it.
60  if (locale_string.substr(0, 2) != "es")
61    return locale_string;
62  // Expand es to es-ES.
63  if (LowerCaseEqualsASCII(locale_string, "es"))
64    return "es-ES";
65  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
66  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
67  // to es-MX (the most populous in Spanish-speaking Latin America).
68  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
69    const icu::Locale& locale = icu::Locale::getDefault();
70    std::string language = locale.getLanguage();
71    const char* country = locale.getCountry();
72    if (LowerCaseEqualsASCII(language, "es") &&
73      !LowerCaseEqualsASCII(country, "es")) {
74        language += '-';
75        language += country;
76        return language;
77    }
78    return "es-MX";
79  }
80  // Currently, Chrome has only "es" and "es-419", but later we may have
81  // more specific "es-RR".
82  return locale_string;
83}
84
85void SetICUDefaultLocale(const std::string& locale_string) {
86  icu::Locale locale(ICULocaleName(locale_string).c_str());
87  UErrorCode error_code = U_ZERO_ERROR;
88  icu::Locale::setDefault(locale, error_code);
89  // This return value is actually bogus because Locale object is
90  // an ID and setDefault seems to always succeed (regardless of the
91  // presence of actual locale data). However,
92  // it does not hurt to have it as a sanity check.
93  DCHECK(U_SUCCESS(error_code));
94  g_icu_text_direction = UNKNOWN_DIRECTION;
95
96  // If we use Views toolkit on top of GtkWidget, then we need to keep
97  // GtkWidget's default text direction consistent with ICU's text direction.
98  // Because in this case ICU's text direction will be used instead.
99  // See IsRTL() function below.
100#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
101  gtk_widget_set_default_direction(
102      ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
103#endif
104}
105
106bool IsRTL() {
107#if defined(TOOLKIT_GTK)
108  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
109  return (gtk_dir == GTK_TEXT_DIR_RTL);
110#else
111  return ICUIsRTL();
112#endif
113}
114
115bool ICUIsRTL() {
116  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
117    const icu::Locale& locale = icu::Locale::getDefault();
118    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
119  }
120  return g_icu_text_direction == RIGHT_TO_LEFT;
121}
122
123TextDirection GetTextDirectionForLocale(const char* locale_name) {
124  UErrorCode status = U_ZERO_ERROR;
125  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
126  DCHECK(U_SUCCESS(status));
127  // Treat anything other than RTL as LTR.
128  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
129}
130
131TextDirection GetFirstStrongCharacterDirection(const string16& text) {
132  const UChar* string = text.c_str();
133  size_t length = text.length();
134  size_t position = 0;
135  while (position < length) {
136    UChar32 character;
137    size_t next_position = position;
138    U16_NEXT(string, next_position, length, character);
139
140    // Now that we have the character, we use ICU in order to query for the
141    // appropriate Unicode BiDi character type.
142    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
143    if ((property == U_RIGHT_TO_LEFT) ||
144        (property == U_RIGHT_TO_LEFT_ARABIC) ||
145        (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
146        (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
147      return RIGHT_TO_LEFT;
148    } else if ((property == U_LEFT_TO_RIGHT) ||
149               (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
150               (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
151      return LEFT_TO_RIGHT;
152    }
153
154    position = next_position;
155  }
156
157  return LEFT_TO_RIGHT;
158}
159
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: converts |text| to UTF-16 and defers to the string16
// version.
TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return GetFirstStrongCharacterDirection(utf16_text);
}
#endif
165
166bool AdjustStringForLocaleDirection(string16* text) {
167  if (!IsRTL() || text->empty())
168    return false;
169
170  // Marking the string as LTR if the locale is RTL and the string does not
171  // contain strong RTL characters. Otherwise, mark the string as RTL.
172  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
173  if (!has_rtl_chars)
174    WrapStringWithLTRFormatting(text);
175  else
176    WrapStringWithRTLFormatting(text);
177
178  return true;
179}
180
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: runs the string16 version on a UTF-16 copy and writes
// the result back only when that version reports a modification.
bool AdjustStringForLocaleDirection(std::wstring* text) {
  string16 utf16_text = WideToUTF16(*text);
  if (!AdjustStringForLocaleDirection(&utf16_text))
    return false;  // Leave the caller's string untouched on failure.

  *text = UTF16ToWide(utf16_text);
  return true;
}
#endif
192
193bool StringContainsStrongRTLChars(const string16& text) {
194  const UChar* string = text.c_str();
195  size_t length = text.length();
196  size_t position = 0;
197  while (position < length) {
198    UChar32 character;
199    size_t next_position = position;
200    U16_NEXT(string, next_position, length, character);
201
202    // Now that we have the character, we use ICU in order to query for the
203    // appropriate Unicode BiDi character type.
204    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
205    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
206      return true;
207
208    position = next_position;
209  }
210
211  return false;
212}
213
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: converts |text| to UTF-16 and defers to the string16
// version.
bool StringContainsStrongRTLChars(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return StringContainsStrongRTLChars(utf16_text);
}
#endif
219
220void WrapStringWithLTRFormatting(string16* text) {
221  if (text->empty())
222    return;
223
224  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
225  text->insert(0, 1, kLeftToRightEmbeddingMark);
226
227  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
228  text->push_back(kPopDirectionalFormatting);
229}
230
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: surrounds a non-empty |text| with an LRE
// (Left-To-Right Embedding) mark at the front and a PDF (Pop Directional
// Formatting) mark at the back. An empty string is left unchanged.
void WrapStringWithLTRFormatting(std::wstring* text) {
  if (!text->empty()) {
    const wchar_t embedding_mark =
        static_cast<wchar_t>(kLeftToRightEmbeddingMark);
    const wchar_t pop_mark = static_cast<wchar_t>(kPopDirectionalFormatting);

    // LRE goes in front of the existing first character...
    text->insert(text->begin(), embedding_mark);
    // ...and PDF closes the embedding after the last one.
    text->append(1, pop_mark);
  }
}
#endif
243
244void WrapStringWithRTLFormatting(string16* text) {
245  if (text->empty())
246    return;
247
248  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
249  text->insert(0, 1, kRightToLeftEmbeddingMark);
250
251  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
252  text->push_back(kPopDirectionalFormatting);
253}
254
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: surrounds a non-empty |text| with an RLE
// (Right-To-Left Embedding) mark at the front and a PDF (Pop Directional
// Formatting) mark at the back. An empty string is left unchanged.
void WrapStringWithRTLFormatting(std::wstring* text) {
  if (!text->empty()) {
    const wchar_t embedding_mark =
        static_cast<wchar_t>(kRightToLeftEmbeddingMark);
    const wchar_t pop_mark = static_cast<wchar_t>(kPopDirectionalFormatting);

    // RLE goes in front of the existing first character...
    text->insert(text->begin(), embedding_mark);
    // ...and PDF closes the embedding after the last one.
    text->append(1, pop_mark);
  }
}
#endif
267
268void WrapPathWithLTRFormatting(const FilePath& path,
269                               string16* rtl_safe_path) {
270  // Wrap the overall path with LRE-PDF pair which essentialy marks the
271  // string as a Left-To-Right string.
272  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
273  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
274#if defined(OS_MACOSX)
275    rtl_safe_path->append(UTF8ToUTF16(path.value()));
276#elif defined(OS_WIN)
277    rtl_safe_path->append(path.value());
278#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
279    std::wstring wide_path = base::SysNativeMBToWide(path.value());
280    rtl_safe_path->append(WideToUTF16(wide_path));
281#endif
282  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
283  rtl_safe_path->push_back(kPopDirectionalFormatting);
284}
285
286string16 GetDisplayStringInLTRDirectionality(const string16& text) {
287  if (!IsRTL())
288    return text;
289  string16 text_mutable(text);
290  WrapStringWithLTRFormatting(&text_mutable);
291  return text_mutable;
292}
293
294const string16 StripWrappingBidiControlCharacters(const string16& text) {
295  if (text.empty())
296    return text;
297  size_t begin_index = 0;
298  char16 begin = text[begin_index];
299  if (begin == kLeftToRightEmbeddingMark ||
300      begin == kRightToLeftEmbeddingMark ||
301      begin == kLeftToRightOverride ||
302      begin == kRightToLeftOverride)
303    ++begin_index;
304  size_t end_index = text.length() - 1;
305  if (text[end_index] == kPopDirectionalFormatting)
306    --end_index;
307  return text.substr(begin_index, end_index - begin_index + 1);
308}
309
310}  // namespace i18n
311}  // namespace base
312