rtl.cc revision 4a5e2dc747d50c653511c68ccb2cfbfb740bd5a7
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/logging.h"
9#include "base/string_util.h"
10#include "base/utf_string_conversions.h"
11#include "base/sys_string_conversions.h"
12#include "unicode/coll.h"
13#include "unicode/locid.h"
14#include "unicode/uchar.h"
15#include "unicode/uscript.h"
16
17#if defined(TOOLKIT_USES_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace {
22
23// Extract language and country, ignore keywords, concatenate using dash.
24std::string GetLocaleString(const icu::Locale& locale) {
25  const char* language = locale.getLanguage();
26  const char* country = locale.getCountry();
27
28  std::string result =
29      (language != NULL && *language != '\0') ? language : "und";
30
31  if (country != NULL && *country != '\0') {
32    result += '-';
33    result += country;
34  }
35
36  return result;
37}
38
39}  // namespace
40
41namespace base {
42namespace i18n {
43
// Cached text direction of ICU's default locale. UNKNOWN_DIRECTION means the
// value has not been computed yet: SetICUDefaultLocale() resets it to that
// state and ICUIsRTL() fills it in on first use.
static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
46
47// Convert the ICU default locale to a string.
48std::string GetConfiguredLocale() {
49  return GetLocaleString(icu::Locale::getDefault());
50}
51
52// Convert the ICU canonicalized locale to a string.
53std::string GetCanonicalLocale(const char* locale) {
54  return GetLocaleString(icu::Locale::createCanonical(locale));
55}
56
57// Convert Chrome locale name to ICU locale name
58std::string ICULocaleName(const std::string& locale_string) {
59  // If not Spanish, just return it.
60  if (locale_string.substr(0, 2) != "es")
61    return locale_string;
62  // Expand es to es-ES.
63  if (LowerCaseEqualsASCII(locale_string, "es"))
64    return "es-ES";
65  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
66  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
67  // to es-MX (the most populous in Spanish-speaking Latin America).
68  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
69    const icu::Locale& locale = icu::Locale::getDefault();
70    std::string language = locale.getLanguage();
71    const char* country = locale.getCountry();
72    if (LowerCaseEqualsASCII(language, "es") &&
73      !LowerCaseEqualsASCII(country, "es")) {
74        language += '-';
75        language += country;
76        return language;
77    }
78    return "es-MX";
79  }
80  // Currently, Chrome has only "es" and "es-419", but later we may have
81  // more specific "es-RR".
82  return locale_string;
83}
84
85void SetICUDefaultLocale(const std::string& locale_string) {
86  icu::Locale locale(ICULocaleName(locale_string).c_str());
87  UErrorCode error_code = U_ZERO_ERROR;
88  icu::Locale::setDefault(locale, error_code);
89  // This return value is actually bogus because Locale object is
90  // an ID and setDefault seems to always succeed (regardless of the
91  // presence of actual locale data). However,
92  // it does not hurt to have it as a sanity check.
93  DCHECK(U_SUCCESS(error_code));
94  g_icu_text_direction = UNKNOWN_DIRECTION;
95
96  // If we use Views toolkit on top of GtkWidget, then we need to keep
97  // GtkWidget's default text direction consistent with ICU's text direction.
98  // Because in this case ICU's text direction will be used instead.
99  // See IsRTL() function below.
100#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
101  gtk_widget_set_default_direction(
102      ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
103#endif
104}
105
106bool IsRTL() {
107#if defined(TOOLKIT_GTK)
108  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
109  return (gtk_dir == GTK_TEXT_DIR_RTL);
110#else
111  return ICUIsRTL();
112#endif
113}
114
115bool ICUIsRTL() {
116  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
117    const icu::Locale& locale = icu::Locale::getDefault();
118    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
119  }
120  return g_icu_text_direction == RIGHT_TO_LEFT;
121}
122
123TextDirection GetTextDirectionForLocale(const char* locale_name) {
124  UErrorCode status = U_ZERO_ERROR;
125  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
126  DCHECK(U_SUCCESS(status));
127  // Treat anything other than RTL as LTR.
128  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
129}
130
131TextDirection GetFirstStrongCharacterDirection(const string16& text) {
132  const UChar* string = text.c_str();
133  size_t length = text.length();
134  size_t position = 0;
135  while (position < length) {
136    UChar32 character;
137    size_t next_position = position;
138    U16_NEXT(string, next_position, length, character);
139
140    // Now that we have the character, we use ICU in order to query for the
141    // appropriate Unicode BiDi character type.
142    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
143    if ((property == U_RIGHT_TO_LEFT) ||
144        (property == U_RIGHT_TO_LEFT_ARABIC) ||
145        (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
146        (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
147      return RIGHT_TO_LEFT;
148    } else if ((property == U_LEFT_TO_RIGHT) ||
149               (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
150               (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
151      return LEFT_TO_RIGHT;
152    }
153
154    position = next_position;
155  }
156
157  return LEFT_TO_RIGHT;
158}
159
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: converts |text| to UTF-16 and defers to the string16
// version.
TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return GetFirstStrongCharacterDirection(utf16_text);
}
#endif
165
166bool AdjustStringForLocaleDirection(const string16& text,
167                                    string16* localized_text) {
168  if (!IsRTL() || text.empty())
169    return false;
170
171  // Marking the string as LTR if the locale is RTL and the string does not
172  // contain strong RTL characters. Otherwise, mark the string as RTL.
173  *localized_text = text;
174  bool has_rtl_chars = StringContainsStrongRTLChars(text);
175  if (!has_rtl_chars)
176    WrapStringWithLTRFormatting(localized_text);
177  else
178    WrapStringWithRTLFormatting(localized_text);
179
180  return true;
181}
182
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: does the adjustment in UTF-16 and converts the result
// back. |localized_text| is written only when the adjustment succeeded.
bool AdjustStringForLocaleDirection(const std::wstring& text,
                                    std::wstring* localized_text) {
  string16 adjusted_utf16;
  if (!AdjustStringForLocaleDirection(WideToUTF16(text), &adjusted_utf16))
    return false;

  *localized_text = UTF16ToWide(adjusted_utf16);
  return true;
}
#endif
195
196bool StringContainsStrongRTLChars(const string16& text) {
197  const UChar* string = text.c_str();
198  size_t length = text.length();
199  size_t position = 0;
200  while (position < length) {
201    UChar32 character;
202    size_t next_position = position;
203    U16_NEXT(string, next_position, length, character);
204
205    // Now that we have the character, we use ICU in order to query for the
206    // appropriate Unicode BiDi character type.
207    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
208    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
209      return true;
210
211    position = next_position;
212  }
213
214  return false;
215}
216
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: converts |text| to UTF-16 and defers to the string16
// version.
bool StringContainsStrongRTLChars(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return StringContainsStrongRTLChars(utf16_text);
}
#endif
222
223void WrapStringWithLTRFormatting(string16* text) {
224  if (text->empty())
225    return;
226
227  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
228  text->insert(0, 1, kLeftToRightEmbeddingMark);
229
230  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
231  text->push_back(kPopDirectionalFormatting);
232}
233
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: wraps a non-empty |*text| in an LRE (Left-To-Right
// Embedding) / PDF (Pop Directional Formatting) pair. An empty string is left
// as is.
void WrapStringWithLTRFormatting(std::wstring* text) {
  std::wstring& wrapped = *text;
  if (wrapped.empty())
    return;

  // LRE goes in front, PDF goes at the end.
  wrapped.insert(wrapped.begin(),
                 static_cast<wchar_t>(kLeftToRightEmbeddingMark));
  wrapped += static_cast<wchar_t>(kPopDirectionalFormatting);
}
#endif
246
247void WrapStringWithRTLFormatting(string16* text) {
248  if (text->empty())
249    return;
250
251  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
252  text->insert(0, 1, kRightToLeftEmbeddingMark);
253
254  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
255  text->push_back(kPopDirectionalFormatting);
256}
257
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload: wraps a non-empty |*text| in an RLE (Right-To-Left
// Embedding) / PDF (Pop Directional Formatting) pair. An empty string is left
// as is.
void WrapStringWithRTLFormatting(std::wstring* text) {
  std::wstring& wrapped = *text;
  if (wrapped.empty())
    return;

  // RLE goes in front, PDF goes at the end.
  wrapped.insert(wrapped.begin(),
                 static_cast<wchar_t>(kRightToLeftEmbeddingMark));
  wrapped += static_cast<wchar_t>(kPopDirectionalFormatting);
}
#endif
270
271void WrapPathWithLTRFormatting(const FilePath& path,
272                               string16* rtl_safe_path) {
273  // Wrap the overall path with LRE-PDF pair which essentialy marks the
274  // string as a Left-To-Right string.
275  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
276  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
277#if defined(OS_MACOSX)
278    rtl_safe_path->append(UTF8ToUTF16(path.value()));
279#elif defined(OS_WIN)
280    rtl_safe_path->append(path.value());
281#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
282    std::wstring wide_path = base::SysNativeMBToWide(path.value());
283    rtl_safe_path->append(WideToUTF16(wide_path));
284#endif
285  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
286  rtl_safe_path->push_back(kPopDirectionalFormatting);
287}
288
289string16 GetDisplayStringInLTRDirectionality(const string16& text) {
290  if (!IsRTL())
291    return text;
292  string16 text_mutable(text);
293  WrapStringWithLTRFormatting(&text_mutable);
294  return text_mutable;
295}
296
297const string16 StripWrappingBidiControlCharacters(const string16& text) {
298  if (text.empty())
299    return text;
300  size_t begin_index = 0;
301  char16 begin = text[begin_index];
302  if (begin == kLeftToRightEmbeddingMark ||
303      begin == kRightToLeftEmbeddingMark ||
304      begin == kLeftToRightOverride ||
305      begin == kRightToLeftOverride)
306    ++begin_index;
307  size_t end_index = text.length() - 1;
308  if (text[end_index] == kPopDirectionalFormatting)
309    --end_index;
310  return text.substr(begin_index, end_index - begin_index + 1);
311}
312
313}  // namespace i18n
314}  // namespace base
315