rtl.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/logging.h"
9#include "base/string_util.h"
10#include "base/utf_string_conversions.h"
11#include "base/sys_string_conversions.h"
12#include "unicode/coll.h"
13#include "unicode/locid.h"
14#include "unicode/uchar.h"
15#include "unicode/uscript.h"
16
17#if defined(TOOLKIT_USES_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace base {
22namespace i18n {
23
24// Represents the locale-specific ICU text direction.
25static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
26
27void GetLanguageAndRegionFromOS(std::string* lang, std::string* region) {
28  // Later we may have to change this to be OS-dependent so that
29  // it's not affected by ICU's default locale. It's all right
30  // to do this way because SetICUDefaultLocale is internal
31  // to this file and we know that it's not yet called when this function
32  // is called.
33  icu::Locale locale = icu::Locale::getDefault();
34  const char* language = locale.getLanguage();
35  const char* country = locale.getCountry();
36  DCHECK(language);
37  *lang = language;
38  *region = country;
39}
40
41// Convert Chrome locale name to ICU locale name
42std::string ICULocaleName(const std::string& locale_string) {
43  // If not Spanish, just return it.
44  if (locale_string.substr(0, 2) != "es")
45    return locale_string;
46  // Expand es to es-ES.
47  if (LowerCaseEqualsASCII(locale_string, "es"))
48    return "es-ES";
49  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
50  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
51  // to es-MX (the most populous in Spanish-speaking Latin America).
52  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
53    std::string lang, region;
54    GetLanguageAndRegionFromOS(&lang, &region);
55    if (LowerCaseEqualsASCII(lang, "es") &&
56      !LowerCaseEqualsASCII(region, "es")) {
57        lang.append("-");
58        lang.append(region);
59        return lang;
60    }
61    return "es-MX";
62  }
63  // Currently, Chrome has only "es" and "es-419", but later we may have
64  // more specific "es-RR".
65  return locale_string;
66}
67
68void SetICUDefaultLocale(const std::string& locale_string) {
69  icu::Locale locale(ICULocaleName(locale_string).c_str());
70  UErrorCode error_code = U_ZERO_ERROR;
71  icu::Locale::setDefault(locale, error_code);
72  // This return value is actually bogus because Locale object is
73  // an ID and setDefault seems to always succeed (regardless of the
74  // presence of actual locale data). However,
75  // it does not hurt to have it as a sanity check.
76  DCHECK(U_SUCCESS(error_code));
77  g_icu_text_direction = UNKNOWN_DIRECTION;
78
79  // If we use Views toolkit on top of GtkWidget, then we need to keep
80  // GtkWidget's default text direction consistent with ICU's text direction.
81  // Because in this case ICU's text direction will be used instead.
82  // See IsRTL() function below.
83#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
84  gtk_widget_set_default_direction(
85      ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
86#endif
87}
88
89bool IsRTL() {
90#if defined(TOOLKIT_GTK)
91  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
92  return (gtk_dir == GTK_TEXT_DIR_RTL);
93#else
94  return ICUIsRTL();
95#endif
96}
97
98bool ICUIsRTL() {
99  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
100    const icu::Locale& locale = icu::Locale::getDefault();
101    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
102  }
103  return g_icu_text_direction == RIGHT_TO_LEFT;
104}
105
106TextDirection GetTextDirectionForLocale(const char* locale_name) {
107  UErrorCode status = U_ZERO_ERROR;
108  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
109  DCHECK(U_SUCCESS(status));
110  // Treat anything other than RTL as LTR.
111  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
112}
113
114TextDirection GetFirstStrongCharacterDirection(const string16& text) {
115  const UChar* string = text.c_str();
116  size_t length = text.length();
117  size_t position = 0;
118  while (position < length) {
119    UChar32 character;
120    size_t next_position = position;
121    U16_NEXT(string, next_position, length, character);
122
123    // Now that we have the character, we use ICU in order to query for the
124    // appropriate Unicode BiDi character type.
125    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
126    if ((property == U_RIGHT_TO_LEFT) ||
127        (property == U_RIGHT_TO_LEFT_ARABIC) ||
128        (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
129        (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
130      return RIGHT_TO_LEFT;
131    } else if ((property == U_LEFT_TO_RIGHT) ||
132               (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
133               (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
134      return LEFT_TO_RIGHT;
135    }
136
137    position = next_position;
138  }
139
140  return LEFT_TO_RIGHT;
141}
142
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload, compiled only when WCHAR_T_IS_UTF32 is defined:
// converts |text| to UTF-16 and defers to the string16 overload.
TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return GetFirstStrongCharacterDirection(utf16_text);
}
#endif
148
149bool AdjustStringForLocaleDirection(const string16& text,
150                                    string16* localized_text) {
151  if (!IsRTL() || text.empty())
152    return false;
153
154  // Marking the string as LTR if the locale is RTL and the string does not
155  // contain strong RTL characters. Otherwise, mark the string as RTL.
156  *localized_text = text;
157  bool has_rtl_chars = StringContainsStrongRTLChars(text);
158  if (!has_rtl_chars)
159    WrapStringWithLTRFormatting(localized_text);
160  else
161    WrapStringWithRTLFormatting(localized_text);
162
163  return true;
164}
165
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload, compiled only when WCHAR_T_IS_UTF32 is defined: runs
// the string16 overload on a UTF-16 copy of |text| and converts the result
// back to a wide string.
bool AdjustStringForLocaleDirection(const std::wstring& text,
                                    std::wstring* localized_text) {
  string16 adjusted;
  if (!AdjustStringForLocaleDirection(WideToUTF16(text), &adjusted))
    return false;  // The output is only touched on success.

  *localized_text = UTF16ToWide(adjusted);
  return true;
}
#endif
178
179bool StringContainsStrongRTLChars(const string16& text) {
180  const UChar* string = text.c_str();
181  size_t length = text.length();
182  size_t position = 0;
183  while (position < length) {
184    UChar32 character;
185    size_t next_position = position;
186    U16_NEXT(string, next_position, length, character);
187
188    // Now that we have the character, we use ICU in order to query for the
189    // appropriate Unicode BiDi character type.
190    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
191    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
192      return true;
193
194    position = next_position;
195  }
196
197  return false;
198}
199
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload, compiled only when WCHAR_T_IS_UTF32 is defined:
// converts |text| to UTF-16 and defers to the string16 overload.
bool StringContainsStrongRTLChars(const std::wstring& text) {
  const string16 utf16_text = WideToUTF16(text);
  return StringContainsStrongRTLChars(utf16_text);
}
#endif
205
206void WrapStringWithLTRFormatting(string16* text) {
207  if (text->empty())
208    return;
209
210  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
211  text->insert(0, 1, kLeftToRightEmbeddingMark);
212
213  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
214  text->push_back(kPopDirectionalFormatting);
215}
216
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload, compiled only when WCHAR_T_IS_UTF32 is defined.
// Surrounds a non-empty |text| in place with an LRE (Left-To-Right Embedding)
// mark at the front and a PDF (Pop Directional Formatting) mark at the back.
// An empty string is left unchanged.
void WrapStringWithLTRFormatting(std::wstring* text) {
  if (!text->empty()) {
    const wchar_t lre_mark = static_cast<wchar_t>(kLeftToRightEmbeddingMark);
    const wchar_t pdf_mark = static_cast<wchar_t>(kPopDirectionalFormatting);
    text->insert(text->begin(), lre_mark);
    text->push_back(pdf_mark);
  }
}
#endif
229
230void WrapStringWithRTLFormatting(string16* text) {
231  if (text->empty())
232    return;
233
234  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
235  text->insert(0, 1, kRightToLeftEmbeddingMark);
236
237  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
238  text->push_back(kPopDirectionalFormatting);
239}
240
#if defined(WCHAR_T_IS_UTF32)
// Wide-string overload, compiled only when WCHAR_T_IS_UTF32 is defined.
// Surrounds a non-empty |text| in place with an RLE (Right-To-Left Embedding)
// mark at the front and a PDF (Pop Directional Formatting) mark at the back.
// An empty string is left unchanged.
void WrapStringWithRTLFormatting(std::wstring* text) {
  if (!text->empty()) {
    const wchar_t rle_mark = static_cast<wchar_t>(kRightToLeftEmbeddingMark);
    const wchar_t pdf_mark = static_cast<wchar_t>(kPopDirectionalFormatting);
    text->insert(text->begin(), rle_mark);
    text->push_back(pdf_mark);
  }
}
#endif
253
254void WrapPathWithLTRFormatting(const FilePath& path,
255                               string16* rtl_safe_path) {
256  // Wrap the overall path with LRE-PDF pair which essentialy marks the
257  // string as a Left-To-Right string.
258  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
259  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
260#if defined(OS_MACOSX)
261    rtl_safe_path->append(UTF8ToUTF16(path.value()));
262#elif defined(OS_WIN)
263    rtl_safe_path->append(path.value());
264#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
265    std::wstring wide_path = base::SysNativeMBToWide(path.value());
266    rtl_safe_path->append(WideToUTF16(wide_path));
267#endif
268  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
269  rtl_safe_path->push_back(kPopDirectionalFormatting);
270}
271
272std::wstring GetDisplayStringInLTRDirectionality(std::wstring* text) {
273  if (IsRTL())
274    WrapStringWithLTRFormatting(text);
275  return *text;
276}
277
278const string16 StripWrappingBidiControlCharacters(const string16& text) {
279  if (text.empty())
280    return text;
281  size_t begin_index = 0;
282  char16 begin = text[begin_index];
283  if (begin == kLeftToRightEmbeddingMark ||
284      begin == kRightToLeftEmbeddingMark ||
285      begin == kLeftToRightOverride ||
286      begin == kRightToLeftOverride)
287    ++begin_index;
288  size_t end_index = text.length() - 1;
289  if (text[end_index] == kPopDirectionalFormatting)
290    --end_index;
291  return text.substr(begin_index, end_index - begin_index + 1);
292}
293
294}  // namespace i18n
295}  // namespace base
296