rtl.cc revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/logging.h"
9#include "base/string_util.h"
10#include "base/utf_string_conversions.h"
11#include "base/sys_string_conversions.h"
12#include "unicode/coll.h"
13#include "unicode/locid.h"
14#include "unicode/uchar.h"
15#include "unicode/uscript.h"
16
17#if defined(TOOLKIT_USES_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace {
22
23// Extract language and country, ignore keywords, concatenate using dash.
24std::string GetLocaleString(const icu::Locale& locale) {
25  const char* language = locale.getLanguage();
26  const char* country = locale.getCountry();
27
28  std::string result =
29      (language != NULL && *language != '\0') ? language : "und";
30
31  if (country != NULL && *country != '\0') {
32    result += '-';
33    result += country;
34  }
35
36  return result;
37}
38
39}  // namespace
40
41namespace base {
42namespace i18n {
43
44// Represents the locale-specific ICU text direction.
45static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
46
47#if defined(OS_WIN)
48void GetLanguageAndRegionFromOS(std::string* lang, std::string* region) {
49  // Later we may have to change this to be OS-dependent so that
50  // it's not affected by ICU's default locale. It's all right
51  // to do this way because SetICUDefaultLocale is internal
52  // to this file and we know that it's not yet called when this function
53  // is called.
54  const icu::Locale& locale = icu::Locale::getDefault();
55  const char* language = locale.getLanguage();
56  const char* country = locale.getCountry();
57  DCHECK(language);
58  *lang = language;
59  *region = country;
60}
61#endif
62
63// Convert the ICU default locale to a string.
64std::string GetConfiguredLocale() {
65  return GetLocaleString(icu::Locale::getDefault());
66}
67
68// Convert the ICU canonicalized locale to a string.
69std::string GetCanonicalLocale(const char* locale) {
70  return GetLocaleString(icu::Locale::createCanonical(locale));
71}
72
73// Convert Chrome locale name to ICU locale name
74std::string ICULocaleName(const std::string& locale_string) {
75  // If not Spanish, just return it.
76  if (locale_string.substr(0, 2) != "es")
77    return locale_string;
78  // Expand es to es-ES.
79  if (LowerCaseEqualsASCII(locale_string, "es"))
80    return "es-ES";
81  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
82  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
83  // to es-MX (the most populous in Spanish-speaking Latin America).
84  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
85    const icu::Locale& locale = icu::Locale::getDefault();
86    std::string language = locale.getLanguage();
87    const char* country = locale.getCountry();
88    if (LowerCaseEqualsASCII(language, "es") &&
89      !LowerCaseEqualsASCII(country, "es")) {
90        language += '-';
91        language += country;
92        return language;
93    }
94    return "es-MX";
95  }
96  // Currently, Chrome has only "es" and "es-419", but later we may have
97  // more specific "es-RR".
98  return locale_string;
99}
100
101void SetICUDefaultLocale(const std::string& locale_string) {
102  icu::Locale locale(ICULocaleName(locale_string).c_str());
103  UErrorCode error_code = U_ZERO_ERROR;
104  icu::Locale::setDefault(locale, error_code);
105  // This return value is actually bogus because Locale object is
106  // an ID and setDefault seems to always succeed (regardless of the
107  // presence of actual locale data). However,
108  // it does not hurt to have it as a sanity check.
109  DCHECK(U_SUCCESS(error_code));
110  g_icu_text_direction = UNKNOWN_DIRECTION;
111
112  // If we use Views toolkit on top of GtkWidget, then we need to keep
113  // GtkWidget's default text direction consistent with ICU's text direction.
114  // Because in this case ICU's text direction will be used instead.
115  // See IsRTL() function below.
116#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK)
117  gtk_widget_set_default_direction(
118      ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
119#endif
120}
121
122bool IsRTL() {
123#if defined(TOOLKIT_GTK)
124  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
125  return (gtk_dir == GTK_TEXT_DIR_RTL);
126#else
127  return ICUIsRTL();
128#endif
129}
130
131bool ICUIsRTL() {
132  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
133    const icu::Locale& locale = icu::Locale::getDefault();
134    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
135  }
136  return g_icu_text_direction == RIGHT_TO_LEFT;
137}
138
139TextDirection GetTextDirectionForLocale(const char* locale_name) {
140  UErrorCode status = U_ZERO_ERROR;
141  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
142  DCHECK(U_SUCCESS(status));
143  // Treat anything other than RTL as LTR.
144  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
145}
146
147TextDirection GetFirstStrongCharacterDirection(const string16& text) {
148  const UChar* string = text.c_str();
149  size_t length = text.length();
150  size_t position = 0;
151  while (position < length) {
152    UChar32 character;
153    size_t next_position = position;
154    U16_NEXT(string, next_position, length, character);
155
156    // Now that we have the character, we use ICU in order to query for the
157    // appropriate Unicode BiDi character type.
158    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
159    if ((property == U_RIGHT_TO_LEFT) ||
160        (property == U_RIGHT_TO_LEFT_ARABIC) ||
161        (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
162        (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
163      return RIGHT_TO_LEFT;
164    } else if ((property == U_LEFT_TO_RIGHT) ||
165               (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
166               (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
167      return LEFT_TO_RIGHT;
168    }
169
170    position = next_position;
171  }
172
173  return LEFT_TO_RIGHT;
174}
175
176#if defined(WCHAR_T_IS_UTF32)
177TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) {
178  return GetFirstStrongCharacterDirection(WideToUTF16(text));
179}
180#endif
181
182bool AdjustStringForLocaleDirection(const string16& text,
183                                    string16* localized_text) {
184  if (!IsRTL() || text.empty())
185    return false;
186
187  // Marking the string as LTR if the locale is RTL and the string does not
188  // contain strong RTL characters. Otherwise, mark the string as RTL.
189  *localized_text = text;
190  bool has_rtl_chars = StringContainsStrongRTLChars(text);
191  if (!has_rtl_chars)
192    WrapStringWithLTRFormatting(localized_text);
193  else
194    WrapStringWithRTLFormatting(localized_text);
195
196  return true;
197}
198
199#if defined(WCHAR_T_IS_UTF32)
200bool AdjustStringForLocaleDirection(const std::wstring& text,
201                                    std::wstring* localized_text) {
202  string16 out;
203  if (AdjustStringForLocaleDirection(WideToUTF16(text), &out)) {
204    // We should only touch the output on success.
205    *localized_text = UTF16ToWide(out);
206    return true;
207  }
208  return false;
209}
210#endif
211
212bool StringContainsStrongRTLChars(const string16& text) {
213  const UChar* string = text.c_str();
214  size_t length = text.length();
215  size_t position = 0;
216  while (position < length) {
217    UChar32 character;
218    size_t next_position = position;
219    U16_NEXT(string, next_position, length, character);
220
221    // Now that we have the character, we use ICU in order to query for the
222    // appropriate Unicode BiDi character type.
223    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
224    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
225      return true;
226
227    position = next_position;
228  }
229
230  return false;
231}
232
233#if defined(WCHAR_T_IS_UTF32)
234bool StringContainsStrongRTLChars(const std::wstring& text) {
235  return StringContainsStrongRTLChars(WideToUTF16(text));
236}
237#endif
238
239void WrapStringWithLTRFormatting(string16* text) {
240  if (text->empty())
241    return;
242
243  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
244  text->insert(0, 1, kLeftToRightEmbeddingMark);
245
246  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
247  text->push_back(kPopDirectionalFormatting);
248}
249
250#if defined(WCHAR_T_IS_UTF32)
251void WrapStringWithLTRFormatting(std::wstring* text) {
252  if (text->empty())
253    return;
254
255  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
256  text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark));
257
258  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
259  text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
260}
261#endif
262
263void WrapStringWithRTLFormatting(string16* text) {
264  if (text->empty())
265    return;
266
267  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
268  text->insert(0, 1, kRightToLeftEmbeddingMark);
269
270  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
271  text->push_back(kPopDirectionalFormatting);
272}
273
274#if defined(WCHAR_T_IS_UTF32)
275void WrapStringWithRTLFormatting(std::wstring* text) {
276  if (text->empty())
277    return;
278
279  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
280  text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark));
281
282  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
283  text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting));
284}
285#endif
286
287void WrapPathWithLTRFormatting(const FilePath& path,
288                               string16* rtl_safe_path) {
289  // Wrap the overall path with LRE-PDF pair which essentialy marks the
290  // string as a Left-To-Right string.
291  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
292  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
293#if defined(OS_MACOSX)
294    rtl_safe_path->append(UTF8ToUTF16(path.value()));
295#elif defined(OS_WIN)
296    rtl_safe_path->append(path.value());
297#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
298    std::wstring wide_path = base::SysNativeMBToWide(path.value());
299    rtl_safe_path->append(WideToUTF16(wide_path));
300#endif
301  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
302  rtl_safe_path->push_back(kPopDirectionalFormatting);
303}
304
305string16 GetDisplayStringInLTRDirectionality(const string16& text) {
306  if (!IsRTL())
307    return text;
308  string16 text_mutable(text);
309  WrapStringWithLTRFormatting(&text_mutable);
310  return text_mutable;
311}
312
313const string16 StripWrappingBidiControlCharacters(const string16& text) {
314  if (text.empty())
315    return text;
316  size_t begin_index = 0;
317  char16 begin = text[begin_index];
318  if (begin == kLeftToRightEmbeddingMark ||
319      begin == kRightToLeftEmbeddingMark ||
320      begin == kLeftToRightOverride ||
321      begin == kRightToLeftOverride)
322    ++begin_index;
323  size_t end_index = text.length() - 1;
324  if (text[end_index] == kPopDirectionalFormatting)
325    --end_index;
326  return text.substr(begin_index, end_index - begin_index + 1);
327}
328
329}  // namespace i18n
330}  // namespace base
331