rtl.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/files/file_path.h"
8#include "base/logging.h"
9#include "base/strings/string_util.h"
10#include "base/strings/sys_string_conversions.h"
11#include "base/strings/utf_string_conversions.h"
12#include "third_party/icu/source/common/unicode/locid.h"
13#include "third_party/icu/source/common/unicode/uchar.h"
14#include "third_party/icu/source/common/unicode/uscript.h"
15#include "third_party/icu/source/i18n/unicode/coll.h"
16
17#if defined(TOOLKIT_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace {
22
23// Extract language, country and variant, but ignore keywords.  For example,
24// en-US, ca@valencia, ca-ES@valencia.
25std::string GetLocaleString(const icu::Locale& locale) {
26  const char* language = locale.getLanguage();
27  const char* country = locale.getCountry();
28  const char* variant = locale.getVariant();
29
30  std::string result =
31      (language != NULL && *language != '\0') ? language : "und";
32
33  if (country != NULL && *country != '\0') {
34    result += '-';
35    result += country;
36  }
37
38  if (variant != NULL && *variant != '\0') {
39    std::string variant_str(variant);
40    StringToLowerASCII(&variant_str);
41    result += '@' + variant_str;
42  }
43
44  return result;
45}
46
47// Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
48// directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
49// http://unicode.org/reports/tr9/ for more information.
50base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
51  // Now that we have the character, we use ICU in order to query for the
52  // appropriate Unicode BiDi character type.
53  int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
54  if ((property == U_RIGHT_TO_LEFT) ||
55      (property == U_RIGHT_TO_LEFT_ARABIC) ||
56      (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
57      (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
58    return base::i18n::RIGHT_TO_LEFT;
59  } else if ((property == U_LEFT_TO_RIGHT) ||
60             (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
61             (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
62    return base::i18n::LEFT_TO_RIGHT;
63  }
64  return base::i18n::UNKNOWN_DIRECTION;
65}
66
67}  // namespace
68
69namespace base {
70namespace i18n {
71
// Caches the text direction of the ICU default locale. UNKNOWN_DIRECTION means
// the direction has not been computed yet: ICUIsRTL() fills it in on first use
// and SetICUDefaultLocale() resets it whenever the default locale changes.
static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
74
75// Convert the ICU default locale to a string.
76std::string GetConfiguredLocale() {
77  return GetLocaleString(icu::Locale::getDefault());
78}
79
80// Convert the ICU canonicalized locale to a string.
81std::string GetCanonicalLocale(const char* locale) {
82  return GetLocaleString(icu::Locale::createCanonical(locale));
83}
84
85// Convert Chrome locale name to ICU locale name
86std::string ICULocaleName(const std::string& locale_string) {
87  // If not Spanish, just return it.
88  if (locale_string.substr(0, 2) != "es")
89    return locale_string;
90  // Expand es to es-ES.
91  if (LowerCaseEqualsASCII(locale_string, "es"))
92    return "es-ES";
93  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
94  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
95  // to es-MX (the most populous in Spanish-speaking Latin America).
96  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
97    const icu::Locale& locale = icu::Locale::getDefault();
98    std::string language = locale.getLanguage();
99    const char* country = locale.getCountry();
100    if (LowerCaseEqualsASCII(language, "es") &&
101      !LowerCaseEqualsASCII(country, "es")) {
102        language += '-';
103        language += country;
104        return language;
105    }
106    return "es-MX";
107  }
108  // Currently, Chrome has only "es" and "es-419", but later we may have
109  // more specific "es-RR".
110  return locale_string;
111}
112
113void SetICUDefaultLocale(const std::string& locale_string) {
114  icu::Locale locale(ICULocaleName(locale_string).c_str());
115  UErrorCode error_code = U_ZERO_ERROR;
116  icu::Locale::setDefault(locale, error_code);
117  // This return value is actually bogus because Locale object is
118  // an ID and setDefault seems to always succeed (regardless of the
119  // presence of actual locale data). However,
120  // it does not hurt to have it as a sanity check.
121  DCHECK(U_SUCCESS(error_code));
122  g_icu_text_direction = UNKNOWN_DIRECTION;
123}
124
125bool IsRTL() {
126#if defined(TOOLKIT_GTK)
127  GtkTextDirection gtk_dir = gtk_widget_get_default_direction();
128  return gtk_dir == GTK_TEXT_DIR_RTL;
129#else
130  return ICUIsRTL();
131#endif
132}
133
134bool ICUIsRTL() {
135  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
136    const icu::Locale& locale = icu::Locale::getDefault();
137    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
138  }
139  return g_icu_text_direction == RIGHT_TO_LEFT;
140}
141
142TextDirection GetTextDirectionForLocale(const char* locale_name) {
143  UErrorCode status = U_ZERO_ERROR;
144  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
145  DCHECK(U_SUCCESS(status));
146  // Treat anything other than RTL as LTR.
147  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
148}
149
150TextDirection GetFirstStrongCharacterDirection(const string16& text) {
151  const UChar* string = text.c_str();
152  size_t length = text.length();
153  size_t position = 0;
154  while (position < length) {
155    UChar32 character;
156    size_t next_position = position;
157    U16_NEXT(string, next_position, length, character);
158    TextDirection direction = GetCharacterDirection(character);
159    if (direction != UNKNOWN_DIRECTION)
160      return direction;
161    position = next_position;
162  }
163  return LEFT_TO_RIGHT;
164}
165
166TextDirection GetLastStrongCharacterDirection(const string16& text) {
167  const UChar* string = text.c_str();
168  size_t position = text.length();
169  while (position > 0) {
170    UChar32 character;
171    size_t prev_position = position;
172    U16_PREV(string, 0, prev_position, character);
173    TextDirection direction = GetCharacterDirection(character);
174    if (direction != UNKNOWN_DIRECTION)
175      return direction;
176    position = prev_position;
177  }
178  return LEFT_TO_RIGHT;
179}
180
181TextDirection GetStringDirection(const string16& text) {
182  const UChar* string = text.c_str();
183  size_t length = text.length();
184  size_t position = 0;
185
186  TextDirection result(UNKNOWN_DIRECTION);
187  while (position < length) {
188    UChar32 character;
189    size_t next_position = position;
190    U16_NEXT(string, next_position, length, character);
191    TextDirection direction = GetCharacterDirection(character);
192    if (direction != UNKNOWN_DIRECTION) {
193      if (result != UNKNOWN_DIRECTION && result != direction)
194        return UNKNOWN_DIRECTION;
195      result = direction;
196    }
197    position = next_position;
198  }
199
200  // Handle the case of a string not containing any strong directionality
201  // characters defaulting to LEFT_TO_RIGHT.
202  if (result == UNKNOWN_DIRECTION)
203    return LEFT_TO_RIGHT;
204
205  return result;
206}
207
208#if defined(OS_WIN)
209bool AdjustStringForLocaleDirection(string16* text) {
210  if (!IsRTL() || text->empty())
211    return false;
212
213  // Marking the string as LTR if the locale is RTL and the string does not
214  // contain strong RTL characters. Otherwise, mark the string as RTL.
215  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
216  if (!has_rtl_chars)
217    WrapStringWithLTRFormatting(text);
218  else
219    WrapStringWithRTLFormatting(text);
220
221  return true;
222}
223
224bool UnadjustStringForLocaleDirection(string16* text) {
225  if (!IsRTL() || text->empty())
226    return false;
227
228  *text = StripWrappingBidiControlCharacters(*text);
229  return true;
230}
231#else
232bool AdjustStringForLocaleDirection(string16* text) {
233  // On OS X & GTK the directionality of a label is determined by the first
234  // strongly directional character.
235  // However, we want to make sure that in an LTR-language-UI all strings are
236  // left aligned and vice versa.
237  // A problem can arise if we display a string which starts with user input.
238  // User input may be of the opposite directionality to the UI. So the whole
239  // string will be displayed in the opposite directionality, e.g. if we want to
240  // display in an LTR UI [such as US English]:
241  //
242  // EMAN_NOISNETXE is now installed.
243  //
244  // Since EXTENSION_NAME begins with a strong RTL char, the label's
245  // directionality will be set to RTL and the string will be displayed visually
246  // as:
247  //
248  // .is now installed EMAN_NOISNETXE
249  //
250  // In order to solve this issue, we prepend an LRM to the string. An LRM is a
251  // strongly directional LTR char.
252  // We also append an LRM at the end, which ensures that we're in an LTR
253  // context.
254
255  // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
256  // box so there is no issue with displaying zero-width bidi control characters
257  // on any system.  Thus no need for the !IsRTL() check here.
258  if (text->empty())
259    return false;
260
261  bool ui_direction_is_rtl = IsRTL();
262
263  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
264  if (!ui_direction_is_rtl && has_rtl_chars) {
265    WrapStringWithRTLFormatting(text);
266    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
267                 kLeftToRightMark);
268    text->push_back(kLeftToRightMark);
269  } else if (ui_direction_is_rtl && has_rtl_chars) {
270    WrapStringWithRTLFormatting(text);
271    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
272                 kRightToLeftMark);
273    text->push_back(kRightToLeftMark);
274  } else if (ui_direction_is_rtl) {
275    WrapStringWithLTRFormatting(text);
276    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
277                 kRightToLeftMark);
278    text->push_back(kRightToLeftMark);
279  } else {
280    return false;
281  }
282
283  return true;
284}
285
286bool UnadjustStringForLocaleDirection(string16* text) {
287  if (text->empty())
288    return false;
289
290  size_t begin_index = 0;
291  char16 begin = text->at(begin_index);
292  if (begin == kLeftToRightMark ||
293      begin == kRightToLeftMark) {
294    ++begin_index;
295  }
296
297  size_t end_index = text->length() - 1;
298  char16 end = text->at(end_index);
299  if (end == kLeftToRightMark ||
300      end == kRightToLeftMark) {
301    --end_index;
302  }
303
304  string16 unmarked_text =
305      text->substr(begin_index, end_index - begin_index + 1);
306  *text = StripWrappingBidiControlCharacters(unmarked_text);
307  return true;
308}
309
310#endif  // !OS_WIN
311
312bool StringContainsStrongRTLChars(const string16& text) {
313  const UChar* string = text.c_str();
314  size_t length = text.length();
315  size_t position = 0;
316  while (position < length) {
317    UChar32 character;
318    size_t next_position = position;
319    U16_NEXT(string, next_position, length, character);
320
321    // Now that we have the character, we use ICU in order to query for the
322    // appropriate Unicode BiDi character type.
323    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
324    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
325      return true;
326
327    position = next_position;
328  }
329
330  return false;
331}
332
333void WrapStringWithLTRFormatting(string16* text) {
334  if (text->empty())
335    return;
336
337  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
338  text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
339               kLeftToRightEmbeddingMark);
340
341  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
342  text->push_back(kPopDirectionalFormatting);
343}
344
345void WrapStringWithRTLFormatting(string16* text) {
346  if (text->empty())
347    return;
348
349  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
350  text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
351               kRightToLeftEmbeddingMark);
352
353  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
354  text->push_back(kPopDirectionalFormatting);
355}
356
357void WrapPathWithLTRFormatting(const FilePath& path,
358                               string16* rtl_safe_path) {
359  // Wrap the overall path with LRE-PDF pair which essentialy marks the
360  // string as a Left-To-Right string.
361  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
362  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
363#if defined(OS_MACOSX)
364    rtl_safe_path->append(UTF8ToUTF16(path.value()));
365#elif defined(OS_WIN)
366    rtl_safe_path->append(path.value());
367#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
368    std::wstring wide_path = base::SysNativeMBToWide(path.value());
369    rtl_safe_path->append(WideToUTF16(wide_path));
370#endif
371  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
372  rtl_safe_path->push_back(kPopDirectionalFormatting);
373}
374
375string16 GetDisplayStringInLTRDirectionality(const string16& text) {
376  // Always wrap the string in RTL UI (it may be appended to RTL string).
377  // Also wrap strings with an RTL first strong character direction in LTR UI.
378  if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
379    string16 text_mutable(text);
380    WrapStringWithLTRFormatting(&text_mutable);
381    return text_mutable;
382  }
383  return text;
384}
385
386string16 StripWrappingBidiControlCharacters(const string16& text) {
387  if (text.empty())
388    return text;
389  size_t begin_index = 0;
390  char16 begin = text[begin_index];
391  if (begin == kLeftToRightEmbeddingMark ||
392      begin == kRightToLeftEmbeddingMark ||
393      begin == kLeftToRightOverride ||
394      begin == kRightToLeftOverride)
395    ++begin_index;
396  size_t end_index = text.length() - 1;
397  if (text[end_index] == kPopDirectionalFormatting)
398    --end_index;
399  return text.substr(begin_index, end_index - begin_index + 1);
400}
401
402}  // namespace i18n
403}  // namespace base
404