1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/files/file_path.h"
8#include "base/logging.h"
9#include "base/strings/string_util.h"
10#include "base/strings/sys_string_conversions.h"
11#include "base/strings/utf_string_conversions.h"
12#include "third_party/icu/source/common/unicode/locid.h"
13#include "third_party/icu/source/common/unicode/uchar.h"
14#include "third_party/icu/source/common/unicode/uscript.h"
15#include "third_party/icu/source/i18n/unicode/coll.h"
16
17namespace {
18
19// Extract language, country and variant, but ignore keywords.  For example,
20// en-US, ca@valencia, ca-ES@valencia.
21std::string GetLocaleString(const icu::Locale& locale) {
22  const char* language = locale.getLanguage();
23  const char* country = locale.getCountry();
24  const char* variant = locale.getVariant();
25
26  std::string result =
27      (language != NULL && *language != '\0') ? language : "und";
28
29  if (country != NULL && *country != '\0') {
30    result += '-';
31    result += country;
32  }
33
34  if (variant != NULL && *variant != '\0') {
35    std::string variant_str(variant);
36    base::StringToLowerASCII(&variant_str);
37    result += '@' + variant_str;
38  }
39
40  return result;
41}
42
43// Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
44// directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
45// http://unicode.org/reports/tr9/ for more information.
46base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
47  // Now that we have the character, we use ICU in order to query for the
48  // appropriate Unicode BiDi character type.
49  int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
50  if ((property == U_RIGHT_TO_LEFT) ||
51      (property == U_RIGHT_TO_LEFT_ARABIC) ||
52      (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
53      (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
54    return base::i18n::RIGHT_TO_LEFT;
55  } else if ((property == U_LEFT_TO_RIGHT) ||
56             (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
57             (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
58    return base::i18n::LEFT_TO_RIGHT;
59  }
60  return base::i18n::UNKNOWN_DIRECTION;
61}
62
63}  // namespace
64
65namespace base {
66namespace i18n {
67
68// Represents the locale-specific ICU text direction.
69static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
70
71// Convert the ICU default locale to a string.
72std::string GetConfiguredLocale() {
73  return GetLocaleString(icu::Locale::getDefault());
74}
75
76// Convert the ICU canonicalized locale to a string.
77std::string GetCanonicalLocale(const char* locale) {
78  return GetLocaleString(icu::Locale::createCanonical(locale));
79}
80
81// Convert Chrome locale name to ICU locale name
82std::string ICULocaleName(const std::string& locale_string) {
83  // If not Spanish, just return it.
84  if (locale_string.substr(0, 2) != "es")
85    return locale_string;
86  // Expand es to es-ES.
87  if (LowerCaseEqualsASCII(locale_string, "es"))
88    return "es-ES";
89  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
90  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
91  // to es-MX (the most populous in Spanish-speaking Latin America).
92  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
93    const icu::Locale& locale = icu::Locale::getDefault();
94    std::string language = locale.getLanguage();
95    const char* country = locale.getCountry();
96    if (LowerCaseEqualsASCII(language, "es") &&
97      !LowerCaseEqualsASCII(country, "es")) {
98        language += '-';
99        language += country;
100        return language;
101    }
102    return "es-MX";
103  }
104  // Currently, Chrome has only "es" and "es-419", but later we may have
105  // more specific "es-RR".
106  return locale_string;
107}
108
109void SetICUDefaultLocale(const std::string& locale_string) {
110  icu::Locale locale(ICULocaleName(locale_string).c_str());
111  UErrorCode error_code = U_ZERO_ERROR;
112  icu::Locale::setDefault(locale, error_code);
113  // This return value is actually bogus because Locale object is
114  // an ID and setDefault seems to always succeed (regardless of the
115  // presence of actual locale data). However,
116  // it does not hurt to have it as a sanity check.
117  DCHECK(U_SUCCESS(error_code));
118  g_icu_text_direction = UNKNOWN_DIRECTION;
119}
120
121bool IsRTL() {
122  return ICUIsRTL();
123}
124
125bool ICUIsRTL() {
126  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
127    const icu::Locale& locale = icu::Locale::getDefault();
128    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
129  }
130  return g_icu_text_direction == RIGHT_TO_LEFT;
131}
132
133TextDirection GetTextDirectionForLocale(const char* locale_name) {
134  UErrorCode status = U_ZERO_ERROR;
135  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
136  DCHECK(U_SUCCESS(status));
137  // Treat anything other than RTL as LTR.
138  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
139}
140
141TextDirection GetFirstStrongCharacterDirection(const string16& text) {
142  const UChar* string = text.c_str();
143  size_t length = text.length();
144  size_t position = 0;
145  while (position < length) {
146    UChar32 character;
147    size_t next_position = position;
148    U16_NEXT(string, next_position, length, character);
149    TextDirection direction = GetCharacterDirection(character);
150    if (direction != UNKNOWN_DIRECTION)
151      return direction;
152    position = next_position;
153  }
154  return LEFT_TO_RIGHT;
155}
156
157TextDirection GetLastStrongCharacterDirection(const string16& text) {
158  const UChar* string = text.c_str();
159  size_t position = text.length();
160  while (position > 0) {
161    UChar32 character;
162    size_t prev_position = position;
163    U16_PREV(string, 0, prev_position, character);
164    TextDirection direction = GetCharacterDirection(character);
165    if (direction != UNKNOWN_DIRECTION)
166      return direction;
167    position = prev_position;
168  }
169  return LEFT_TO_RIGHT;
170}
171
172TextDirection GetStringDirection(const string16& text) {
173  const UChar* string = text.c_str();
174  size_t length = text.length();
175  size_t position = 0;
176
177  TextDirection result(UNKNOWN_DIRECTION);
178  while (position < length) {
179    UChar32 character;
180    size_t next_position = position;
181    U16_NEXT(string, next_position, length, character);
182    TextDirection direction = GetCharacterDirection(character);
183    if (direction != UNKNOWN_DIRECTION) {
184      if (result != UNKNOWN_DIRECTION && result != direction)
185        return UNKNOWN_DIRECTION;
186      result = direction;
187    }
188    position = next_position;
189  }
190
191  // Handle the case of a string not containing any strong directionality
192  // characters defaulting to LEFT_TO_RIGHT.
193  if (result == UNKNOWN_DIRECTION)
194    return LEFT_TO_RIGHT;
195
196  return result;
197}
198
199#if defined(OS_WIN)
200bool AdjustStringForLocaleDirection(string16* text) {
201  if (!IsRTL() || text->empty())
202    return false;
203
204  // Marking the string as LTR if the locale is RTL and the string does not
205  // contain strong RTL characters. Otherwise, mark the string as RTL.
206  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
207  if (!has_rtl_chars)
208    WrapStringWithLTRFormatting(text);
209  else
210    WrapStringWithRTLFormatting(text);
211
212  return true;
213}
214
215bool UnadjustStringForLocaleDirection(string16* text) {
216  if (!IsRTL() || text->empty())
217    return false;
218
219  *text = StripWrappingBidiControlCharacters(*text);
220  return true;
221}
222#else
223bool AdjustStringForLocaleDirection(string16* text) {
224  // On OS X & GTK the directionality of a label is determined by the first
225  // strongly directional character.
226  // However, we want to make sure that in an LTR-language-UI all strings are
227  // left aligned and vice versa.
228  // A problem can arise if we display a string which starts with user input.
229  // User input may be of the opposite directionality to the UI. So the whole
230  // string will be displayed in the opposite directionality, e.g. if we want to
231  // display in an LTR UI [such as US English]:
232  //
233  // EMAN_NOISNETXE is now installed.
234  //
235  // Since EXTENSION_NAME begins with a strong RTL char, the label's
236  // directionality will be set to RTL and the string will be displayed visually
237  // as:
238  //
239  // .is now installed EMAN_NOISNETXE
240  //
241  // In order to solve this issue, we prepend an LRM to the string. An LRM is a
242  // strongly directional LTR char.
243  // We also append an LRM at the end, which ensures that we're in an LTR
244  // context.
245
246  // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
247  // box so there is no issue with displaying zero-width bidi control characters
248  // on any system.  Thus no need for the !IsRTL() check here.
249  if (text->empty())
250    return false;
251
252  bool ui_direction_is_rtl = IsRTL();
253
254  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
255  if (!ui_direction_is_rtl && has_rtl_chars) {
256    WrapStringWithRTLFormatting(text);
257    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
258                 kLeftToRightMark);
259    text->push_back(kLeftToRightMark);
260  } else if (ui_direction_is_rtl && has_rtl_chars) {
261    WrapStringWithRTLFormatting(text);
262    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
263                 kRightToLeftMark);
264    text->push_back(kRightToLeftMark);
265  } else if (ui_direction_is_rtl) {
266    WrapStringWithLTRFormatting(text);
267    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
268                 kRightToLeftMark);
269    text->push_back(kRightToLeftMark);
270  } else {
271    return false;
272  }
273
274  return true;
275}
276
277bool UnadjustStringForLocaleDirection(string16* text) {
278  if (text->empty())
279    return false;
280
281  size_t begin_index = 0;
282  char16 begin = text->at(begin_index);
283  if (begin == kLeftToRightMark ||
284      begin == kRightToLeftMark) {
285    ++begin_index;
286  }
287
288  size_t end_index = text->length() - 1;
289  char16 end = text->at(end_index);
290  if (end == kLeftToRightMark ||
291      end == kRightToLeftMark) {
292    --end_index;
293  }
294
295  string16 unmarked_text =
296      text->substr(begin_index, end_index - begin_index + 1);
297  *text = StripWrappingBidiControlCharacters(unmarked_text);
298  return true;
299}
300
301#endif  // !OS_WIN
302
303bool StringContainsStrongRTLChars(const string16& text) {
304  const UChar* string = text.c_str();
305  size_t length = text.length();
306  size_t position = 0;
307  while (position < length) {
308    UChar32 character;
309    size_t next_position = position;
310    U16_NEXT(string, next_position, length, character);
311
312    // Now that we have the character, we use ICU in order to query for the
313    // appropriate Unicode BiDi character type.
314    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
315    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
316      return true;
317
318    position = next_position;
319  }
320
321  return false;
322}
323
324void WrapStringWithLTRFormatting(string16* text) {
325  if (text->empty())
326    return;
327
328  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
329  text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
330               kLeftToRightEmbeddingMark);
331
332  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
333  text->push_back(kPopDirectionalFormatting);
334}
335
336void WrapStringWithRTLFormatting(string16* text) {
337  if (text->empty())
338    return;
339
340  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
341  text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
342               kRightToLeftEmbeddingMark);
343
344  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
345  text->push_back(kPopDirectionalFormatting);
346}
347
348void WrapPathWithLTRFormatting(const FilePath& path,
349                               string16* rtl_safe_path) {
350  // Wrap the overall path with LRE-PDF pair which essentialy marks the
351  // string as a Left-To-Right string.
352  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
353  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
354#if defined(OS_MACOSX)
355    rtl_safe_path->append(UTF8ToUTF16(path.value()));
356#elif defined(OS_WIN)
357    rtl_safe_path->append(path.value());
358#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
359    std::wstring wide_path = base::SysNativeMBToWide(path.value());
360    rtl_safe_path->append(WideToUTF16(wide_path));
361#endif
362  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
363  rtl_safe_path->push_back(kPopDirectionalFormatting);
364}
365
366string16 GetDisplayStringInLTRDirectionality(const string16& text) {
367  // Always wrap the string in RTL UI (it may be appended to RTL string).
368  // Also wrap strings with an RTL first strong character direction in LTR UI.
369  if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
370    string16 text_mutable(text);
371    WrapStringWithLTRFormatting(&text_mutable);
372    return text_mutable;
373  }
374  return text;
375}
376
377string16 StripWrappingBidiControlCharacters(const string16& text) {
378  if (text.empty())
379    return text;
380  size_t begin_index = 0;
381  char16 begin = text[begin_index];
382  if (begin == kLeftToRightEmbeddingMark ||
383      begin == kRightToLeftEmbeddingMark ||
384      begin == kLeftToRightOverride ||
385      begin == kRightToLeftOverride)
386    ++begin_index;
387  size_t end_index = text.length() - 1;
388  if (text[end_index] == kPopDirectionalFormatting)
389    --end_index;
390  return text.substr(begin_index, end_index - begin_index + 1);
391}
392
393}  // namespace i18n
394}  // namespace base
395