rtl.cc revision 6e8cce623b6e4fe0c9e4af605d675dd9d0338c38
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2011 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/i18n/rtl.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/files/file_path.h"
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
9868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/string_util.h"
102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "base/strings/sys_string_conversions.h"
11868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "base/strings/utf_string_conversions.h"
12ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/common/unicode/locid.h"
13ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/common/unicode/uchar.h"
14ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/common/unicode/uscript.h"
15ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "third_party/icu/source/i18n/unicode/coll.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Extract language, country and variant, but ignore keywords.  For example,
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// en-US, ca@valencia, ca-ES@valencia.
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string GetLocaleString(const icu::Locale& locale) {
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* language = locale.getLanguage();
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* country = locale.getCountry();
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const char* variant = locale.getVariant();
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string result =
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (language != NULL && *language != '\0') ? language : "und";
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (country != NULL && *country != '\0') {
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    result += '-';
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    result += country;
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (variant != NULL && *variant != '\0') {
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string variant_str(variant);
366e8cce623b6e4fe0c9e4af605d675dd9d0338c38Torne (Richard Coles)    base::StringToLowerASCII(&variant_str);
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    result += '@' + variant_str;
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return result;
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong
442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to
452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// http://unicode.org/reports/tr9/ for more information.
462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)base::i18n::TextDirection GetCharacterDirection(UChar32 character) {
472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Now that we have the character, we use ICU in order to query for the
482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // appropriate Unicode BiDi character type.
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if ((property == U_RIGHT_TO_LEFT) ||
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      (property == U_RIGHT_TO_LEFT_ARABIC) ||
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      (property == U_RIGHT_TO_LEFT_EMBEDDING) ||
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      (property == U_RIGHT_TO_LEFT_OVERRIDE)) {
542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return base::i18n::RIGHT_TO_LEFT;
552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  } else if ((property == U_LEFT_TO_RIGHT) ||
562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)             (property == U_LEFT_TO_RIGHT_EMBEDDING) ||
572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)             (property == U_LEFT_TO_RIGHT_OVERRIDE)) {
582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return base::i18n::LEFT_TO_RIGHT;
592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return base::i18n::UNKNOWN_DIRECTION;
612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace base {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace i18n {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Represents the locale-specific ICU text direction.
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Convert the ICU default locale to a string.
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string GetConfiguredLocale() {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return GetLocaleString(icu::Locale::getDefault());
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Convert the ICU canonicalized locale to a string.
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string GetCanonicalLocale(const char* locale) {
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return GetLocaleString(icu::Locale::createCanonical(locale));
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Convert Chrome locale name to ICU locale name
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)std::string ICULocaleName(const std::string& locale_string) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If not Spanish, just return it.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (locale_string.substr(0, 2) != "es")
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return locale_string;
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Expand es to es-ES.
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (LowerCaseEqualsASCII(locale_string, "es"))
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return "es-ES";
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Map es-419 (Latin American Spanish) to es-FOO depending on the system
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // locale.  If it's es-RR other than es-ES, map to es-RR. Otherwise, map
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to es-MX (the most populous in Spanish-speaking Latin America).
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (LowerCaseEqualsASCII(locale_string, "es-419")) {
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const icu::Locale& locale = icu::Locale::getDefault();
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string language = locale.getLanguage();
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const char* country = locale.getCountry();
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (LowerCaseEqualsASCII(language, "es") &&
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      !LowerCaseEqualsASCII(country, "es")) {
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        language += '-';
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        language += country;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        return language;
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return "es-MX";
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Currently, Chrome has only "es" and "es-419", but later we may have
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // more specific "es-RR".
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return locale_string;
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void SetICUDefaultLocale(const std::string& locale_string) {
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  icu::Locale locale(ICULocaleName(locale_string).c_str());
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UErrorCode error_code = U_ZERO_ERROR;
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  icu::Locale::setDefault(locale, error_code);
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This return value is actually bogus because Locale object is
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // an ID and setDefault seems to always succeed (regardless of the
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // presence of actual locale data). However,
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // it does not hurt to have it as a sanity check.
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(U_SUCCESS(error_code));
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  g_icu_text_direction = UNKNOWN_DIRECTION;
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool IsRTL() {
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ICUIsRTL();
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ICUIsRTL() {
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (g_icu_text_direction == UNKNOWN_DIRECTION) {
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const icu::Locale& locale = icu::Locale::getDefault();
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    g_icu_text_direction = GetTextDirectionForLocale(locale.getName());
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return g_icu_text_direction == RIGHT_TO_LEFT;
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TextDirection GetTextDirectionForLocale(const char* locale_name) {
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UErrorCode status = U_ZERO_ERROR;
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status);
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(U_SUCCESS(status));
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Treat anything other than RTL as LTR.
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT;
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)TextDirection GetFirstStrongCharacterDirection(const string16& text) {
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const UChar* string = text.c_str();
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t length = text.length();
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t position = 0;
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (position < length) {
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UChar32 character;
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t next_position = position;
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    U16_NEXT(string, next_position, length, character);
1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    TextDirection direction = GetCharacterDirection(character);
1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (direction != UNKNOWN_DIRECTION)
1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      return direction;
1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    position = next_position;
1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
1545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  return LEFT_TO_RIGHT;
1555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)}
1565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)
1575d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)TextDirection GetLastStrongCharacterDirection(const string16& text) {
1585d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  const UChar* string = text.c_str();
1595d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  size_t position = text.length();
1605d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  while (position > 0) {
1615d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    UChar32 character;
1625d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    size_t prev_position = position;
1635d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    U16_PREV(string, 0, prev_position, character);
1645d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    TextDirection direction = GetCharacterDirection(character);
1655d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    if (direction != UNKNOWN_DIRECTION)
1665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)      return direction;
1675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)    position = prev_position;
1685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)  }
1692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return LEFT_TO_RIGHT;
1702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)TextDirection GetStringDirection(const string16& text) {
1732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const UChar* string = text.c_str();
1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  size_t length = text.length();
1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  size_t position = 0;
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  TextDirection result(UNKNOWN_DIRECTION);
1782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  while (position < length) {
1792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    UChar32 character;
1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    size_t next_position = position;
1812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    U16_NEXT(string, next_position, length, character);
1822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    TextDirection direction = GetCharacterDirection(character);
1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (direction != UNKNOWN_DIRECTION) {
1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (result != UNKNOWN_DIRECTION && result != direction)
1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        return UNKNOWN_DIRECTION;
1862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      result = direction;
1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    position = next_position;
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Handle the case of a string not containing any strong directionality
1922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // characters defaulting to LEFT_TO_RIGHT.
1932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (result == UNKNOWN_DIRECTION)
1942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return LEFT_TO_RIGHT;
1952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return result;
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_WIN)
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool AdjustStringForLocaleDirection(string16* text) {
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!IsRTL() || text->empty())
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Marking the string as LTR if the locale is RTL and the string does not
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // contain strong RTL characters. Otherwise, mark the string as RTL.
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!has_rtl_chars)
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WrapStringWithLTRFormatting(text);
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  else
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WrapStringWithRTLFormatting(text);
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UnadjustStringForLocaleDirection(string16* text) {
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!IsRTL() || text->empty())
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *text = StripWrappingBidiControlCharacters(*text);
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool AdjustStringForLocaleDirection(string16* text) {
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // On OS X & GTK the directionality of a label is determined by the first
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // strongly directional character.
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // However, we want to make sure that in an LTR-language-UI all strings are
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // left aligned and vice versa.
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // A problem can arise if we display a string which starts with user input.
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // User input may be of the opposite directionality to the UI. So the whole
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // string will be displayed in the opposite directionality, e.g. if we want to
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // display in an LTR UI [such as US English]:
2325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // EMAN_NOISNETXE is now installed.
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Since EXTENSION_NAME begins with a strong RTL char, the label's
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // directionality will be set to RTL and the string will be displayed visually
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // as:
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // .is now installed EMAN_NOISNETXE
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // In order to solve this issue, we prepend an LRM to the string. An LRM is a
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // strongly directional LTR char.
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // We also append an LRM at the end, which ensures that we're in an LTR
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // context.
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // box so there is no issue with displaying zero-width bidi control characters
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // on any system.  Thus no need for the !IsRTL() check here.
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (text->empty())
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ui_direction_is_rtl = IsRTL();
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool has_rtl_chars = StringContainsStrongRTLChars(*text);
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!ui_direction_is_rtl && has_rtl_chars) {
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WrapStringWithRTLFormatting(text);
257a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
258a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                 kLeftToRightMark);
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    text->push_back(kLeftToRightMark);
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (ui_direction_is_rtl && has_rtl_chars) {
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WrapStringWithRTLFormatting(text);
262a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
263a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                 kRightToLeftMark);
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    text->push_back(kRightToLeftMark);
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (ui_direction_is_rtl) {
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WrapStringWithLTRFormatting(text);
267a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
268a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                 kRightToLeftMark);
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    text->push_back(kRightToLeftMark);
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool UnadjustStringForLocaleDirection(string16* text) {
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (text->empty())
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t begin_index = 0;
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char16 begin = text->at(begin_index);
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (begin == kLeftToRightMark ||
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      begin == kRightToLeftMark) {
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++begin_index;
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t end_index = text->length() - 1;
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char16 end = text->at(end_index);
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (end == kLeftToRightMark ||
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      end == kRightToLeftMark) {
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    --end_index;
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  string16 unmarked_text =
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      text->substr(begin_index, end_index - begin_index + 1);
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  *text = StripWrappingBidiControlCharacters(unmarked_text);
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // !OS_WIN
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool StringContainsStrongRTLChars(const string16& text) {
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const UChar* string = text.c_str();
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t length = text.length();
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t position = 0;
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while (position < length) {
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UChar32 character;
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t next_position = position;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    U16_NEXT(string, next_position, length, character);
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Now that we have the character, we use ICU in order to query for the
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // appropriate Unicode BiDi character type.
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS);
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC))
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return true;
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    position = next_position;
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return false;
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void WrapStringWithLTRFormatting(string16* text) {
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (text->empty())
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
329a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
330a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)               kLeftToRightEmbeddingMark);
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  text->push_back(kPopDirectionalFormatting);
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void WrapStringWithRTLFormatting(string16* text) {
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (text->empty())
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Inserting an RLE (Right-To-Left Embedding) mark as the first character.
341a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  text->insert(static_cast<size_t>(0), static_cast<size_t>(1),
342a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)               kRightToLeftEmbeddingMark);
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  text->push_back(kPopDirectionalFormatting);
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void WrapPathWithLTRFormatting(const FilePath& path,
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                               string16* rtl_safe_path) {
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Wrap the overall path with LRE-PDF pair which essentialy marks the
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // string as a Left-To-Right string.
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Inserting an LRE (Left-To-Right Embedding) mark as the first character.
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rtl_safe_path->push_back(kLeftToRightEmbeddingMark);
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#if defined(OS_MACOSX)
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rtl_safe_path->append(UTF8ToUTF16(path.value()));
3565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#elif defined(OS_WIN)
3575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rtl_safe_path->append(path.value());
3585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#else  // defined(OS_POSIX) && !defined(OS_MACOSX)
3595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::wstring wide_path = base::SysNativeMBToWide(path.value());
3605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    rtl_safe_path->append(WideToUTF16(wide_path));
3615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif
3625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Inserting a PDF (Pop Directional Formatting) mark as the last character.
3635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  rtl_safe_path->push_back(kPopDirectionalFormatting);
3645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string16 GetDisplayStringInLTRDirectionality(const string16& text) {
3675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Always wrap the string in RTL UI (it may be appended to RTL string).
3685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Also wrap strings with an RTL first strong character direction in LTR UI.
3695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) {
3705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    string16 text_mutable(text);
3715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    WrapStringWithLTRFormatting(&text_mutable);
3725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return text_mutable;
3735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return text;
3755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)string16 StripWrappingBidiControlCharacters(const string16& text) {
3785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (text.empty())
3795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return text;
3805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t begin_index = 0;
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  char16 begin = text[begin_index];
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (begin == kLeftToRightEmbeddingMark ||
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      begin == kRightToLeftEmbeddingMark ||
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      begin == kLeftToRightOverride ||
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      begin == kRightToLeftOverride)
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++begin_index;
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t end_index = text.length() - 1;
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (text[end_index] == kPopDirectionalFormatting)
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    --end_index;
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return text.substr(begin_index, end_index - begin_index + 1);
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace i18n
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace base
395