rtl.cc revision a1401311d1ab56c4ed0a474bd38c108f75cb0cd9
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include "base/files/file_path.h" 8#include "base/logging.h" 9#include "base/strings/string_util.h" 10#include "base/strings/sys_string_conversions.h" 11#include "base/strings/utf_string_conversions.h" 12#include "third_party/icu/source/common/unicode/locid.h" 13#include "third_party/icu/source/common/unicode/uchar.h" 14#include "third_party/icu/source/common/unicode/uscript.h" 15#include "third_party/icu/source/i18n/unicode/coll.h" 16 17#if defined(TOOLKIT_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace { 22 23// Extract language, country and variant, but ignore keywords. For example, 24// en-US, ca@valencia, ca-ES@valencia. 25std::string GetLocaleString(const icu::Locale& locale) { 26 const char* language = locale.getLanguage(); 27 const char* country = locale.getCountry(); 28 const char* variant = locale.getVariant(); 29 30 std::string result = 31 (language != NULL && *language != '\0') ? language : "und"; 32 33 if (country != NULL && *country != '\0') { 34 result += '-'; 35 result += country; 36 } 37 38 if (variant != NULL && *variant != '\0') { 39 std::string variant_str(variant); 40 StringToLowerASCII(&variant_str); 41 result += '@' + variant_str; 42 } 43 44 return result; 45} 46 47// Returns LEFT_TO_RIGHT or RIGHT_TO_LEFT if |character| has strong 48// directionality, returns UNKNOWN_DIRECTION if it doesn't. Please refer to 49// http://unicode.org/reports/tr9/ for more information. 50base::i18n::TextDirection GetCharacterDirection(UChar32 character) { 51 // Now that we have the character, we use ICU in order to query for the 52 // appropriate Unicode BiDi character type. 53 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 54 if ((property == U_RIGHT_TO_LEFT) || 55 (property == U_RIGHT_TO_LEFT_ARABIC) || 56 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 57 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 58 return base::i18n::RIGHT_TO_LEFT; 59 } else if ((property == U_LEFT_TO_RIGHT) || 60 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 61 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 62 return base::i18n::LEFT_TO_RIGHT; 63 } 64 return base::i18n::UNKNOWN_DIRECTION; 65} 66 67} // namespace 68 69namespace base { 70namespace i18n { 71 72// Represents the locale-specific ICU text direction. 73static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 74 75// Convert the ICU default locale to a string. 76std::string GetConfiguredLocale() { 77 return GetLocaleString(icu::Locale::getDefault()); 78} 79 80// Convert the ICU canonicalized locale to a string. 81std::string GetCanonicalLocale(const char* locale) { 82 return GetLocaleString(icu::Locale::createCanonical(locale)); 83} 84 85// Convert Chrome locale name to ICU locale name 86std::string ICULocaleName(const std::string& locale_string) { 87 // If not Spanish, just return it. 88 if (locale_string.substr(0, 2) != "es") 89 return locale_string; 90 // Expand es to es-ES. 91 if (LowerCaseEqualsASCII(locale_string, "es")) 92 return "es-ES"; 93 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 94 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 95 // to es-MX (the most populous in Spanish-speaking Latin America). 96 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 97 const icu::Locale& locale = icu::Locale::getDefault(); 98 std::string language = locale.getLanguage(); 99 const char* country = locale.getCountry(); 100 if (LowerCaseEqualsASCII(language, "es") && 101 !LowerCaseEqualsASCII(country, "es")) { 102 language += '-'; 103 language += country; 104 return language; 105 } 106 return "es-MX"; 107 } 108 // Currently, Chrome has only "es" and "es-419", but later we may have 109 // more specific "es-RR". 110 return locale_string; 111} 112 113void SetICUDefaultLocale(const std::string& locale_string) { 114 icu::Locale locale(ICULocaleName(locale_string).c_str()); 115 UErrorCode error_code = U_ZERO_ERROR; 116 icu::Locale::setDefault(locale, error_code); 117 // This return value is actually bogus because Locale object is 118 // an ID and setDefault seems to always succeed (regardless of the 119 // presence of actual locale data). However, 120 // it does not hurt to have it as a sanity check. 121 DCHECK(U_SUCCESS(error_code)); 122 g_icu_text_direction = UNKNOWN_DIRECTION; 123} 124 125bool IsRTL() { 126#if defined(TOOLKIT_GTK) 127 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 128 return gtk_dir == GTK_TEXT_DIR_RTL; 129#else 130 return ICUIsRTL(); 131#endif 132} 133 134bool ICUIsRTL() { 135 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 136 const icu::Locale& locale = icu::Locale::getDefault(); 137 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 138 } 139 return g_icu_text_direction == RIGHT_TO_LEFT; 140} 141 142TextDirection GetTextDirectionForLocale(const char* locale_name) { 143 UErrorCode status = U_ZERO_ERROR; 144 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 145 DCHECK(U_SUCCESS(status)); 146 // Treat anything other than RTL as LTR. 147 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 148} 149 150TextDirection GetFirstStrongCharacterDirection(const string16& text) { 151 const UChar* string = text.c_str(); 152 size_t length = text.length(); 153 size_t position = 0; 154 while (position < length) { 155 UChar32 character; 156 size_t next_position = position; 157 U16_NEXT(string, next_position, length, character); 158 TextDirection direction = GetCharacterDirection(character); 159 if (direction != UNKNOWN_DIRECTION) 160 return direction; 161 position = next_position; 162 } 163 return LEFT_TO_RIGHT; 164} 165 166TextDirection GetLastStrongCharacterDirection(const string16& text) { 167 const UChar* string = text.c_str(); 168 size_t position = text.length(); 169 while (position > 0) { 170 UChar32 character; 171 size_t prev_position = position; 172 U16_PREV(string, 0, prev_position, character); 173 TextDirection direction = GetCharacterDirection(character); 174 if (direction != UNKNOWN_DIRECTION) 175 return direction; 176 position = prev_position; 177 } 178 return LEFT_TO_RIGHT; 179} 180 181TextDirection GetStringDirection(const string16& text) { 182 const UChar* string = text.c_str(); 183 size_t length = text.length(); 184 size_t position = 0; 185 186 TextDirection result(UNKNOWN_DIRECTION); 187 while (position < length) { 188 UChar32 character; 189 size_t next_position = position; 190 U16_NEXT(string, next_position, length, character); 191 TextDirection direction = GetCharacterDirection(character); 192 if (direction != UNKNOWN_DIRECTION) { 193 if (result != UNKNOWN_DIRECTION && result != direction) 194 return UNKNOWN_DIRECTION; 195 result = direction; 196 } 197 position = next_position; 198 } 199 200 // Handle the case of a string not containing any strong directionality 201 // characters defaulting to LEFT_TO_RIGHT. 202 if (result == UNKNOWN_DIRECTION) 203 return LEFT_TO_RIGHT; 204 205 return result; 206} 207 208#if defined(OS_WIN) 209bool AdjustStringForLocaleDirection(string16* text) { 210 if (!IsRTL() || text->empty()) 211 return false; 212 213 // Marking the string as LTR if the locale is RTL and the string does not 214 // contain strong RTL characters. Otherwise, mark the string as RTL. 215 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 216 if (!has_rtl_chars) 217 WrapStringWithLTRFormatting(text); 218 else 219 WrapStringWithRTLFormatting(text); 220 221 return true; 222} 223 224bool UnadjustStringForLocaleDirection(string16* text) { 225 if (!IsRTL() || text->empty()) 226 return false; 227 228 *text = StripWrappingBidiControlCharacters(*text); 229 return true; 230} 231#else 232bool AdjustStringForLocaleDirection(string16* text) { 233 // On OS X & GTK the directionality of a label is determined by the first 234 // strongly directional character. 235 // However, we want to make sure that in an LTR-language-UI all strings are 236 // left aligned and vice versa. 237 // A problem can arise if we display a string which starts with user input. 238 // User input may be of the opposite directionality to the UI. So the whole 239 // string will be displayed in the opposite directionality, e.g. if we want to 240 // display in an LTR UI [such as US English]: 241 // 242 // EMAN_NOISNETXE is now installed. 243 // 244 // Since EXTENSION_NAME begins with a strong RTL char, the label's 245 // directionality will be set to RTL and the string will be displayed visually 246 // as: 247 // 248 // .is now installed EMAN_NOISNETXE 249 // 250 // In order to solve this issue, we prepend an LRM to the string. An LRM is a 251 // strongly directional LTR char. 252 // We also append an LRM at the end, which ensures that we're in an LTR 253 // context. 254 255 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the 256 // box so there is no issue with displaying zero-width bidi control characters 257 // on any system. Thus no need for the !IsRTL() check here. 258 if (text->empty()) 259 return false; 260 261 bool ui_direction_is_rtl = IsRTL(); 262 263 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 264 if (!ui_direction_is_rtl && has_rtl_chars) { 265 WrapStringWithRTLFormatting(text); 266 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 267 kLeftToRightMark); 268 text->push_back(kLeftToRightMark); 269 } else if (ui_direction_is_rtl && has_rtl_chars) { 270 WrapStringWithRTLFormatting(text); 271 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 272 kRightToLeftMark); 273 text->push_back(kRightToLeftMark); 274 } else if (ui_direction_is_rtl) { 275 WrapStringWithLTRFormatting(text); 276 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 277 kRightToLeftMark); 278 text->push_back(kRightToLeftMark); 279 } else { 280 return false; 281 } 282 283 return true; 284} 285 286bool UnadjustStringForLocaleDirection(string16* text) { 287 if (text->empty()) 288 return false; 289 290 size_t begin_index = 0; 291 char16 begin = text->at(begin_index); 292 if (begin == kLeftToRightMark || 293 begin == kRightToLeftMark) { 294 ++begin_index; 295 } 296 297 size_t end_index = text->length() - 1; 298 char16 end = text->at(end_index); 299 if (end == kLeftToRightMark || 300 end == kRightToLeftMark) { 301 --end_index; 302 } 303 304 string16 unmarked_text = 305 text->substr(begin_index, end_index - begin_index + 1); 306 *text = StripWrappingBidiControlCharacters(unmarked_text); 307 return true; 308} 309 310#endif // !OS_WIN 311 312bool StringContainsStrongRTLChars(const string16& text) { 313 const UChar* string = text.c_str(); 314 size_t length = text.length(); 315 size_t position = 0; 316 while (position < length) { 317 UChar32 character; 318 size_t next_position = position; 319 U16_NEXT(string, next_position, length, character); 320 321 // Now that we have the character, we use ICU in order to query for the 322 // appropriate Unicode BiDi character type. 323 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 324 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 325 return true; 326 327 position = next_position; 328 } 329 330 return false; 331} 332 333void WrapStringWithLTRFormatting(string16* text) { 334 if (text->empty()) 335 return; 336 337 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 338 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 339 kLeftToRightEmbeddingMark); 340 341 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 342 text->push_back(kPopDirectionalFormatting); 343} 344 345void WrapStringWithRTLFormatting(string16* text) { 346 if (text->empty()) 347 return; 348 349 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 350 text->insert(static_cast<size_t>(0), static_cast<size_t>(1), 351 kRightToLeftEmbeddingMark); 352 353 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 354 text->push_back(kPopDirectionalFormatting); 355} 356 357void WrapPathWithLTRFormatting(const FilePath& path, 358 string16* rtl_safe_path) { 359 // Wrap the overall path with LRE-PDF pair which essentialy marks the 360 // string as a Left-To-Right string. 361 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 362 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 363#if defined(OS_MACOSX) 364 rtl_safe_path->append(UTF8ToUTF16(path.value())); 365#elif defined(OS_WIN) 366 rtl_safe_path->append(path.value()); 367#else // defined(OS_POSIX) && !defined(OS_MACOSX) 368 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 369 rtl_safe_path->append(WideToUTF16(wide_path)); 370#endif 371 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 372 rtl_safe_path->push_back(kPopDirectionalFormatting); 373} 374 375string16 GetDisplayStringInLTRDirectionality(const string16& text) { 376 // Always wrap the string in RTL UI (it may be appended to RTL string). 377 // Also wrap strings with an RTL first strong character direction in LTR UI. 378 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) { 379 string16 text_mutable(text); 380 WrapStringWithLTRFormatting(&text_mutable); 381 return text_mutable; 382 } 383 return text; 384} 385 386string16 StripWrappingBidiControlCharacters(const string16& text) { 387 if (text.empty()) 388 return text; 389 size_t begin_index = 0; 390 char16 begin = text[begin_index]; 391 if (begin == kLeftToRightEmbeddingMark || 392 begin == kRightToLeftEmbeddingMark || 393 begin == kLeftToRightOverride || 394 begin == kRightToLeftOverride) 395 ++begin_index; 396 size_t end_index = text.length() - 1; 397 if (text[end_index] == kPopDirectionalFormatting) 398 --end_index; 399 return text.substr(begin_index, end_index - begin_index + 1); 400} 401 402} // namespace i18n 403} // namespace base 404