rtl.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include "base/file_path.h" 8#include "base/logging.h" 9#include "base/string_util.h" 10#include "base/utf_string_conversions.h" 11#include "base/sys_string_conversions.h" 12#include "unicode/coll.h" 13#include "unicode/locid.h" 14#include "unicode/uchar.h" 15#include "unicode/uscript.h" 16 17#if defined(TOOLKIT_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace { 22 23// Extract language, country and variant, but ignore keywords. For example, 24// en-US, ca@valencia, ca-ES@valencia. 25std::string GetLocaleString(const icu::Locale& locale) { 26 const char* language = locale.getLanguage(); 27 const char* country = locale.getCountry(); 28 const char* variant = locale.getVariant(); 29 30 std::string result = 31 (language != NULL && *language != '\0') ? language : "und"; 32 33 if (country != NULL && *country != '\0') { 34 result += '-'; 35 result += country; 36 } 37 38 if (variant != NULL && *variant != '\0') { 39 std::string variant_str(variant); 40 StringToLowerASCII(&variant_str); 41 result += '@' + variant_str; 42 } 43 44 return result; 45} 46 47} // namespace 48 49namespace base { 50namespace i18n { 51 52// Represents the locale-specific ICU text direction. 53static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 54 55// Convert the ICU default locale to a string. 56std::string GetConfiguredLocale() { 57 return GetLocaleString(icu::Locale::getDefault()); 58} 59 60// Convert the ICU canonicalized locale to a string. 61std::string GetCanonicalLocale(const char* locale) { 62 return GetLocaleString(icu::Locale::createCanonical(locale)); 63} 64 65// Convert Chrome locale name to ICU locale name 66std::string ICULocaleName(const std::string& locale_string) { 67 // If not Spanish, just return it. 68 if (locale_string.substr(0, 2) != "es") 69 return locale_string; 70 // Expand es to es-ES. 71 if (LowerCaseEqualsASCII(locale_string, "es")) 72 return "es-ES"; 73 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 74 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 75 // to es-MX (the most populous in Spanish-speaking Latin America). 76 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 77 const icu::Locale& locale = icu::Locale::getDefault(); 78 std::string language = locale.getLanguage(); 79 const char* country = locale.getCountry(); 80 if (LowerCaseEqualsASCII(language, "es") && 81 !LowerCaseEqualsASCII(country, "es")) { 82 language += '-'; 83 language += country; 84 return language; 85 } 86 return "es-MX"; 87 } 88 // Currently, Chrome has only "es" and "es-419", but later we may have 89 // more specific "es-RR". 90 return locale_string; 91} 92 93void SetICUDefaultLocale(const std::string& locale_string) { 94 icu::Locale locale(ICULocaleName(locale_string).c_str()); 95 UErrorCode error_code = U_ZERO_ERROR; 96 icu::Locale::setDefault(locale, error_code); 97 // This return value is actually bogus because Locale object is 98 // an ID and setDefault seems to always succeed (regardless of the 99 // presence of actual locale data). However, 100 // it does not hurt to have it as a sanity check. 101 DCHECK(U_SUCCESS(error_code)); 102 g_icu_text_direction = UNKNOWN_DIRECTION; 103} 104 105bool IsRTL() { 106#if defined(TOOLKIT_GTK) 107 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 108 return gtk_dir == GTK_TEXT_DIR_RTL; 109#else 110 return ICUIsRTL(); 111#endif 112} 113 114bool ICUIsRTL() { 115 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 116 const icu::Locale& locale = icu::Locale::getDefault(); 117 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 118 } 119 return g_icu_text_direction == RIGHT_TO_LEFT; 120} 121 122TextDirection GetTextDirectionForLocale(const char* locale_name) { 123 UErrorCode status = U_ZERO_ERROR; 124 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 125 DCHECK(U_SUCCESS(status)); 126 // Treat anything other than RTL as LTR. 127 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 128} 129 130TextDirection GetFirstStrongCharacterDirection(const string16& text) { 131 const UChar* string = text.c_str(); 132 size_t length = text.length(); 133 size_t position = 0; 134 while (position < length) { 135 UChar32 character; 136 size_t next_position = position; 137 U16_NEXT(string, next_position, length, character); 138 139 // Now that we have the character, we use ICU in order to query for the 140 // appropriate Unicode BiDi character type. 141 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 142 if ((property == U_RIGHT_TO_LEFT) || 143 (property == U_RIGHT_TO_LEFT_ARABIC) || 144 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 145 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 146 return RIGHT_TO_LEFT; 147 } else if ((property == U_LEFT_TO_RIGHT) || 148 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 149 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 150 return LEFT_TO_RIGHT; 151 } 152 153 position = next_position; 154 } 155 156 return LEFT_TO_RIGHT; 157} 158 159#if defined(OS_WIN) 160bool AdjustStringForLocaleDirection(string16* text) { 161 if (!IsRTL() || text->empty()) 162 return false; 163 164 // Marking the string as LTR if the locale is RTL and the string does not 165 // contain strong RTL characters. Otherwise, mark the string as RTL. 166 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 167 if (!has_rtl_chars) 168 WrapStringWithLTRFormatting(text); 169 else 170 WrapStringWithRTLFormatting(text); 171 172 return true; 173} 174 175bool UnadjustStringForLocaleDirection(string16* text) { 176 if (!IsRTL() || text->empty()) 177 return false; 178 179 *text = StripWrappingBidiControlCharacters(*text); 180 return true; 181} 182#else 183bool AdjustStringForLocaleDirection(string16* text) { 184 // On OS X & GTK the directionality of a label is determined by the first 185 // strongly directional character. 186 // However, we want to make sure that in an LTR-language-UI all strings are 187 // left aligned and vice versa. 188 // A problem can arise if we display a string which starts with user input. 189 // User input may be of the opposite directionality to the UI. So the whole 190 // string will be displayed in the opposite directionality, e.g. if we want to 191 // display in an LTR UI [such as US English]: 192 // 193 // EMAN_NOISNETXE is now installed. 194 // 195 // Since EXTENSION_NAME begins with a strong RTL char, the label's 196 // directionality will be set to RTL and the string will be displayed visually 197 // as: 198 // 199 // .is now installed EMAN_NOISNETXE 200 // 201 // In order to solve this issue, we prepend an LRM to the string. An LRM is a 202 // strongly directional LTR char. 203 // We also append an LRM at the end, which ensures that we're in an LTR 204 // context. 205 206 // Unlike Windows, Linux and OS X can correctly display RTL glyphs out of the 207 // box so there is no issue with displaying zero-width bidi control characters 208 // on any system. Thus no need for the !IsRTL() check here. 209 if (text->empty()) 210 return false; 211 212 bool ui_direction_is_rtl = IsRTL(); 213 214 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 215 if (!ui_direction_is_rtl && has_rtl_chars) { 216 WrapStringWithRTLFormatting(text); 217 text->insert(0U, 1U, kLeftToRightMark); 218 text->push_back(kLeftToRightMark); 219 } else if (ui_direction_is_rtl && has_rtl_chars) { 220 WrapStringWithRTLFormatting(text); 221 text->insert(0U, 1U, kRightToLeftMark); 222 text->push_back(kRightToLeftMark); 223 } else if (ui_direction_is_rtl) { 224 WrapStringWithLTRFormatting(text); 225 text->insert(0U, 1U, kRightToLeftMark); 226 text->push_back(kRightToLeftMark); 227 } else { 228 return false; 229 } 230 231 return true; 232} 233 234bool UnadjustStringForLocaleDirection(string16* text) { 235 if (text->empty()) 236 return false; 237 238 size_t begin_index = 0; 239 char16 begin = text->at(begin_index); 240 if (begin == kLeftToRightMark || 241 begin == kRightToLeftMark) { 242 ++begin_index; 243 } 244 245 size_t end_index = text->length() - 1; 246 char16 end = text->at(end_index); 247 if (end == kLeftToRightMark || 248 end == kRightToLeftMark) { 249 --end_index; 250 } 251 252 string16 unmarked_text = 253 text->substr(begin_index, end_index - begin_index + 1); 254 *text = StripWrappingBidiControlCharacters(unmarked_text); 255 return true; 256} 257 258#endif // !OS_WIN 259 260bool StringContainsStrongRTLChars(const string16& text) { 261 const UChar* string = text.c_str(); 262 size_t length = text.length(); 263 size_t position = 0; 264 while (position < length) { 265 UChar32 character; 266 size_t next_position = position; 267 U16_NEXT(string, next_position, length, character); 268 269 // Now that we have the character, we use ICU in order to query for the 270 // appropriate Unicode BiDi character type. 271 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 272 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 273 return true; 274 275 position = next_position; 276 } 277 278 return false; 279} 280 281void WrapStringWithLTRFormatting(string16* text) { 282 if (text->empty()) 283 return; 284 285 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 286 text->insert(0U, 1U, kLeftToRightEmbeddingMark); 287 288 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 289 text->push_back(kPopDirectionalFormatting); 290} 291 292void WrapStringWithRTLFormatting(string16* text) { 293 if (text->empty()) 294 return; 295 296 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 297 text->insert(0U, 1U, kRightToLeftEmbeddingMark); 298 299 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 300 text->push_back(kPopDirectionalFormatting); 301} 302 303void WrapPathWithLTRFormatting(const FilePath& path, 304 string16* rtl_safe_path) { 305 // Wrap the overall path with LRE-PDF pair which essentialy marks the 306 // string as a Left-To-Right string. 307 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 308 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 309#if defined(OS_MACOSX) 310 rtl_safe_path->append(UTF8ToUTF16(path.value())); 311#elif defined(OS_WIN) 312 rtl_safe_path->append(path.value()); 313#else // defined(OS_POSIX) && !defined(OS_MACOSX) 314 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 315 rtl_safe_path->append(WideToUTF16(wide_path)); 316#endif 317 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 318 rtl_safe_path->push_back(kPopDirectionalFormatting); 319} 320 321string16 GetDisplayStringInLTRDirectionality(const string16& text) { 322 // Always wrap the string in RTL UI (it may be appended to RTL string). 323 // Also wrap strings with an RTL first strong character direction in LTR UI. 324 if (IsRTL() || GetFirstStrongCharacterDirection(text) == RIGHT_TO_LEFT) { 325 string16 text_mutable(text); 326 WrapStringWithLTRFormatting(&text_mutable); 327 return text_mutable; 328 } 329 return text; 330} 331 332string16 StripWrappingBidiControlCharacters(const string16& text) { 333 if (text.empty()) 334 return text; 335 size_t begin_index = 0; 336 char16 begin = text[begin_index]; 337 if (begin == kLeftToRightEmbeddingMark || 338 begin == kRightToLeftEmbeddingMark || 339 begin == kLeftToRightOverride || 340 begin == kRightToLeftOverride) 341 ++begin_index; 342 size_t end_index = text.length() - 1; 343 if (text[end_index] == kPopDirectionalFormatting) 344 --end_index; 345 return text.substr(begin_index, end_index - begin_index + 1); 346} 347 348} // namespace i18n 349} // namespace base 350