// rtl.cc revision 513209b27ff55e2841eac0e4120199c23acce758
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include "base/file_path.h" 8#include "base/logging.h" 9#include "base/string_util.h" 10#include "base/utf_string_conversions.h" 11#include "base/sys_string_conversions.h" 12#include "unicode/coll.h" 13#include "unicode/locid.h" 14#include "unicode/uchar.h" 15#include "unicode/uscript.h" 16 17#if defined(TOOLKIT_USES_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace { 22 23// Extract language and country, ignore keywords, concatenate using dash. 24std::string GetLocaleString(const icu::Locale& locale) { 25 const char* language = locale.getLanguage(); 26 const char* country = locale.getCountry(); 27 28 std::string result = 29 (language != NULL && *language != '\0') ? language : "und"; 30 31 if (country != NULL && *country != '\0') { 32 result += '-'; 33 result += country; 34 } 35 36 return result; 37} 38 39} // namespace 40 41namespace base { 42namespace i18n { 43 44// Represents the locale-specific ICU text direction. 45static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 46 47#if defined(OS_WIN) 48void GetLanguageAndRegionFromOS(std::string* lang, std::string* region) { 49 // Later we may have to change this to be OS-dependent so that 50 // it's not affected by ICU's default locale. It's all right 51 // to do this way because SetICUDefaultLocale is internal 52 // to this file and we know that it's not yet called when this function 53 // is called. 54 const icu::Locale& locale = icu::Locale::getDefault(); 55 const char* language = locale.getLanguage(); 56 const char* country = locale.getCountry(); 57 DCHECK(language); 58 *lang = language; 59 *region = country; 60} 61#endif 62 63// Convert the ICU default locale to a string. 
64std::string GetConfiguredLocale() { 65 return GetLocaleString(icu::Locale::getDefault()); 66} 67 68// Convert the ICU canonicalized locale to a string. 69std::string GetCanonicalLocale(const char* locale) { 70 return GetLocaleString(icu::Locale::createCanonical(locale)); 71} 72 73// Convert Chrome locale name to ICU locale name 74std::string ICULocaleName(const std::string& locale_string) { 75 // If not Spanish, just return it. 76 if (locale_string.substr(0, 2) != "es") 77 return locale_string; 78 // Expand es to es-ES. 79 if (LowerCaseEqualsASCII(locale_string, "es")) 80 return "es-ES"; 81 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 82 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 83 // to es-MX (the most populous in Spanish-speaking Latin America). 84 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 85 const icu::Locale& locale = icu::Locale::getDefault(); 86 std::string language = locale.getLanguage(); 87 const char* country = locale.getCountry(); 88 if (LowerCaseEqualsASCII(language, "es") && 89 !LowerCaseEqualsASCII(country, "es")) { 90 language += '-'; 91 language += country; 92 return language; 93 } 94 return "es-MX"; 95 } 96 // Currently, Chrome has only "es" and "es-419", but later we may have 97 // more specific "es-RR". 98 return locale_string; 99} 100 101void SetICUDefaultLocale(const std::string& locale_string) { 102 icu::Locale locale(ICULocaleName(locale_string).c_str()); 103 UErrorCode error_code = U_ZERO_ERROR; 104 icu::Locale::setDefault(locale, error_code); 105 // This return value is actually bogus because Locale object is 106 // an ID and setDefault seems to always succeed (regardless of the 107 // presence of actual locale data). However, 108 // it does not hurt to have it as a sanity check. 
109 DCHECK(U_SUCCESS(error_code)); 110 g_icu_text_direction = UNKNOWN_DIRECTION; 111 112 // If we use Views toolkit on top of GtkWidget, then we need to keep 113 // GtkWidget's default text direction consistent with ICU's text direction. 114 // Because in this case ICU's text direction will be used instead. 115 // See IsRTL() function below. 116#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK) 117 gtk_widget_set_default_direction( 118 ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); 119#endif 120} 121 122bool IsRTL() { 123#if defined(TOOLKIT_GTK) 124 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 125 return (gtk_dir == GTK_TEXT_DIR_RTL); 126#else 127 return ICUIsRTL(); 128#endif 129} 130 131bool ICUIsRTL() { 132 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 133 const icu::Locale& locale = icu::Locale::getDefault(); 134 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 135 } 136 return g_icu_text_direction == RIGHT_TO_LEFT; 137} 138 139TextDirection GetTextDirectionForLocale(const char* locale_name) { 140 UErrorCode status = U_ZERO_ERROR; 141 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 142 DCHECK(U_SUCCESS(status)); 143 // Treat anything other than RTL as LTR. 144 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 145} 146 147TextDirection GetFirstStrongCharacterDirection(const string16& text) { 148 const UChar* string = text.c_str(); 149 size_t length = text.length(); 150 size_t position = 0; 151 while (position < length) { 152 UChar32 character; 153 size_t next_position = position; 154 U16_NEXT(string, next_position, length, character); 155 156 // Now that we have the character, we use ICU in order to query for the 157 // appropriate Unicode BiDi character type. 
158 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 159 if ((property == U_RIGHT_TO_LEFT) || 160 (property == U_RIGHT_TO_LEFT_ARABIC) || 161 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 162 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 163 return RIGHT_TO_LEFT; 164 } else if ((property == U_LEFT_TO_RIGHT) || 165 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 166 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 167 return LEFT_TO_RIGHT; 168 } 169 170 position = next_position; 171 } 172 173 return LEFT_TO_RIGHT; 174} 175 176#if defined(WCHAR_T_IS_UTF32) 177TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { 178 return GetFirstStrongCharacterDirection(WideToUTF16(text)); 179} 180#endif 181 182bool AdjustStringForLocaleDirection(const string16& text, 183 string16* localized_text) { 184 if (!IsRTL() || text.empty()) 185 return false; 186 187 // Marking the string as LTR if the locale is RTL and the string does not 188 // contain strong RTL characters. Otherwise, mark the string as RTL. 189 *localized_text = text; 190 bool has_rtl_chars = StringContainsStrongRTLChars(text); 191 if (!has_rtl_chars) 192 WrapStringWithLTRFormatting(localized_text); 193 else 194 WrapStringWithRTLFormatting(localized_text); 195 196 return true; 197} 198 199#if defined(WCHAR_T_IS_UTF32) 200bool AdjustStringForLocaleDirection(const std::wstring& text, 201 std::wstring* localized_text) { 202 string16 out; 203 if (AdjustStringForLocaleDirection(WideToUTF16(text), &out)) { 204 // We should only touch the output on success. 
205 *localized_text = UTF16ToWide(out); 206 return true; 207 } 208 return false; 209} 210#endif 211 212bool StringContainsStrongRTLChars(const string16& text) { 213 const UChar* string = text.c_str(); 214 size_t length = text.length(); 215 size_t position = 0; 216 while (position < length) { 217 UChar32 character; 218 size_t next_position = position; 219 U16_NEXT(string, next_position, length, character); 220 221 // Now that we have the character, we use ICU in order to query for the 222 // appropriate Unicode BiDi character type. 223 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 224 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 225 return true; 226 227 position = next_position; 228 } 229 230 return false; 231} 232 233#if defined(WCHAR_T_IS_UTF32) 234bool StringContainsStrongRTLChars(const std::wstring& text) { 235 return StringContainsStrongRTLChars(WideToUTF16(text)); 236} 237#endif 238 239void WrapStringWithLTRFormatting(string16* text) { 240 if (text->empty()) 241 return; 242 243 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 244 text->insert(0, 1, kLeftToRightEmbeddingMark); 245 246 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 247 text->push_back(kPopDirectionalFormatting); 248} 249 250#if defined(WCHAR_T_IS_UTF32) 251void WrapStringWithLTRFormatting(std::wstring* text) { 252 if (text->empty()) 253 return; 254 255 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 256 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); 257 258 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 259 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 260} 261#endif 262 263void WrapStringWithRTLFormatting(string16* text) { 264 if (text->empty()) 265 return; 266 267 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 
268 text->insert(0, 1, kRightToLeftEmbeddingMark); 269 270 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 271 text->push_back(kPopDirectionalFormatting); 272} 273 274#if defined(WCHAR_T_IS_UTF32) 275void WrapStringWithRTLFormatting(std::wstring* text) { 276 if (text->empty()) 277 return; 278 279 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 280 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); 281 282 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 283 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 284} 285#endif 286 287void WrapPathWithLTRFormatting(const FilePath& path, 288 string16* rtl_safe_path) { 289 // Wrap the overall path with LRE-PDF pair which essentialy marks the 290 // string as a Left-To-Right string. 291 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 292 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 293#if defined(OS_MACOSX) 294 rtl_safe_path->append(UTF8ToUTF16(path.value())); 295#elif defined(OS_WIN) 296 rtl_safe_path->append(path.value()); 297#else // defined(OS_POSIX) && !defined(OS_MACOSX) 298 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 299 rtl_safe_path->append(WideToUTF16(wide_path)); 300#endif 301 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 
302 rtl_safe_path->push_back(kPopDirectionalFormatting); 303} 304 305string16 GetDisplayStringInLTRDirectionality(const string16& text) { 306 if (!IsRTL()) 307 return text; 308 string16 text_mutable(text); 309 WrapStringWithLTRFormatting(&text_mutable); 310 return text_mutable; 311} 312 313const string16 StripWrappingBidiControlCharacters(const string16& text) { 314 if (text.empty()) 315 return text; 316 size_t begin_index = 0; 317 char16 begin = text[begin_index]; 318 if (begin == kLeftToRightEmbeddingMark || 319 begin == kRightToLeftEmbeddingMark || 320 begin == kLeftToRightOverride || 321 begin == kRightToLeftOverride) 322 ++begin_index; 323 size_t end_index = text.length() - 1; 324 if (text[end_index] == kPopDirectionalFormatting) 325 --end_index; 326 return text.substr(begin_index, end_index - begin_index + 1); 327} 328 329} // namespace i18n 330} // namespace base 331