rtl.cc revision 201ade2fbba22bfb27ae029f4d23fca6ded109a0
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include "base/file_path.h" 8#include "base/logging.h" 9#include "base/string_util.h" 10#include "base/utf_string_conversions.h" 11#include "base/sys_string_conversions.h" 12#include "unicode/coll.h" 13#include "unicode/locid.h" 14#include "unicode/uchar.h" 15#include "unicode/uscript.h" 16 17#if defined(TOOLKIT_USES_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace { 22 23// Extract language and country, ignore keywords, concatenate using dash. 24std::string GetLocaleString(const icu::Locale& locale) { 25 const char* language = locale.getLanguage(); 26 const char* country = locale.getCountry(); 27 28 std::string result = 29 (language != NULL && *language != '\0') ? language : "und"; 30 31 if (country != NULL && *country != '\0') { 32 result += '-'; 33 result += country; 34 } 35 36 return result; 37} 38 39} // namespace 40 41namespace base { 42namespace i18n { 43 44// Represents the locale-specific ICU text direction. 45static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 46 47// Convert the ICU default locale to a string. 48std::string GetConfiguredLocale() { 49 return GetLocaleString(icu::Locale::getDefault()); 50} 51 52// Convert the ICU canonicalized locale to a string. 53std::string GetCanonicalLocale(const char* locale) { 54 return GetLocaleString(icu::Locale::createCanonical(locale)); 55} 56 57// Convert Chrome locale name to ICU locale name 58std::string ICULocaleName(const std::string& locale_string) { 59 // If not Spanish, just return it. 60 if (locale_string.substr(0, 2) != "es") 61 return locale_string; 62 // Expand es to es-ES. 63 if (LowerCaseEqualsASCII(locale_string, "es")) 64 return "es-ES"; 65 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 66 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 67 // to es-MX (the most populous in Spanish-speaking Latin America). 68 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 69 const icu::Locale& locale = icu::Locale::getDefault(); 70 std::string language = locale.getLanguage(); 71 const char* country = locale.getCountry(); 72 if (LowerCaseEqualsASCII(language, "es") && 73 !LowerCaseEqualsASCII(country, "es")) { 74 language += '-'; 75 language += country; 76 return language; 77 } 78 return "es-MX"; 79 } 80 // Currently, Chrome has only "es" and "es-419", but later we may have 81 // more specific "es-RR". 82 return locale_string; 83} 84 85void SetICUDefaultLocale(const std::string& locale_string) { 86 icu::Locale locale(ICULocaleName(locale_string).c_str()); 87 UErrorCode error_code = U_ZERO_ERROR; 88 icu::Locale::setDefault(locale, error_code); 89 // This return value is actually bogus because Locale object is 90 // an ID and setDefault seems to always succeed (regardless of the 91 // presence of actual locale data). However, 92 // it does not hurt to have it as a sanity check. 93 DCHECK(U_SUCCESS(error_code)); 94 g_icu_text_direction = UNKNOWN_DIRECTION; 95 96 // If we use Views toolkit on top of GtkWidget, then we need to keep 97 // GtkWidget's default text direction consistent with ICU's text direction. 98 // Because in this case ICU's text direction will be used instead. 99 // See IsRTL() function below. 100#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK) 101 gtk_widget_set_default_direction( 102 ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); 103#endif 104} 105 106bool IsRTL() { 107#if defined(TOOLKIT_GTK) 108 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 109 return (gtk_dir == GTK_TEXT_DIR_RTL); 110#else 111 return ICUIsRTL(); 112#endif 113} 114 115bool ICUIsRTL() { 116 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 117 const icu::Locale& locale = icu::Locale::getDefault(); 118 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 119 } 120 return g_icu_text_direction == RIGHT_TO_LEFT; 121} 122 123TextDirection GetTextDirectionForLocale(const char* locale_name) { 124 UErrorCode status = U_ZERO_ERROR; 125 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 126 DCHECK(U_SUCCESS(status)); 127 // Treat anything other than RTL as LTR. 128 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 129} 130 131TextDirection GetFirstStrongCharacterDirection(const string16& text) { 132 const UChar* string = text.c_str(); 133 size_t length = text.length(); 134 size_t position = 0; 135 while (position < length) { 136 UChar32 character; 137 size_t next_position = position; 138 U16_NEXT(string, next_position, length, character); 139 140 // Now that we have the character, we use ICU in order to query for the 141 // appropriate Unicode BiDi character type. 142 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 143 if ((property == U_RIGHT_TO_LEFT) || 144 (property == U_RIGHT_TO_LEFT_ARABIC) || 145 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 146 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 147 return RIGHT_TO_LEFT; 148 } else if ((property == U_LEFT_TO_RIGHT) || 149 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 150 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 151 return LEFT_TO_RIGHT; 152 } 153 154 position = next_position; 155 } 156 157 return LEFT_TO_RIGHT; 158} 159 160#if defined(WCHAR_T_IS_UTF32) 161TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { 162 return GetFirstStrongCharacterDirection(WideToUTF16(text)); 163} 164#endif 165 166bool AdjustStringForLocaleDirection(string16* text) { 167 if (!IsRTL() || text->empty()) 168 return false; 169 170 // Marking the string as LTR if the locale is RTL and the string does not 171 // contain strong RTL characters. Otherwise, mark the string as RTL. 172 bool has_rtl_chars = StringContainsStrongRTLChars(*text); 173 if (!has_rtl_chars) 174 WrapStringWithLTRFormatting(text); 175 else 176 WrapStringWithRTLFormatting(text); 177 178 return true; 179} 180 181#if defined(WCHAR_T_IS_UTF32) 182bool AdjustStringForLocaleDirection(std::wstring* text) { 183 string16 temp = WideToUTF16(*text); 184 if (AdjustStringForLocaleDirection(&temp)) { 185 // We should only touch the output on success. 186 *text = UTF16ToWide(temp); 187 return true; 188 } 189 return false; 190} 191#endif 192 193bool StringContainsStrongRTLChars(const string16& text) { 194 const UChar* string = text.c_str(); 195 size_t length = text.length(); 196 size_t position = 0; 197 while (position < length) { 198 UChar32 character; 199 size_t next_position = position; 200 U16_NEXT(string, next_position, length, character); 201 202 // Now that we have the character, we use ICU in order to query for the 203 // appropriate Unicode BiDi character type. 204 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 205 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 206 return true; 207 208 position = next_position; 209 } 210 211 return false; 212} 213 214#if defined(WCHAR_T_IS_UTF32) 215bool StringContainsStrongRTLChars(const std::wstring& text) { 216 return StringContainsStrongRTLChars(WideToUTF16(text)); 217} 218#endif 219 220void WrapStringWithLTRFormatting(string16* text) { 221 if (text->empty()) 222 return; 223 224 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 225 text->insert(0, 1, kLeftToRightEmbeddingMark); 226 227 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 228 text->push_back(kPopDirectionalFormatting); 229} 230 231#if defined(WCHAR_T_IS_UTF32) 232void WrapStringWithLTRFormatting(std::wstring* text) { 233 if (text->empty()) 234 return; 235 236 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 237 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); 238 239 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 240 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 241} 242#endif 243 244void WrapStringWithRTLFormatting(string16* text) { 245 if (text->empty()) 246 return; 247 248 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 249 text->insert(0, 1, kRightToLeftEmbeddingMark); 250 251 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 252 text->push_back(kPopDirectionalFormatting); 253} 254 255#if defined(WCHAR_T_IS_UTF32) 256void WrapStringWithRTLFormatting(std::wstring* text) { 257 if (text->empty()) 258 return; 259 260 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 261 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); 262 263 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 264 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 265} 266#endif 267 268void WrapPathWithLTRFormatting(const FilePath& path, 269 string16* rtl_safe_path) { 270 // Wrap the overall path with LRE-PDF pair which essentialy marks the 271 // string as a Left-To-Right string. 272 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 273 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 274#if defined(OS_MACOSX) 275 rtl_safe_path->append(UTF8ToUTF16(path.value())); 276#elif defined(OS_WIN) 277 rtl_safe_path->append(path.value()); 278#else // defined(OS_POSIX) && !defined(OS_MACOSX) 279 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 280 rtl_safe_path->append(WideToUTF16(wide_path)); 281#endif 282 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 283 rtl_safe_path->push_back(kPopDirectionalFormatting); 284} 285 286string16 GetDisplayStringInLTRDirectionality(const string16& text) { 287 if (!IsRTL()) 288 return text; 289 string16 text_mutable(text); 290 WrapStringWithLTRFormatting(&text_mutable); 291 return text_mutable; 292} 293 294const string16 StripWrappingBidiControlCharacters(const string16& text) { 295 if (text.empty()) 296 return text; 297 size_t begin_index = 0; 298 char16 begin = text[begin_index]; 299 if (begin == kLeftToRightEmbeddingMark || 300 begin == kRightToLeftEmbeddingMark || 301 begin == kLeftToRightOverride || 302 begin == kRightToLeftOverride) 303 ++begin_index; 304 size_t end_index = text.length() - 1; 305 if (text[end_index] == kPopDirectionalFormatting) 306 --end_index; 307 return text.substr(begin_index, end_index - begin_index + 1); 308} 309 310} // namespace i18n 311} // namespace base 312