rtl.cc revision 4a5e2dc747d50c653511c68ccb2cfbfb740bd5a7
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include "base/file_path.h" 8#include "base/logging.h" 9#include "base/string_util.h" 10#include "base/utf_string_conversions.h" 11#include "base/sys_string_conversions.h" 12#include "unicode/coll.h" 13#include "unicode/locid.h" 14#include "unicode/uchar.h" 15#include "unicode/uscript.h" 16 17#if defined(TOOLKIT_USES_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace { 22 23// Extract language and country, ignore keywords, concatenate using dash. 24std::string GetLocaleString(const icu::Locale& locale) { 25 const char* language = locale.getLanguage(); 26 const char* country = locale.getCountry(); 27 28 std::string result = 29 (language != NULL && *language != '\0') ? language : "und"; 30 31 if (country != NULL && *country != '\0') { 32 result += '-'; 33 result += country; 34 } 35 36 return result; 37} 38 39} // namespace 40 41namespace base { 42namespace i18n { 43 44// Represents the locale-specific ICU text direction. 45static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 46 47// Convert the ICU default locale to a string. 48std::string GetConfiguredLocale() { 49 return GetLocaleString(icu::Locale::getDefault()); 50} 51 52// Convert the ICU canonicalized locale to a string. 53std::string GetCanonicalLocale(const char* locale) { 54 return GetLocaleString(icu::Locale::createCanonical(locale)); 55} 56 57// Convert Chrome locale name to ICU locale name 58std::string ICULocaleName(const std::string& locale_string) { 59 // If not Spanish, just return it. 60 if (locale_string.substr(0, 2) != "es") 61 return locale_string; 62 // Expand es to es-ES. 63 if (LowerCaseEqualsASCII(locale_string, "es")) 64 return "es-ES"; 65 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 66 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 67 // to es-MX (the most populous in Spanish-speaking Latin America). 68 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 69 const icu::Locale& locale = icu::Locale::getDefault(); 70 std::string language = locale.getLanguage(); 71 const char* country = locale.getCountry(); 72 if (LowerCaseEqualsASCII(language, "es") && 73 !LowerCaseEqualsASCII(country, "es")) { 74 language += '-'; 75 language += country; 76 return language; 77 } 78 return "es-MX"; 79 } 80 // Currently, Chrome has only "es" and "es-419", but later we may have 81 // more specific "es-RR". 82 return locale_string; 83} 84 85void SetICUDefaultLocale(const std::string& locale_string) { 86 icu::Locale locale(ICULocaleName(locale_string).c_str()); 87 UErrorCode error_code = U_ZERO_ERROR; 88 icu::Locale::setDefault(locale, error_code); 89 // This return value is actually bogus because Locale object is 90 // an ID and setDefault seems to always succeed (regardless of the 91 // presence of actual locale data). However, 92 // it does not hurt to have it as a sanity check. 93 DCHECK(U_SUCCESS(error_code)); 94 g_icu_text_direction = UNKNOWN_DIRECTION; 95 96 // If we use Views toolkit on top of GtkWidget, then we need to keep 97 // GtkWidget's default text direction consistent with ICU's text direction. 98 // Because in this case ICU's text direction will be used instead. 99 // See IsRTL() function below. 100#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK) 101 gtk_widget_set_default_direction( 102 ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); 103#endif 104} 105 106bool IsRTL() { 107#if defined(TOOLKIT_GTK) 108 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 109 return (gtk_dir == GTK_TEXT_DIR_RTL); 110#else 111 return ICUIsRTL(); 112#endif 113} 114 115bool ICUIsRTL() { 116 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 117 const icu::Locale& locale = icu::Locale::getDefault(); 118 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 119 } 120 return g_icu_text_direction == RIGHT_TO_LEFT; 121} 122 123TextDirection GetTextDirectionForLocale(const char* locale_name) { 124 UErrorCode status = U_ZERO_ERROR; 125 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 126 DCHECK(U_SUCCESS(status)); 127 // Treat anything other than RTL as LTR. 128 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 129} 130 131TextDirection GetFirstStrongCharacterDirection(const string16& text) { 132 const UChar* string = text.c_str(); 133 size_t length = text.length(); 134 size_t position = 0; 135 while (position < length) { 136 UChar32 character; 137 size_t next_position = position; 138 U16_NEXT(string, next_position, length, character); 139 140 // Now that we have the character, we use ICU in order to query for the 141 // appropriate Unicode BiDi character type. 142 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 143 if ((property == U_RIGHT_TO_LEFT) || 144 (property == U_RIGHT_TO_LEFT_ARABIC) || 145 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 146 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 147 return RIGHT_TO_LEFT; 148 } else if ((property == U_LEFT_TO_RIGHT) || 149 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 150 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 151 return LEFT_TO_RIGHT; 152 } 153 154 position = next_position; 155 } 156 157 return LEFT_TO_RIGHT; 158} 159 160#if defined(WCHAR_T_IS_UTF32) 161TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { 162 return GetFirstStrongCharacterDirection(WideToUTF16(text)); 163} 164#endif 165 166bool AdjustStringForLocaleDirection(const string16& text, 167 string16* localized_text) { 168 if (!IsRTL() || text.empty()) 169 return false; 170 171 // Marking the string as LTR if the locale is RTL and the string does not 172 // contain strong RTL characters. Otherwise, mark the string as RTL. 173 *localized_text = text; 174 bool has_rtl_chars = StringContainsStrongRTLChars(text); 175 if (!has_rtl_chars) 176 WrapStringWithLTRFormatting(localized_text); 177 else 178 WrapStringWithRTLFormatting(localized_text); 179 180 return true; 181} 182 183#if defined(WCHAR_T_IS_UTF32) 184bool AdjustStringForLocaleDirection(const std::wstring& text, 185 std::wstring* localized_text) { 186 string16 out; 187 if (AdjustStringForLocaleDirection(WideToUTF16(text), &out)) { 188 // We should only touch the output on success. 189 *localized_text = UTF16ToWide(out); 190 return true; 191 } 192 return false; 193} 194#endif 195 196bool StringContainsStrongRTLChars(const string16& text) { 197 const UChar* string = text.c_str(); 198 size_t length = text.length(); 199 size_t position = 0; 200 while (position < length) { 201 UChar32 character; 202 size_t next_position = position; 203 U16_NEXT(string, next_position, length, character); 204 205 // Now that we have the character, we use ICU in order to query for the 206 // appropriate Unicode BiDi character type. 207 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 208 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 209 return true; 210 211 position = next_position; 212 } 213 214 return false; 215} 216 217#if defined(WCHAR_T_IS_UTF32) 218bool StringContainsStrongRTLChars(const std::wstring& text) { 219 return StringContainsStrongRTLChars(WideToUTF16(text)); 220} 221#endif 222 223void WrapStringWithLTRFormatting(string16* text) { 224 if (text->empty()) 225 return; 226 227 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 228 text->insert(0, 1, kLeftToRightEmbeddingMark); 229 230 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 231 text->push_back(kPopDirectionalFormatting); 232} 233 234#if defined(WCHAR_T_IS_UTF32) 235void WrapStringWithLTRFormatting(std::wstring* text) { 236 if (text->empty()) 237 return; 238 239 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 240 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); 241 242 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 243 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 244} 245#endif 246 247void WrapStringWithRTLFormatting(string16* text) { 248 if (text->empty()) 249 return; 250 251 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 252 text->insert(0, 1, kRightToLeftEmbeddingMark); 253 254 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 255 text->push_back(kPopDirectionalFormatting); 256} 257 258#if defined(WCHAR_T_IS_UTF32) 259void WrapStringWithRTLFormatting(std::wstring* text) { 260 if (text->empty()) 261 return; 262 263 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 264 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); 265 266 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 267 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 268} 269#endif 270 271void WrapPathWithLTRFormatting(const FilePath& path, 272 string16* rtl_safe_path) { 273 // Wrap the overall path with LRE-PDF pair which essentialy marks the 274 // string as a Left-To-Right string. 275 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 276 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 277#if defined(OS_MACOSX) 278 rtl_safe_path->append(UTF8ToUTF16(path.value())); 279#elif defined(OS_WIN) 280 rtl_safe_path->append(path.value()); 281#else // defined(OS_POSIX) && !defined(OS_MACOSX) 282 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 283 rtl_safe_path->append(WideToUTF16(wide_path)); 284#endif 285 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 286 rtl_safe_path->push_back(kPopDirectionalFormatting); 287} 288 289string16 GetDisplayStringInLTRDirectionality(const string16& text) { 290 if (!IsRTL()) 291 return text; 292 string16 text_mutable(text); 293 WrapStringWithLTRFormatting(&text_mutable); 294 return text_mutable; 295} 296 297const string16 StripWrappingBidiControlCharacters(const string16& text) { 298 if (text.empty()) 299 return text; 300 size_t begin_index = 0; 301 char16 begin = text[begin_index]; 302 if (begin == kLeftToRightEmbeddingMark || 303 begin == kRightToLeftEmbeddingMark || 304 begin == kLeftToRightOverride || 305 begin == kRightToLeftOverride) 306 ++begin_index; 307 size_t end_index = text.length() - 1; 308 if (text[end_index] == kPopDirectionalFormatting) 309 --end_index; 310 return text.substr(begin_index, end_index - begin_index + 1); 311} 312 313} // namespace i18n 314} // namespace base 315