rtl.cc revision c407dc5cd9bdc5668497f21b26b09d988ab439de
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include "base/file_path.h" 8#include "base/logging.h" 9#include "base/string_util.h" 10#include "base/utf_string_conversions.h" 11#include "base/sys_string_conversions.h" 12#include "unicode/coll.h" 13#include "unicode/locid.h" 14#include "unicode/uchar.h" 15#include "unicode/uscript.h" 16 17#if defined(TOOLKIT_USES_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace base { 22namespace i18n { 23 24// Represents the locale-specific ICU text direction. 25static TextDirection g_icu_text_direction = UNKNOWN_DIRECTION; 26 27void GetLanguageAndRegionFromOS(std::string* lang, std::string* region) { 28 // Later we may have to change this to be OS-dependent so that 29 // it's not affected by ICU's default locale. It's all right 30 // to do this way because SetICUDefaultLocale is internal 31 // to this file and we know that it's not yet called when this function 32 // is called. 33 icu::Locale locale = icu::Locale::getDefault(); 34 const char* language = locale.getLanguage(); 35 const char* country = locale.getCountry(); 36 DCHECK(language); 37 *lang = language; 38 *region = country; 39} 40 41// Convert Chrome locale name to ICU locale name 42std::string ICULocaleName(const std::string& locale_string) { 43 // If not Spanish, just return it. 44 if (locale_string.substr(0, 2) != "es") 45 return locale_string; 46 // Expand es to es-ES. 47 if (LowerCaseEqualsASCII(locale_string, "es")) 48 return "es-ES"; 49 // Map es-419 (Latin American Spanish) to es-FOO depending on the system 50 // locale. If it's es-RR other than es-ES, map to es-RR. Otherwise, map 51 // to es-MX (the most populous in Spanish-speaking Latin America). 52 if (LowerCaseEqualsASCII(locale_string, "es-419")) { 53 std::string lang, region; 54 GetLanguageAndRegionFromOS(&lang, ®ion); 55 if (LowerCaseEqualsASCII(lang, "es") && 56 !LowerCaseEqualsASCII(region, "es")) { 57 lang.append("-"); 58 lang.append(region); 59 return lang; 60 } 61 return "es-MX"; 62 } 63 // Currently, Chrome has only "es" and "es-419", but later we may have 64 // more specific "es-RR". 65 return locale_string; 66} 67 68void SetICUDefaultLocale(const std::string& locale_string) { 69 icu::Locale locale(ICULocaleName(locale_string).c_str()); 70 UErrorCode error_code = U_ZERO_ERROR; 71 icu::Locale::setDefault(locale, error_code); 72 // This return value is actually bogus because Locale object is 73 // an ID and setDefault seems to always succeed (regardless of the 74 // presence of actual locale data). However, 75 // it does not hurt to have it as a sanity check. 76 DCHECK(U_SUCCESS(error_code)); 77 g_icu_text_direction = UNKNOWN_DIRECTION; 78 79 // If we use Views toolkit on top of GtkWidget, then we need to keep 80 // GtkWidget's default text direction consistent with ICU's text direction. 81 // Because in this case ICU's text direction will be used instead. 82 // See IsRTL() function below. 83#if defined(TOOLKIT_USES_GTK) && !defined(TOOLKIT_GTK) 84 gtk_widget_set_default_direction( 85 ICUIsRTL() ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); 86#endif 87} 88 89bool IsRTL() { 90#if defined(TOOLKIT_GTK) 91 GtkTextDirection gtk_dir = gtk_widget_get_default_direction(); 92 return (gtk_dir == GTK_TEXT_DIR_RTL); 93#else 94 return ICUIsRTL(); 95#endif 96} 97 98bool ICUIsRTL() { 99 if (g_icu_text_direction == UNKNOWN_DIRECTION) { 100 const icu::Locale& locale = icu::Locale::getDefault(); 101 g_icu_text_direction = GetTextDirectionForLocale(locale.getName()); 102 } 103 return g_icu_text_direction == RIGHT_TO_LEFT; 104} 105 106TextDirection GetTextDirectionForLocale(const char* locale_name) { 107 UErrorCode status = U_ZERO_ERROR; 108 ULayoutType layout_dir = uloc_getCharacterOrientation(locale_name, &status); 109 DCHECK(U_SUCCESS(status)); 110 // Treat anything other than RTL as LTR. 111 return (layout_dir != ULOC_LAYOUT_RTL) ? LEFT_TO_RIGHT : RIGHT_TO_LEFT; 112} 113 114TextDirection GetFirstStrongCharacterDirection(const string16& text) { 115 const UChar* string = text.c_str(); 116 size_t length = text.length(); 117 size_t position = 0; 118 while (position < length) { 119 UChar32 character; 120 size_t next_position = position; 121 U16_NEXT(string, next_position, length, character); 122 123 // Now that we have the character, we use ICU in order to query for the 124 // appropriate Unicode BiDi character type. 125 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 126 if ((property == U_RIGHT_TO_LEFT) || 127 (property == U_RIGHT_TO_LEFT_ARABIC) || 128 (property == U_RIGHT_TO_LEFT_EMBEDDING) || 129 (property == U_RIGHT_TO_LEFT_OVERRIDE)) { 130 return RIGHT_TO_LEFT; 131 } else if ((property == U_LEFT_TO_RIGHT) || 132 (property == U_LEFT_TO_RIGHT_EMBEDDING) || 133 (property == U_LEFT_TO_RIGHT_OVERRIDE)) { 134 return LEFT_TO_RIGHT; 135 } 136 137 position = next_position; 138 } 139 140 return LEFT_TO_RIGHT; 141} 142 143#if defined(WCHAR_T_IS_UTF32) 144TextDirection GetFirstStrongCharacterDirection(const std::wstring& text) { 145 return GetFirstStrongCharacterDirection(WideToUTF16(text)); 146} 147#endif 148 149bool AdjustStringForLocaleDirection(const string16& text, 150 string16* localized_text) { 151 if (!IsRTL() || text.empty()) 152 return false; 153 154 // Marking the string as LTR if the locale is RTL and the string does not 155 // contain strong RTL characters. Otherwise, mark the string as RTL. 156 *localized_text = text; 157 bool has_rtl_chars = StringContainsStrongRTLChars(text); 158 if (!has_rtl_chars) 159 WrapStringWithLTRFormatting(localized_text); 160 else 161 WrapStringWithRTLFormatting(localized_text); 162 163 return true; 164} 165 166#if defined(WCHAR_T_IS_UTF32) 167bool AdjustStringForLocaleDirection(const std::wstring& text, 168 std::wstring* localized_text) { 169 string16 out; 170 if (AdjustStringForLocaleDirection(WideToUTF16(text), &out)) { 171 // We should only touch the output on success. 172 *localized_text = UTF16ToWide(out); 173 return true; 174 } 175 return false; 176} 177#endif 178 179bool StringContainsStrongRTLChars(const string16& text) { 180 const UChar* string = text.c_str(); 181 size_t length = text.length(); 182 size_t position = 0; 183 while (position < length) { 184 UChar32 character; 185 size_t next_position = position; 186 U16_NEXT(string, next_position, length, character); 187 188 // Now that we have the character, we use ICU in order to query for the 189 // appropriate Unicode BiDi character type. 190 int32_t property = u_getIntPropertyValue(character, UCHAR_BIDI_CLASS); 191 if ((property == U_RIGHT_TO_LEFT) || (property == U_RIGHT_TO_LEFT_ARABIC)) 192 return true; 193 194 position = next_position; 195 } 196 197 return false; 198} 199 200#if defined(WCHAR_T_IS_UTF32) 201bool StringContainsStrongRTLChars(const std::wstring& text) { 202 return StringContainsStrongRTLChars(WideToUTF16(text)); 203} 204#endif 205 206void WrapStringWithLTRFormatting(string16* text) { 207 if (text->empty()) 208 return; 209 210 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 211 text->insert(0, 1, kLeftToRightEmbeddingMark); 212 213 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 214 text->push_back(kPopDirectionalFormatting); 215} 216 217#if defined(WCHAR_T_IS_UTF32) 218void WrapStringWithLTRFormatting(std::wstring* text) { 219 if (text->empty()) 220 return; 221 222 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 223 text->insert(0, 1, static_cast<wchar_t>(kLeftToRightEmbeddingMark)); 224 225 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 226 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 227} 228#endif 229 230void WrapStringWithRTLFormatting(string16* text) { 231 if (text->empty()) 232 return; 233 234 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 235 text->insert(0, 1, kRightToLeftEmbeddingMark); 236 237 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 238 text->push_back(kPopDirectionalFormatting); 239} 240 241#if defined(WCHAR_T_IS_UTF32) 242void WrapStringWithRTLFormatting(std::wstring* text) { 243 if (text->empty()) 244 return; 245 246 // Inserting an RLE (Right-To-Left Embedding) mark as the first character. 247 text->insert(0, 1, static_cast<wchar_t>(kRightToLeftEmbeddingMark)); 248 249 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 250 text->push_back(static_cast<wchar_t>(kPopDirectionalFormatting)); 251} 252#endif 253 254void WrapPathWithLTRFormatting(const FilePath& path, 255 string16* rtl_safe_path) { 256 // Wrap the overall path with LRE-PDF pair which essentialy marks the 257 // string as a Left-To-Right string. 258 // Inserting an LRE (Left-To-Right Embedding) mark as the first character. 259 rtl_safe_path->push_back(kLeftToRightEmbeddingMark); 260#if defined(OS_MACOSX) 261 rtl_safe_path->append(UTF8ToUTF16(path.value())); 262#elif defined(OS_WIN) 263 rtl_safe_path->append(path.value()); 264#else // defined(OS_POSIX) && !defined(OS_MACOSX) 265 std::wstring wide_path = base::SysNativeMBToWide(path.value()); 266 rtl_safe_path->append(WideToUTF16(wide_path)); 267#endif 268 // Inserting a PDF (Pop Directional Formatting) mark as the last character. 269 rtl_safe_path->push_back(kPopDirectionalFormatting); 270} 271 272std::wstring GetDisplayStringInLTRDirectionality(std::wstring* text) { 273 if (IsRTL()) 274 WrapStringWithLTRFormatting(text); 275 return *text; 276} 277 278const string16 StripWrappingBidiControlCharacters(const string16& text) { 279 if (text.empty()) 280 return text; 281 size_t begin_index = 0; 282 char16 begin = text[begin_index]; 283 if (begin == kLeftToRightEmbeddingMark || 284 begin == kRightToLeftEmbeddingMark || 285 begin == kLeftToRightOverride || 286 begin == kRightToLeftOverride) 287 ++begin_index; 288 size_t end_index = text.length() - 1; 289 if (text[end_index] == kPopDirectionalFormatting) 290 --end_index; 291 return text.substr(begin_index, end_index - begin_index + 1); 292} 293 294} // namespace i18n 295} // namespace base 296