// rtl_unittest.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/i18n/rtl.h" 6 7#include <algorithm> 8 9#include "base/files/file_path.h" 10#include "base/strings/string_util.h" 11#include "base/strings/sys_string_conversions.h" 12#include "base/strings/utf_string_conversions.h" 13#include "testing/gtest/include/gtest/gtest.h" 14#include "testing/platform_test.h" 15#include "third_party/icu/source/i18n/unicode/usearch.h" 16 17#if defined(TOOLKIT_GTK) 18#include <gtk/gtk.h> 19#endif 20 21namespace base { 22namespace i18n { 23 24namespace { 25 26// A test utility function to set the application default text direction. 27void SetRTL(bool rtl) { 28 // Override the current locale/direction. 29 SetICUDefaultLocale(rtl ? "he" : "en"); 30#if defined(TOOLKIT_GTK) 31 // Do the same for GTK, which does not rely on the ICU default locale. 32 gtk_widget_set_default_direction(rtl ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR); 33#endif 34 EXPECT_EQ(rtl, IsRTL()); 35} 36 37} // namespace 38 39class RTLTest : public PlatformTest { 40}; 41 42TEST_F(RTLTest, GetFirstStrongCharacterDirection) { 43 struct { 44 const wchar_t* text; 45 TextDirection direction; 46 } cases[] = { 47 // Test pure LTR string. 48 { L"foo bar", LEFT_TO_RIGHT }, 49 // Test pure RTL string. 50 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT}, 51 // Test bidi string in which the first character with strong directionality 52 // is a character with type L. 53 { L"foo \x05d0 bar", LEFT_TO_RIGHT }, 54 // Test bidi string in which the first character with strong directionality 55 // is a character with type R. 56 { L"\x05d0 foo bar", RIGHT_TO_LEFT }, 57 // Test bidi string which starts with a character with weak directionality 58 // and in which the first character with strong directionality is a 59 // character with type L. 
60 { L"!foo \x05d0 bar", LEFT_TO_RIGHT }, 61 // Test bidi string which starts with a character with weak directionality 62 // and in which the first character with strong directionality is a 63 // character with type R. 64 { L",\x05d0 foo bar", RIGHT_TO_LEFT }, 65 // Test bidi string in which the first character with strong directionality 66 // is a character with type LRE. 67 { L"\x202a \x05d0 foo bar", LEFT_TO_RIGHT }, 68 // Test bidi string in which the first character with strong directionality 69 // is a character with type LRO. 70 { L"\x202d \x05d0 foo bar", LEFT_TO_RIGHT }, 71 // Test bidi string in which the first character with strong directionality 72 // is a character with type RLE. 73 { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT }, 74 // Test bidi string in which the first character with strong directionality 75 // is a character with type RLO. 76 { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT }, 77 // Test bidi string in which the first character with strong directionality 78 // is a character with type AL. 79 { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT }, 80 // Test a string without strong directionality characters. 81 { L",!.{}", LEFT_TO_RIGHT }, 82 // Test empty string. 83 { L"", LEFT_TO_RIGHT }, 84 // Test characters in non-BMP (e.g. Phoenician letters. Please refer to 85 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more 86 // information). 87 { 88#if defined(WCHAR_T_IS_UTF32) 89 L" ! \x10910" L"abc 123", 90#elif defined(WCHAR_T_IS_UTF16) 91 L" ! \xd802\xdd10" L"abc 123", 92#else 93#error wchar_t should be either UTF-16 or UTF-32 94#endif 95 RIGHT_TO_LEFT }, 96 { 97#if defined(WCHAR_T_IS_UTF32) 98 L" ! \x10401" L"abc 123", 99#elif defined(WCHAR_T_IS_UTF16) 100 L" ! 
\xd801\xdc01" L"abc 123", 101#else 102#error wchar_t should be either UTF-16 or UTF-32 103#endif 104 LEFT_TO_RIGHT }, 105 }; 106 107 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 108 EXPECT_EQ(cases[i].direction, 109 GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text))); 110} 111 112 113// Note that the cases with LRE, LRO, RLE and RLO are invalid for 114// GetLastStrongCharacterDirection because they should be followed by PDF 115// character. 116TEST_F(RTLTest, GetLastStrongCharacterDirection) { 117 struct { 118 const wchar_t* text; 119 TextDirection direction; 120 } cases[] = { 121 // Test pure LTR string. 122 { L"foo bar", LEFT_TO_RIGHT }, 123 // Test pure RTL string. 124 { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT}, 125 // Test bidi string in which the last character with strong directionality 126 // is a character with type L. 127 { L"foo \x05d0 bar", LEFT_TO_RIGHT }, 128 // Test bidi string in which the last character with strong directionality 129 // is a character with type R. 130 { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT }, 131 // Test bidi string which ends with a character with weak directionality 132 // and in which the last character with strong directionality is a 133 // character with type L. 134 { L"!foo \x05d0 bar!", LEFT_TO_RIGHT }, 135 // Test bidi string which ends with a character with weak directionality 136 // and in which the last character with strong directionality is a 137 // character with type R. 138 { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT }, 139 // Test bidi string in which the last character with strong directionality 140 // is a character with type AL. 141 { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT }, 142 // Test a string without strong directionality characters. 143 { L",!.{}", LEFT_TO_RIGHT }, 144 // Test empty string. 145 { L"", LEFT_TO_RIGHT }, 146 // Test characters in non-BMP (e.g. Phoenician letters. 
Please refer to 147 // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more 148 // information). 149 { 150#if defined(WCHAR_T_IS_UTF32) 151 L"abc 123" L" ! \x10910 !", 152#elif defined(WCHAR_T_IS_UTF16) 153 L"abc 123" L" ! \xd802\xdd10 !", 154#else 155#error wchar_t should be either UTF-16 or UTF-32 156#endif 157 RIGHT_TO_LEFT }, 158 { 159#if defined(WCHAR_T_IS_UTF32) 160 L"abc 123" L" ! \x10401 !", 161#elif defined(WCHAR_T_IS_UTF16) 162 L"abc 123" L" ! \xd801\xdc01 !", 163#else 164#error wchar_t should be either UTF-16 or UTF-32 165#endif 166 LEFT_TO_RIGHT }, 167 }; 168 169 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 170 EXPECT_EQ(cases[i].direction, 171 GetLastStrongCharacterDirection(WideToUTF16(cases[i].text))); 172} 173 174TEST_F(RTLTest, GetStringDirection) { 175 struct { 176 const wchar_t* text; 177 TextDirection direction; 178 } cases[] = { 179 // Test pure LTR string. 180 { L"foobar", LEFT_TO_RIGHT }, 181 { L".foobar", LEFT_TO_RIGHT }, 182 { L"foo, bar", LEFT_TO_RIGHT }, 183 // Test pure LTR with strong directionality characters of type LRE. 184 { L"\x202a\x202a", LEFT_TO_RIGHT }, 185 { L".\x202a\x202a", LEFT_TO_RIGHT }, 186 { L"\x202a, \x202a", LEFT_TO_RIGHT }, 187 // Test pure LTR with strong directionality characters of type LRO. 188 { L"\x202d\x202d", LEFT_TO_RIGHT }, 189 { L".\x202d\x202d", LEFT_TO_RIGHT }, 190 { L"\x202d, \x202d", LEFT_TO_RIGHT }, 191 // Test pure LTR with various types of strong directionality characters. 192 { L"foo \x202a\x202d", LEFT_TO_RIGHT }, 193 { L".\x202d foo \x202a", LEFT_TO_RIGHT }, 194 { L"\x202a, \x202d foo", LEFT_TO_RIGHT }, 195 // Test pure RTL with strong directionality characters of type R. 196 { L"\x05d0\x05d0", RIGHT_TO_LEFT }, 197 { L".\x05d0\x05d0", RIGHT_TO_LEFT }, 198 { L"\x05d0, \x05d0", RIGHT_TO_LEFT }, 199 // Test pure RTL with strong directionality characters of type RLE. 
200 { L"\x202b\x202b", RIGHT_TO_LEFT }, 201 { L".\x202b\x202b", RIGHT_TO_LEFT }, 202 { L"\x202b, \x202b", RIGHT_TO_LEFT }, 203 // Test pure RTL with strong directionality characters of type RLO. 204 { L"\x202e\x202e", RIGHT_TO_LEFT }, 205 { L".\x202e\x202e", RIGHT_TO_LEFT }, 206 { L"\x202e, \x202e", RIGHT_TO_LEFT }, 207 // Test pure RTL with strong directionality characters of type AL. 208 { L"\x0622\x0622", RIGHT_TO_LEFT }, 209 { L".\x0622\x0622", RIGHT_TO_LEFT }, 210 { L"\x0622, \x0622", RIGHT_TO_LEFT }, 211 // Test pure RTL with various types of strong directionality characters. 212 { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT }, 213 { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT }, 214 { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT }, 215 // Test bidi strings. 216 { L"foo \x05d0 bar", UNKNOWN_DIRECTION }, 217 { L"\x202b foo bar", UNKNOWN_DIRECTION }, 218 { L"!foo \x0622 bar", UNKNOWN_DIRECTION }, 219 { L"\x202a\x202b", UNKNOWN_DIRECTION }, 220 { L"\x202e\x202d", UNKNOWN_DIRECTION }, 221 { L"\x0622\x202a", UNKNOWN_DIRECTION }, 222 { L"\x202d\x05d0", UNKNOWN_DIRECTION }, 223 // Test a string without strong directionality characters. 224 { L",!.{}", LEFT_TO_RIGHT }, 225 // Test empty string. 226 { L"", LEFT_TO_RIGHT }, 227 { 228#if defined(WCHAR_T_IS_UTF32) 229 L" ! \x10910" L"abc 123", 230#elif defined(WCHAR_T_IS_UTF16) 231 L" ! \xd802\xdd10" L"abc 123", 232#else 233#error wchar_t should be either UTF-16 or UTF-32 234#endif 235 UNKNOWN_DIRECTION }, 236 { 237#if defined(WCHAR_T_IS_UTF32) 238 L" ! \x10401" L"abc 123", 239#elif defined(WCHAR_T_IS_UTF16) 240 L" ! \xd801\xdc01" L"abc 123", 241#else 242#error wchar_t should be either UTF-16 or UTF-32 243#endif 244 LEFT_TO_RIGHT }, 245 }; 246 247 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) 248 EXPECT_EQ(cases[i].direction, 249 GetStringDirection(WideToUTF16(cases[i].text))); 250} 251 252TEST_F(RTLTest, WrapPathWithLTRFormatting) { 253 const wchar_t* cases[] = { 254 // Test common path, such as "c:\foo\bar". 
255 L"c:/foo/bar", 256 // Test path with file name, such as "c:\foo\bar\test.jpg". 257 L"c:/foo/bar/test.jpg", 258 // Test path ending with punctuation, such as "c:\(foo)\bar.". 259 L"c:/(foo)/bar.", 260 // Test path ending with separator, such as "c:\foo\bar\". 261 L"c:/foo/bar/", 262 // Test path with RTL character. 263 L"c:/\x05d0", 264 // Test path with 2 level RTL directory names. 265 L"c:/\x05d0/\x0622", 266 // Test path with mixed RTL/LTR directory names and ending with punctuation. 267 L"c:/\x05d0/\x0622/(foo)/b.a.r.", 268 // Test path without driver name, such as "/foo/bar/test/jpg". 269 L"/foo/bar/test.jpg", 270 // Test path start with current directory, such as "./foo". 271 L"./foo", 272 // Test path start with parent directory, such as "../foo/bar.jpg". 273 L"../foo/bar.jpg", 274 // Test absolute path, such as "//foo/bar.jpg". 275 L"//foo/bar.jpg", 276 // Test path with mixed RTL/LTR directory names. 277 L"c:/foo/\x05d0/\x0622/\x05d1.jpg", 278 // Test empty path. 279 L"" 280 }; 281 282 for (size_t i = 0; i < arraysize(cases); ++i) { 283 FilePath path; 284#if defined(OS_WIN) 285 std::wstring win_path(cases[i]); 286 std::replace(win_path.begin(), win_path.end(), '/', '\\'); 287 path = FilePath(win_path); 288 std::wstring wrapped_expected = 289 std::wstring(L"\x202a") + win_path + L"\x202c"; 290#else 291 path = FilePath(base::SysWideToNativeMB(cases[i])); 292 std::wstring wrapped_expected = 293 std::wstring(L"\x202a") + cases[i] + L"\x202c"; 294#endif 295 string16 localized_file_path_string; 296 WrapPathWithLTRFormatting(path, &localized_file_path_string); 297 298 std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string); 299 EXPECT_EQ(wrapped_expected, wrapped_actual); 300 } 301} 302 303TEST_F(RTLTest, WrapString) { 304 const wchar_t* cases[] = { 305 L" . 
", 306 L"abc", 307 L"a" L"\x5d0\x5d1", 308 L"a" L"\x5d1" L"b", 309 L"\x5d0\x5d1\x5d2", 310 L"\x5d0\x5d1" L"a", 311 L"\x5d0" L"a" L"\x5d1", 312 }; 313 314 const bool was_rtl = IsRTL(); 315 316 for (size_t i = 0; i < 2; ++i) { 317 // Toggle the application default text direction (to try each direction). 318 SetRTL(!IsRTL()); 319 320 string16 empty; 321 WrapStringWithLTRFormatting(&empty); 322 EXPECT_TRUE(empty.empty()); 323 WrapStringWithRTLFormatting(&empty); 324 EXPECT_TRUE(empty.empty()); 325 326 for (size_t i = 0; i < arraysize(cases); ++i) { 327 string16 input = WideToUTF16(cases[i]); 328 string16 ltr_wrap = input; 329 WrapStringWithLTRFormatting(<r_wrap); 330 EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark); 331 EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input); 332 EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting); 333 334 string16 rtl_wrap = input; 335 WrapStringWithRTLFormatting(&rtl_wrap); 336 EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark); 337 EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input); 338 EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting); 339 } 340 } 341 342 EXPECT_EQ(was_rtl, IsRTL()); 343} 344 345TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) { 346 struct { 347 const wchar_t* path; 348 bool wrap_ltr; 349 bool wrap_rtl; 350 } cases[] = { 351 { L"test", false, true }, 352 { L"test.html", false, true }, 353 { L"\x05d0\x05d1\x05d2", true, true }, 354 { L"\x05d0\x05d1\x05d2.txt", true, true }, 355 { L"\x05d0" L"abc", true, true }, 356 { L"\x05d0" L"abc.txt", true, true }, 357 { L"abc\x05d0\x05d1", false, true }, 358 { L"abc\x05d0\x05d1.jpg", false, true }, 359 }; 360 361 const bool was_rtl = IsRTL(); 362 363 for (size_t i = 0; i < 2; ++i) { 364 // Toggle the application default text direction (to try each direction). 
365 SetRTL(!IsRTL()); 366 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) { 367 string16 input = WideToUTF16(cases[i].path); 368 string16 output = GetDisplayStringInLTRDirectionality(input); 369 // Test the expected wrapping behavior for the current UI directionality. 370 if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr) 371 EXPECT_NE(output, input); 372 else 373 EXPECT_EQ(output, input); 374 } 375 } 376 377 EXPECT_EQ(was_rtl, IsRTL()); 378} 379 380TEST_F(RTLTest, GetTextDirection) { 381 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar")); 382 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG")); 383 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he")); 384 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL")); 385 // iw is an obsolete code for Hebrew. 386 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw")); 387 // Although we're not yet localized to Farsi and Urdu, we 388 // do have the text layout direction information for them. 389 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa")); 390 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur")); 391#if 0 392 // Enable these when we include the minimal locale data for Azerbaijani 393 // written in Arabic and Dhivehi. At the moment, our copy of 394 // ICU data does not have entries for them. 395 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab")); 396 // Dhivehi that uses Thaana script. 397 EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv")); 398#endif 399 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en")); 400 // Chinese in China with '-'. 
401 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN")); 402 // Filipino : 3-letter code 403 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil")); 404 // Russian 405 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru")); 406 // Japanese that uses multiple scripts 407 EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja")); 408} 409 410TEST_F(RTLTest, UnadjustStringForLocaleDirection) { 411 // These test strings are borrowed from WrapPathWithLTRFormatting 412 const wchar_t* cases[] = { 413 L"foo bar", 414 L"foo \x05d0 bar", 415 L"\x05d0 foo bar", 416 L"!foo \x05d0 bar", 417 L",\x05d0 foo bar", 418 L"\x202a \x05d0 foo bar", 419 L"\x202d \x05d0 foo bar", 420 L"\x202b foo \x05d0 bar", 421 L"\x202e foo \x05d0 bar", 422 L"\x0622 foo \x05d0 bar", 423 }; 424 425 const bool was_rtl = IsRTL(); 426 427 for (size_t i = 0; i < 2; ++i) { 428 // Toggle the application default text direction (to try each direction). 429 SetRTL(!IsRTL()); 430 431 for (size_t i = 0; i < arraysize(cases); ++i) { 432 string16 test_case = WideToUTF16(cases[i]); 433 string16 adjusted_string = test_case; 434 435 if (!AdjustStringForLocaleDirection(&adjusted_string)) 436 continue; 437 438 EXPECT_NE(test_case, adjusted_string); 439 EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string)); 440 EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case 441 << "] with IsRTL() == " << IsRTL(); 442 } 443 } 444 445 EXPECT_EQ(was_rtl, IsRTL()); 446} 447 448} // namespace i18n 449} // namespace base 450