1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include <algorithm>
8
9#include "base/files/file_path.h"
10#include "base/strings/string_util.h"
11#include "base/strings/sys_string_conversions.h"
12#include "base/strings/utf_string_conversions.h"
13#include "testing/gtest/include/gtest/gtest.h"
14#include "testing/platform_test.h"
15#include "third_party/icu/source/i18n/unicode/usearch.h"
16
17namespace base {
18namespace i18n {
19
20namespace {
21
22// A test utility function to set the application default text direction.
23void SetRTL(bool rtl) {
24  // Override the current locale/direction.
25  SetICUDefaultLocale(rtl ? "he" : "en");
26  EXPECT_EQ(rtl, IsRTL());
27}
28
29}  // namespace
30
31class RTLTest : public PlatformTest {
32};
33
34TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
35  struct {
36    const wchar_t* text;
37    TextDirection direction;
38  } cases[] = {
39    // Test pure LTR string.
40    { L"foo bar", LEFT_TO_RIGHT },
41    // Test pure RTL string.
42    { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
43    // Test bidi string in which the first character with strong directionality
44    // is a character with type L.
45    { L"foo \x05d0 bar", LEFT_TO_RIGHT },
46    // Test bidi string in which the first character with strong directionality
47    // is a character with type R.
48    { L"\x05d0 foo bar", RIGHT_TO_LEFT },
49    // Test bidi string which starts with a character with weak directionality
50    // and in which the first character with strong directionality is a
51    // character with type L.
52    { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
53    // Test bidi string which starts with a character with weak directionality
54    // and in which the first character with strong directionality is a
55    // character with type R.
56    { L",\x05d0 foo bar", RIGHT_TO_LEFT },
57    // Test bidi string in which the first character with strong directionality
58    // is a character with type LRE.
59    { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
60    // Test bidi string in which the first character with strong directionality
61    // is a character with type LRO.
62    { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
63    // Test bidi string in which the first character with strong directionality
64    // is a character with type RLE.
65    { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
66    // Test bidi string in which the first character with strong directionality
67    // is a character with type RLO.
68    { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
69    // Test bidi string in which the first character with strong directionality
70    // is a character with type AL.
71    { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
72    // Test a string without strong directionality characters.
73    { L",!.{}", LEFT_TO_RIGHT },
74    // Test empty string.
75    { L"", LEFT_TO_RIGHT },
76    // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
77    // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
78    // information).
79    {
80#if defined(WCHAR_T_IS_UTF32)
81      L" ! \x10910" L"abc 123",
82#elif defined(WCHAR_T_IS_UTF16)
83      L" ! \xd802\xdd10" L"abc 123",
84#else
85#error wchar_t should be either UTF-16 or UTF-32
86#endif
87      RIGHT_TO_LEFT },
88    {
89#if defined(WCHAR_T_IS_UTF32)
90      L" ! \x10401" L"abc 123",
91#elif defined(WCHAR_T_IS_UTF16)
92      L" ! \xd801\xdc01" L"abc 123",
93#else
94#error wchar_t should be either UTF-16 or UTF-32
95#endif
96      LEFT_TO_RIGHT },
97   };
98
99  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
100    EXPECT_EQ(cases[i].direction,
101              GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
102}
103
104
105// Note that the cases with LRE, LRO, RLE and RLO are invalid for
106// GetLastStrongCharacterDirection because they should be followed by PDF
107// character.
108TEST_F(RTLTest, GetLastStrongCharacterDirection) {
109  struct {
110    const wchar_t* text;
111    TextDirection direction;
112  } cases[] = {
113    // Test pure LTR string.
114    { L"foo bar", LEFT_TO_RIGHT },
115    // Test pure RTL string.
116    { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
117    // Test bidi string in which the last character with strong directionality
118    // is a character with type L.
119    { L"foo \x05d0 bar", LEFT_TO_RIGHT },
120    // Test bidi string in which the last character with strong directionality
121    // is a character with type R.
122    { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
123    // Test bidi string which ends with a character with weak directionality
124    // and in which the last character with strong directionality is a
125    // character with type L.
126    { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
127    // Test bidi string which ends with a character with weak directionality
128    // and in which the last character with strong directionality is a
129    // character with type R.
130    { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
131    // Test bidi string in which the last character with strong directionality
132    // is a character with type AL.
133    { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
134    // Test a string without strong directionality characters.
135    { L",!.{}", LEFT_TO_RIGHT },
136    // Test empty string.
137    { L"", LEFT_TO_RIGHT },
138    // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
139    // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
140    // information).
141    {
142#if defined(WCHAR_T_IS_UTF32)
143       L"abc 123" L" ! \x10910 !",
144#elif defined(WCHAR_T_IS_UTF16)
145       L"abc 123" L" ! \xd802\xdd10 !",
146#else
147#error wchar_t should be either UTF-16 or UTF-32
148#endif
149      RIGHT_TO_LEFT },
150    {
151#if defined(WCHAR_T_IS_UTF32)
152       L"abc 123" L" ! \x10401 !",
153#elif defined(WCHAR_T_IS_UTF16)
154       L"abc 123" L" ! \xd801\xdc01 !",
155#else
156#error wchar_t should be either UTF-16 or UTF-32
157#endif
158      LEFT_TO_RIGHT },
159   };
160
161  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
162    EXPECT_EQ(cases[i].direction,
163              GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
164}
165
166TEST_F(RTLTest, GetStringDirection) {
167  struct {
168    const wchar_t* text;
169    TextDirection direction;
170  } cases[] = {
171    // Test pure LTR string.
172    { L"foobar", LEFT_TO_RIGHT },
173    { L".foobar", LEFT_TO_RIGHT },
174    { L"foo, bar", LEFT_TO_RIGHT },
175    // Test pure LTR with strong directionality characters of type LRE.
176    { L"\x202a\x202a", LEFT_TO_RIGHT },
177    { L".\x202a\x202a", LEFT_TO_RIGHT },
178    { L"\x202a, \x202a", LEFT_TO_RIGHT },
179    // Test pure LTR with strong directionality characters of type LRO.
180    { L"\x202d\x202d", LEFT_TO_RIGHT },
181    { L".\x202d\x202d", LEFT_TO_RIGHT },
182    { L"\x202d, \x202d", LEFT_TO_RIGHT },
183    // Test pure LTR with various types of strong directionality characters.
184    { L"foo \x202a\x202d", LEFT_TO_RIGHT },
185    { L".\x202d foo \x202a", LEFT_TO_RIGHT },
186    { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
187    // Test pure RTL with strong directionality characters of type R.
188    { L"\x05d0\x05d0", RIGHT_TO_LEFT },
189    { L".\x05d0\x05d0", RIGHT_TO_LEFT },
190    { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
191    // Test pure RTL with strong directionality characters of type RLE.
192    { L"\x202b\x202b", RIGHT_TO_LEFT },
193    { L".\x202b\x202b", RIGHT_TO_LEFT },
194    { L"\x202b, \x202b", RIGHT_TO_LEFT },
195    // Test pure RTL with strong directionality characters of type RLO.
196    { L"\x202e\x202e", RIGHT_TO_LEFT },
197    { L".\x202e\x202e", RIGHT_TO_LEFT },
198    { L"\x202e, \x202e", RIGHT_TO_LEFT },
199    // Test pure RTL with strong directionality characters of type AL.
200    { L"\x0622\x0622", RIGHT_TO_LEFT },
201    { L".\x0622\x0622", RIGHT_TO_LEFT },
202    { L"\x0622, \x0622", RIGHT_TO_LEFT },
203    // Test pure RTL with various types of strong directionality characters.
204    { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
205    { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
206    { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
207    // Test bidi strings.
208    { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
209    { L"\x202b foo bar", UNKNOWN_DIRECTION },
210    { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
211    { L"\x202a\x202b", UNKNOWN_DIRECTION },
212    { L"\x202e\x202d", UNKNOWN_DIRECTION },
213    { L"\x0622\x202a", UNKNOWN_DIRECTION },
214    { L"\x202d\x05d0", UNKNOWN_DIRECTION },
215    // Test a string without strong directionality characters.
216    { L",!.{}", LEFT_TO_RIGHT },
217    // Test empty string.
218    { L"", LEFT_TO_RIGHT },
219    {
220#if defined(WCHAR_T_IS_UTF32)
221      L" ! \x10910" L"abc 123",
222#elif defined(WCHAR_T_IS_UTF16)
223      L" ! \xd802\xdd10" L"abc 123",
224#else
225#error wchar_t should be either UTF-16 or UTF-32
226#endif
227      UNKNOWN_DIRECTION },
228    {
229#if defined(WCHAR_T_IS_UTF32)
230      L" ! \x10401" L"abc 123",
231#elif defined(WCHAR_T_IS_UTF16)
232      L" ! \xd801\xdc01" L"abc 123",
233#else
234#error wchar_t should be either UTF-16 or UTF-32
235#endif
236      LEFT_TO_RIGHT },
237   };
238
239  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
240    EXPECT_EQ(cases[i].direction,
241              GetStringDirection(WideToUTF16(cases[i].text)));
242}
243
244TEST_F(RTLTest, WrapPathWithLTRFormatting) {
245  const wchar_t* cases[] = {
246    // Test common path, such as "c:\foo\bar".
247    L"c:/foo/bar",
248    // Test path with file name, such as "c:\foo\bar\test.jpg".
249    L"c:/foo/bar/test.jpg",
250    // Test path ending with punctuation, such as "c:\(foo)\bar.".
251    L"c:/(foo)/bar.",
252    // Test path ending with separator, such as "c:\foo\bar\".
253    L"c:/foo/bar/",
254    // Test path with RTL character.
255    L"c:/\x05d0",
256    // Test path with 2 level RTL directory names.
257    L"c:/\x05d0/\x0622",
258    // Test path with mixed RTL/LTR directory names and ending with punctuation.
259    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
260    // Test path without driver name, such as "/foo/bar/test/jpg".
261    L"/foo/bar/test.jpg",
262    // Test path start with current directory, such as "./foo".
263    L"./foo",
264    // Test path start with parent directory, such as "../foo/bar.jpg".
265    L"../foo/bar.jpg",
266    // Test absolute path, such as "//foo/bar.jpg".
267    L"//foo/bar.jpg",
268    // Test path with mixed RTL/LTR directory names.
269    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
270    // Test empty path.
271    L""
272  };
273
274  for (size_t i = 0; i < arraysize(cases); ++i) {
275    FilePath path;
276#if defined(OS_WIN)
277    std::wstring win_path(cases[i]);
278    std::replace(win_path.begin(), win_path.end(), '/', '\\');
279    path = FilePath(win_path);
280    std::wstring wrapped_expected =
281        std::wstring(L"\x202a") + win_path + L"\x202c";
282#else
283    path = FilePath(base::SysWideToNativeMB(cases[i]));
284    std::wstring wrapped_expected =
285        std::wstring(L"\x202a") + cases[i] + L"\x202c";
286#endif
287    string16 localized_file_path_string;
288    WrapPathWithLTRFormatting(path, &localized_file_path_string);
289
290    std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
291    EXPECT_EQ(wrapped_expected, wrapped_actual);
292  }
293}
294
295TEST_F(RTLTest, WrapString) {
296  const wchar_t* cases[] = {
297    L" . ",
298    L"abc",
299    L"a" L"\x5d0\x5d1",
300    L"a" L"\x5d1" L"b",
301    L"\x5d0\x5d1\x5d2",
302    L"\x5d0\x5d1" L"a",
303    L"\x5d0" L"a" L"\x5d1",
304  };
305
306  const bool was_rtl = IsRTL();
307
308  for (size_t i = 0; i < 2; ++i) {
309    // Toggle the application default text direction (to try each direction).
310    SetRTL(!IsRTL());
311
312    string16 empty;
313    WrapStringWithLTRFormatting(&empty);
314    EXPECT_TRUE(empty.empty());
315    WrapStringWithRTLFormatting(&empty);
316    EXPECT_TRUE(empty.empty());
317
318    for (size_t i = 0; i < arraysize(cases); ++i) {
319      string16 input = WideToUTF16(cases[i]);
320      string16 ltr_wrap = input;
321      WrapStringWithLTRFormatting(&ltr_wrap);
322      EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
323      EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
324      EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
325
326      string16 rtl_wrap = input;
327      WrapStringWithRTLFormatting(&rtl_wrap);
328      EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
329      EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
330      EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
331    }
332  }
333
334  EXPECT_EQ(was_rtl, IsRTL());
335}
336
337TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
338  struct {
339    const wchar_t* path;
340    bool wrap_ltr;
341    bool wrap_rtl;
342  } cases[] = {
343    { L"test",                   false, true },
344    { L"test.html",              false, true },
345    { L"\x05d0\x05d1\x05d2",     true,  true },
346    { L"\x05d0\x05d1\x05d2.txt", true,  true },
347    { L"\x05d0" L"abc",          true,  true },
348    { L"\x05d0" L"abc.txt",      true,  true },
349    { L"abc\x05d0\x05d1",        false, true },
350    { L"abc\x05d0\x05d1.jpg",    false, true },
351  };
352
353  const bool was_rtl = IsRTL();
354
355  for (size_t i = 0; i < 2; ++i) {
356    // Toggle the application default text direction (to try each direction).
357    SetRTL(!IsRTL());
358    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
359      string16 input = WideToUTF16(cases[i].path);
360      string16 output = GetDisplayStringInLTRDirectionality(input);
361      // Test the expected wrapping behavior for the current UI directionality.
362      if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
363        EXPECT_NE(output, input);
364      else
365        EXPECT_EQ(output, input);
366    }
367  }
368
369  EXPECT_EQ(was_rtl, IsRTL());
370}
371
372TEST_F(RTLTest, GetTextDirection) {
373  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
374  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
375  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
376  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
377  // iw is an obsolete code for Hebrew.
378  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
379  // Although we're not yet localized to Farsi and Urdu, we
380  // do have the text layout direction information for them.
381  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
382  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
383#if 0
384  // Enable these when we include the minimal locale data for Azerbaijani
385  // written in Arabic and Dhivehi. At the moment, our copy of
386  // ICU data does not have entries for them.
387  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
388  // Dhivehi that uses Thaana script.
389  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
390#endif
391  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
392  // Chinese in China with '-'.
393  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
394  // Filipino : 3-letter code
395  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
396  // Russian
397  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
398  // Japanese that uses multiple scripts
399  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
400}
401
402TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
403  // These test strings are borrowed from WrapPathWithLTRFormatting
404  const wchar_t* cases[] = {
405    L"foo bar",
406    L"foo \x05d0 bar",
407    L"\x05d0 foo bar",
408    L"!foo \x05d0 bar",
409    L",\x05d0 foo bar",
410    L"\x202a \x05d0 foo  bar",
411    L"\x202d \x05d0 foo  bar",
412    L"\x202b foo \x05d0 bar",
413    L"\x202e foo \x05d0 bar",
414    L"\x0622 foo \x05d0 bar",
415  };
416
417  const bool was_rtl = IsRTL();
418
419  for (size_t i = 0; i < 2; ++i) {
420    // Toggle the application default text direction (to try each direction).
421    SetRTL(!IsRTL());
422
423    for (size_t i = 0; i < arraysize(cases); ++i) {
424      string16 test_case = WideToUTF16(cases[i]);
425      string16 adjusted_string = test_case;
426
427      if (!AdjustStringForLocaleDirection(&adjusted_string))
428        continue;
429
430      EXPECT_NE(test_case, adjusted_string);
431      EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
432      EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
433                                            << "] with IsRTL() == " << IsRTL();
434    }
435  }
436
437  EXPECT_EQ(was_rtl, IsRTL());
438}
439
440}  // namespace i18n
441}  // namespace base
442