// rtl_unittest.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include <algorithm>
8
9#include "base/files/file_path.h"
10#include "base/strings/string_util.h"
11#include "base/strings/sys_string_conversions.h"
12#include "base/strings/utf_string_conversions.h"
13#include "testing/gtest/include/gtest/gtest.h"
14#include "testing/platform_test.h"
15#include "third_party/icu/source/i18n/unicode/usearch.h"
16
17#if defined(TOOLKIT_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace base {
22namespace i18n {
23
24namespace {
25
26// A test utility function to set the application default text direction.
27void SetRTL(bool rtl) {
28  // Override the current locale/direction.
29  SetICUDefaultLocale(rtl ? "he" : "en");
30#if defined(TOOLKIT_GTK)
31  // Do the same for GTK, which does not rely on the ICU default locale.
32  gtk_widget_set_default_direction(rtl ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
33#endif
34  EXPECT_EQ(rtl, IsRTL());
35}
36
37}  // namespace
38
39class RTLTest : public PlatformTest {
40};
41
42TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
43  struct {
44    const wchar_t* text;
45    TextDirection direction;
46  } cases[] = {
47    // Test pure LTR string.
48    { L"foo bar", LEFT_TO_RIGHT },
49    // Test pure RTL string.
50    { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
51    // Test bidi string in which the first character with strong directionality
52    // is a character with type L.
53    { L"foo \x05d0 bar", LEFT_TO_RIGHT },
54    // Test bidi string in which the first character with strong directionality
55    // is a character with type R.
56    { L"\x05d0 foo bar", RIGHT_TO_LEFT },
57    // Test bidi string which starts with a character with weak directionality
58    // and in which the first character with strong directionality is a
59    // character with type L.
60    { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
61    // Test bidi string which starts with a character with weak directionality
62    // and in which the first character with strong directionality is a
63    // character with type R.
64    { L",\x05d0 foo bar", RIGHT_TO_LEFT },
65    // Test bidi string in which the first character with strong directionality
66    // is a character with type LRE.
67    { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
68    // Test bidi string in which the first character with strong directionality
69    // is a character with type LRO.
70    { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
71    // Test bidi string in which the first character with strong directionality
72    // is a character with type RLE.
73    { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
74    // Test bidi string in which the first character with strong directionality
75    // is a character with type RLO.
76    { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
77    // Test bidi string in which the first character with strong directionality
78    // is a character with type AL.
79    { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
80    // Test a string without strong directionality characters.
81    { L",!.{}", LEFT_TO_RIGHT },
82    // Test empty string.
83    { L"", LEFT_TO_RIGHT },
84    // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
85    // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
86    // information).
87    {
88#if defined(WCHAR_T_IS_UTF32)
89      L" ! \x10910" L"abc 123",
90#elif defined(WCHAR_T_IS_UTF16)
91      L" ! \xd802\xdd10" L"abc 123",
92#else
93#error wchar_t should be either UTF-16 or UTF-32
94#endif
95      RIGHT_TO_LEFT },
96    {
97#if defined(WCHAR_T_IS_UTF32)
98      L" ! \x10401" L"abc 123",
99#elif defined(WCHAR_T_IS_UTF16)
100      L" ! \xd801\xdc01" L"abc 123",
101#else
102#error wchar_t should be either UTF-16 or UTF-32
103#endif
104      LEFT_TO_RIGHT },
105   };
106
107  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
108    EXPECT_EQ(cases[i].direction,
109              GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
110}
111
112
113// Note that the cases with LRE, LRO, RLE and RLO are invalid for
114// GetLastStrongCharacterDirection because they should be followed by PDF
115// character.
116TEST_F(RTLTest, GetLastStrongCharacterDirection) {
117  struct {
118    const wchar_t* text;
119    TextDirection direction;
120  } cases[] = {
121    // Test pure LTR string.
122    { L"foo bar", LEFT_TO_RIGHT },
123    // Test pure RTL string.
124    { L"\x05d0\x05d1\x05d2 \x05d3\x0d4\x05d5", RIGHT_TO_LEFT},
125    // Test bidi string in which the last character with strong directionality
126    // is a character with type L.
127    { L"foo \x05d0 bar", LEFT_TO_RIGHT },
128    // Test bidi string in which the last character with strong directionality
129    // is a character with type R.
130    { L"\x05d0 foo bar \x05d3", RIGHT_TO_LEFT },
131    // Test bidi string which ends with a character with weak directionality
132    // and in which the last character with strong directionality is a
133    // character with type L.
134    { L"!foo \x05d0 bar!", LEFT_TO_RIGHT },
135    // Test bidi string which ends with a character with weak directionality
136    // and in which the last character with strong directionality is a
137    // character with type R.
138    { L",\x05d0 foo bar \x05d1,", RIGHT_TO_LEFT },
139    // Test bidi string in which the last character with strong directionality
140    // is a character with type AL.
141    { L"\x0622 foo \x05d0 bar \x0622", RIGHT_TO_LEFT },
142    // Test a string without strong directionality characters.
143    { L",!.{}", LEFT_TO_RIGHT },
144    // Test empty string.
145    { L"", LEFT_TO_RIGHT },
146    // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
147    // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
148    // information).
149    {
150#if defined(WCHAR_T_IS_UTF32)
151       L"abc 123" L" ! \x10910 !",
152#elif defined(WCHAR_T_IS_UTF16)
153       L"abc 123" L" ! \xd802\xdd10 !",
154#else
155#error wchar_t should be either UTF-16 or UTF-32
156#endif
157      RIGHT_TO_LEFT },
158    {
159#if defined(WCHAR_T_IS_UTF32)
160       L"abc 123" L" ! \x10401 !",
161#elif defined(WCHAR_T_IS_UTF16)
162       L"abc 123" L" ! \xd801\xdc01 !",
163#else
164#error wchar_t should be either UTF-16 or UTF-32
165#endif
166      LEFT_TO_RIGHT },
167   };
168
169  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
170    EXPECT_EQ(cases[i].direction,
171              GetLastStrongCharacterDirection(WideToUTF16(cases[i].text)));
172}
173
174TEST_F(RTLTest, GetStringDirection) {
175  struct {
176    const wchar_t* text;
177    TextDirection direction;
178  } cases[] = {
179    // Test pure LTR string.
180    { L"foobar", LEFT_TO_RIGHT },
181    { L".foobar", LEFT_TO_RIGHT },
182    { L"foo, bar", LEFT_TO_RIGHT },
183    // Test pure LTR with strong directionality characters of type LRE.
184    { L"\x202a\x202a", LEFT_TO_RIGHT },
185    { L".\x202a\x202a", LEFT_TO_RIGHT },
186    { L"\x202a, \x202a", LEFT_TO_RIGHT },
187    // Test pure LTR with strong directionality characters of type LRO.
188    { L"\x202d\x202d", LEFT_TO_RIGHT },
189    { L".\x202d\x202d", LEFT_TO_RIGHT },
190    { L"\x202d, \x202d", LEFT_TO_RIGHT },
191    // Test pure LTR with various types of strong directionality characters.
192    { L"foo \x202a\x202d", LEFT_TO_RIGHT },
193    { L".\x202d foo \x202a", LEFT_TO_RIGHT },
194    { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
195    // Test pure RTL with strong directionality characters of type R.
196    { L"\x05d0\x05d0", RIGHT_TO_LEFT },
197    { L".\x05d0\x05d0", RIGHT_TO_LEFT },
198    { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
199    // Test pure RTL with strong directionality characters of type RLE.
200    { L"\x202b\x202b", RIGHT_TO_LEFT },
201    { L".\x202b\x202b", RIGHT_TO_LEFT },
202    { L"\x202b, \x202b", RIGHT_TO_LEFT },
203    // Test pure RTL with strong directionality characters of type RLO.
204    { L"\x202e\x202e", RIGHT_TO_LEFT },
205    { L".\x202e\x202e", RIGHT_TO_LEFT },
206    { L"\x202e, \x202e", RIGHT_TO_LEFT },
207    // Test pure RTL with strong directionality characters of type AL.
208    { L"\x0622\x0622", RIGHT_TO_LEFT },
209    { L".\x0622\x0622", RIGHT_TO_LEFT },
210    { L"\x0622, \x0622", RIGHT_TO_LEFT },
211    // Test pure RTL with various types of strong directionality characters.
212    { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
213    { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
214    { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
215    // Test bidi strings.
216    { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
217    { L"\x202b foo bar", UNKNOWN_DIRECTION },
218    { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
219    { L"\x202a\x202b", UNKNOWN_DIRECTION },
220    { L"\x202e\x202d", UNKNOWN_DIRECTION },
221    { L"\x0622\x202a", UNKNOWN_DIRECTION },
222    { L"\x202d\x05d0", UNKNOWN_DIRECTION },
223    // Test a string without strong directionality characters.
224    { L",!.{}", LEFT_TO_RIGHT },
225    // Test empty string.
226    { L"", LEFT_TO_RIGHT },
227    {
228#if defined(WCHAR_T_IS_UTF32)
229      L" ! \x10910" L"abc 123",
230#elif defined(WCHAR_T_IS_UTF16)
231      L" ! \xd802\xdd10" L"abc 123",
232#else
233#error wchar_t should be either UTF-16 or UTF-32
234#endif
235      UNKNOWN_DIRECTION },
236    {
237#if defined(WCHAR_T_IS_UTF32)
238      L" ! \x10401" L"abc 123",
239#elif defined(WCHAR_T_IS_UTF16)
240      L" ! \xd801\xdc01" L"abc 123",
241#else
242#error wchar_t should be either UTF-16 or UTF-32
243#endif
244      LEFT_TO_RIGHT },
245   };
246
247  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
248    EXPECT_EQ(cases[i].direction,
249              GetStringDirection(WideToUTF16(cases[i].text)));
250}
251
252TEST_F(RTLTest, WrapPathWithLTRFormatting) {
253  const wchar_t* cases[] = {
254    // Test common path, such as "c:\foo\bar".
255    L"c:/foo/bar",
256    // Test path with file name, such as "c:\foo\bar\test.jpg".
257    L"c:/foo/bar/test.jpg",
258    // Test path ending with punctuation, such as "c:\(foo)\bar.".
259    L"c:/(foo)/bar.",
260    // Test path ending with separator, such as "c:\foo\bar\".
261    L"c:/foo/bar/",
262    // Test path with RTL character.
263    L"c:/\x05d0",
264    // Test path with 2 level RTL directory names.
265    L"c:/\x05d0/\x0622",
266    // Test path with mixed RTL/LTR directory names and ending with punctuation.
267    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
268    // Test path without driver name, such as "/foo/bar/test/jpg".
269    L"/foo/bar/test.jpg",
270    // Test path start with current directory, such as "./foo".
271    L"./foo",
272    // Test path start with parent directory, such as "../foo/bar.jpg".
273    L"../foo/bar.jpg",
274    // Test absolute path, such as "//foo/bar.jpg".
275    L"//foo/bar.jpg",
276    // Test path with mixed RTL/LTR directory names.
277    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
278    // Test empty path.
279    L""
280  };
281
282  for (size_t i = 0; i < arraysize(cases); ++i) {
283    FilePath path;
284#if defined(OS_WIN)
285    std::wstring win_path(cases[i]);
286    std::replace(win_path.begin(), win_path.end(), '/', '\\');
287    path = FilePath(win_path);
288    std::wstring wrapped_expected =
289        std::wstring(L"\x202a") + win_path + L"\x202c";
290#else
291    path = FilePath(base::SysWideToNativeMB(cases[i]));
292    std::wstring wrapped_expected =
293        std::wstring(L"\x202a") + cases[i] + L"\x202c";
294#endif
295    string16 localized_file_path_string;
296    WrapPathWithLTRFormatting(path, &localized_file_path_string);
297
298    std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
299    EXPECT_EQ(wrapped_expected, wrapped_actual);
300  }
301}
302
303TEST_F(RTLTest, WrapString) {
304  const wchar_t* cases[] = {
305    L" . ",
306    L"abc",
307    L"a" L"\x5d0\x5d1",
308    L"a" L"\x5d1" L"b",
309    L"\x5d0\x5d1\x5d2",
310    L"\x5d0\x5d1" L"a",
311    L"\x5d0" L"a" L"\x5d1",
312  };
313
314  const bool was_rtl = IsRTL();
315
316  for (size_t i = 0; i < 2; ++i) {
317    // Toggle the application default text direction (to try each direction).
318    SetRTL(!IsRTL());
319
320    string16 empty;
321    WrapStringWithLTRFormatting(&empty);
322    EXPECT_TRUE(empty.empty());
323    WrapStringWithRTLFormatting(&empty);
324    EXPECT_TRUE(empty.empty());
325
326    for (size_t i = 0; i < arraysize(cases); ++i) {
327      string16 input = WideToUTF16(cases[i]);
328      string16 ltr_wrap = input;
329      WrapStringWithLTRFormatting(&ltr_wrap);
330      EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
331      EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
332      EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
333
334      string16 rtl_wrap = input;
335      WrapStringWithRTLFormatting(&rtl_wrap);
336      EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
337      EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
338      EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
339    }
340  }
341
342  EXPECT_EQ(was_rtl, IsRTL());
343}
344
345TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
346  struct {
347    const wchar_t* path;
348    bool wrap_ltr;
349    bool wrap_rtl;
350  } cases[] = {
351    { L"test",                   false, true },
352    { L"test.html",              false, true },
353    { L"\x05d0\x05d1\x05d2",     true,  true },
354    { L"\x05d0\x05d1\x05d2.txt", true,  true },
355    { L"\x05d0" L"abc",          true,  true },
356    { L"\x05d0" L"abc.txt",      true,  true },
357    { L"abc\x05d0\x05d1",        false, true },
358    { L"abc\x05d0\x05d1.jpg",    false, true },
359  };
360
361  const bool was_rtl = IsRTL();
362
363  for (size_t i = 0; i < 2; ++i) {
364    // Toggle the application default text direction (to try each direction).
365    SetRTL(!IsRTL());
366    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
367      string16 input = WideToUTF16(cases[i].path);
368      string16 output = GetDisplayStringInLTRDirectionality(input);
369      // Test the expected wrapping behavior for the current UI directionality.
370      if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
371        EXPECT_NE(output, input);
372      else
373        EXPECT_EQ(output, input);
374    }
375  }
376
377  EXPECT_EQ(was_rtl, IsRTL());
378}
379
380TEST_F(RTLTest, GetTextDirection) {
381  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
382  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
383  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
384  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
385  // iw is an obsolete code for Hebrew.
386  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
387  // Although we're not yet localized to Farsi and Urdu, we
388  // do have the text layout direction information for them.
389  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
390  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
391#if 0
392  // Enable these when we include the minimal locale data for Azerbaijani
393  // written in Arabic and Dhivehi. At the moment, our copy of
394  // ICU data does not have entries for them.
395  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
396  // Dhivehi that uses Thaana script.
397  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
398#endif
399  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
400  // Chinese in China with '-'.
401  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
402  // Filipino : 3-letter code
403  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
404  // Russian
405  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
406  // Japanese that uses multiple scripts
407  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
408}
409
410TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
411  // These test strings are borrowed from WrapPathWithLTRFormatting
412  const wchar_t* cases[] = {
413    L"foo bar",
414    L"foo \x05d0 bar",
415    L"\x05d0 foo bar",
416    L"!foo \x05d0 bar",
417    L",\x05d0 foo bar",
418    L"\x202a \x05d0 foo  bar",
419    L"\x202d \x05d0 foo  bar",
420    L"\x202b foo \x05d0 bar",
421    L"\x202e foo \x05d0 bar",
422    L"\x0622 foo \x05d0 bar",
423  };
424
425  const bool was_rtl = IsRTL();
426
427  for (size_t i = 0; i < 2; ++i) {
428    // Toggle the application default text direction (to try each direction).
429    SetRTL(!IsRTL());
430
431    for (size_t i = 0; i < arraysize(cases); ++i) {
432      string16 test_case = WideToUTF16(cases[i]);
433      string16 adjusted_string = test_case;
434
435      if (!AdjustStringForLocaleDirection(&adjusted_string))
436        continue;
437
438      EXPECT_NE(test_case, adjusted_string);
439      EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
440      EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
441                                            << "] with IsRTL() == " << IsRTL();
442    }
443  }
444
445  EXPECT_EQ(was_rtl, IsRTL());
446}
447
448}  // namespace i18n
449}  // namespace base
450