rtl_unittest.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include <algorithm>
8
9#include "base/file_path.h"
10#include "base/string_util.h"
11#include "base/utf_string_conversions.h"
12#include "base/sys_string_conversions.h"
13#include "testing/gtest/include/gtest/gtest.h"
14#include "testing/platform_test.h"
15#include "unicode/usearch.h"
16
17#if defined(TOOLKIT_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace base {
22namespace i18n {
23
24namespace {
25
26// A test utility function to set the application default text direction.
27void SetRTL(bool rtl) {
28  // Override the current locale/direction.
29  SetICUDefaultLocale(rtl ? "he" : "en");
30#if defined(TOOLKIT_GTK)
31  // Do the same for GTK, which does not rely on the ICU default locale.
32  gtk_widget_set_default_direction(rtl ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
33#endif
34  EXPECT_EQ(rtl, IsRTL());
35}
36
37}  // namespace
38
39class RTLTest : public PlatformTest {
40};
41
42TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
43  struct {
44    const wchar_t* text;
45    TextDirection direction;
46  } cases[] = {
47    // Test pure LTR string.
48    { L"foo bar", LEFT_TO_RIGHT },
49    // Test bidi string in which the first character with strong directionality
50    // is a character with type L.
51    { L"foo \x05d0 bar", LEFT_TO_RIGHT },
52    // Test bidi string in which the first character with strong directionality
53    // is a character with type R.
54    { L"\x05d0 foo bar", RIGHT_TO_LEFT },
55    // Test bidi string which starts with a character with weak directionality
56    // and in which the first character with strong directionality is a
57    // character with type L.
58    { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
59    // Test bidi string which starts with a character with weak directionality
60    // and in which the first character with strong directionality is a
61    // character with type R.
62    { L",\x05d0 foo bar", RIGHT_TO_LEFT },
63    // Test bidi string in which the first character with strong directionality
64    // is a character with type LRE.
65    { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
66    // Test bidi string in which the first character with strong directionality
67    // is a character with type LRO.
68    { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
69    // Test bidi string in which the first character with strong directionality
70    // is a character with type RLE.
71    { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
72    // Test bidi string in which the first character with strong directionality
73    // is a character with type RLO.
74    { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
75    // Test bidi string in which the first character with strong directionality
76    // is a character with type AL.
77    { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
78    // Test a string without strong directionality characters.
79    { L",!.{}", LEFT_TO_RIGHT },
80    // Test empty string.
81    { L"", LEFT_TO_RIGHT },
82    // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
83    // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
84    // information).
85    {
86#if defined(WCHAR_T_IS_UTF32)
87      L" ! \x10910" L"abc 123",
88#elif defined(WCHAR_T_IS_UTF16)
89      L" ! \xd802\xdd10" L"abc 123",
90#else
91#error wchar_t should be either UTF-16 or UTF-32
92#endif
93      RIGHT_TO_LEFT },
94    {
95#if defined(WCHAR_T_IS_UTF32)
96      L" ! \x10401" L"abc 123",
97#elif defined(WCHAR_T_IS_UTF16)
98      L" ! \xd801\xdc01" L"abc 123",
99#else
100#error wchar_t should be either UTF-16 or UTF-32
101#endif
102      LEFT_TO_RIGHT },
103   };
104
105  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
106    EXPECT_EQ(cases[i].direction,
107              GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
108}
109
110TEST_F(RTLTest, WrapPathWithLTRFormatting) {
111  const wchar_t* cases[] = {
112    // Test common path, such as "c:\foo\bar".
113    L"c:/foo/bar",
114    // Test path with file name, such as "c:\foo\bar\test.jpg".
115    L"c:/foo/bar/test.jpg",
116    // Test path ending with punctuation, such as "c:\(foo)\bar.".
117    L"c:/(foo)/bar.",
118    // Test path ending with separator, such as "c:\foo\bar\".
119    L"c:/foo/bar/",
120    // Test path with RTL character.
121    L"c:/\x05d0",
122    // Test path with 2 level RTL directory names.
123    L"c:/\x05d0/\x0622",
124    // Test path with mixed RTL/LTR directory names and ending with punctuation.
125    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
126    // Test path without driver name, such as "/foo/bar/test/jpg".
127    L"/foo/bar/test.jpg",
128    // Test path start with current directory, such as "./foo".
129    L"./foo",
130    // Test path start with parent directory, such as "../foo/bar.jpg".
131    L"../foo/bar.jpg",
132    // Test absolute path, such as "//foo/bar.jpg".
133    L"//foo/bar.jpg",
134    // Test path with mixed RTL/LTR directory names.
135    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
136    // Test empty path.
137    L""
138  };
139
140  for (size_t i = 0; i < arraysize(cases); ++i) {
141    FilePath path;
142#if defined(OS_WIN)
143    std::wstring win_path(cases[i]);
144    std::replace(win_path.begin(), win_path.end(), '/', '\\');
145    path = FilePath(win_path);
146    std::wstring wrapped_expected =
147        std::wstring(L"\x202a") + win_path + L"\x202c";
148#else
149    path = FilePath(base::SysWideToNativeMB(cases[i]));
150    std::wstring wrapped_expected =
151        std::wstring(L"\x202a") + cases[i] + L"\x202c";
152#endif
153    string16 localized_file_path_string;
154    WrapPathWithLTRFormatting(path, &localized_file_path_string);
155
156    std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
157    EXPECT_EQ(wrapped_expected, wrapped_actual);
158  }
159}
160
161TEST_F(RTLTest, WrapString) {
162  const wchar_t* cases[] = {
163    L" . ",
164    L"abc",
165    L"a"L"\x5d0\x5d1",
166    L"a"L"\x5d1"L"b",
167    L"\x5d0\x5d1\x5d2",
168    L"\x5d0\x5d1"L"a",
169    L"\x5d0"L"a"L"\x5d1",
170  };
171
172  const bool was_rtl = IsRTL();
173
174  for (size_t i = 0; i < 2; ++i) {
175    // Toggle the application default text direction (to try each direction).
176    SetRTL(!IsRTL());
177
178    string16 empty;
179    WrapStringWithLTRFormatting(&empty);
180    EXPECT_TRUE(empty.empty());
181    WrapStringWithRTLFormatting(&empty);
182    EXPECT_TRUE(empty.empty());
183
184    for (size_t i = 0; i < arraysize(cases); ++i) {
185      string16 input = WideToUTF16(cases[i]);
186      string16 ltr_wrap = input;
187      WrapStringWithLTRFormatting(&ltr_wrap);
188      EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
189      EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
190      EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
191
192      string16 rtl_wrap = input;
193      WrapStringWithRTLFormatting(&rtl_wrap);
194      EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
195      EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
196      EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
197    }
198  }
199
200  EXPECT_EQ(was_rtl, IsRTL());
201}
202
203TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
204  struct {
205    const wchar_t* path;
206    bool wrap_ltr;
207    bool wrap_rtl;
208  } cases[] = {
209    { L"test",                   false, true },
210    { L"test.html",              false, true },
211    { L"\x05d0\x05d1\x05d2",     true,  true },
212    { L"\x05d0\x05d1\x05d2.txt", true,  true },
213    { L"\x05d0"L"abc",           true,  true },
214    { L"\x05d0"L"abc.txt",       true,  true },
215    { L"abc\x05d0\x05d1",        false, true },
216    { L"abc\x05d0\x05d1.jpg",    false, true },
217  };
218
219  const bool was_rtl = IsRTL();
220
221  for (size_t i = 0; i < 2; ++i) {
222    // Toggle the application default text direction (to try each direction).
223    SetRTL(!IsRTL());
224    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
225      string16 input = WideToUTF16(cases[i].path);
226      string16 output = GetDisplayStringInLTRDirectionality(input);
227      // Test the expected wrapping behavior for the current UI directionality.
228      if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
229        EXPECT_NE(output, input);
230      else
231        EXPECT_EQ(output, input);
232    }
233  }
234
235  EXPECT_EQ(was_rtl, IsRTL());
236}
237
238TEST_F(RTLTest, GetTextDirection) {
239  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
240  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
241  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
242  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
243  // iw is an obsolete code for Hebrew.
244  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
245  // Although we're not yet localized to Farsi and Urdu, we
246  // do have the text layout direction information for them.
247  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
248  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
249#if 0
250  // Enable these when we include the minimal locale data for Azerbaijani
251  // written in Arabic and Dhivehi. At the moment, our copy of
252  // ICU data does not have entries for them.
253  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
254  // Dhivehi that uses Thaana script.
255  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
256#endif
257  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
258  // Chinese in China with '-'.
259  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
260  // Filipino : 3-letter code
261  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
262  // Russian
263  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
264  // Japanese that uses multiple scripts
265  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
266}
267
268TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
269  // These test strings are borrowed from WrapPathWithLTRFormatting
270  const wchar_t* cases[] = {
271    L"foo bar",
272    L"foo \x05d0 bar",
273    L"\x05d0 foo bar",
274    L"!foo \x05d0 bar",
275    L",\x05d0 foo bar",
276    L"\x202a \x05d0 foo  bar",
277    L"\x202d \x05d0 foo  bar",
278    L"\x202b foo \x05d0 bar",
279    L"\x202e foo \x05d0 bar",
280    L"\x0622 foo \x05d0 bar",
281  };
282
283  const bool was_rtl = IsRTL();
284
285  for (size_t i = 0; i < 2; ++i) {
286    // Toggle the application default text direction (to try each direction).
287    SetRTL(!IsRTL());
288
289    for (size_t i = 0; i < arraysize(cases); ++i) {
290      string16 test_case = WideToUTF16(cases[i]);
291      string16 adjusted_string = test_case;
292
293      if (!AdjustStringForLocaleDirection(&adjusted_string))
294        continue;
295
296      EXPECT_NE(test_case, adjusted_string);
297      EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
298      EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
299                                            << "] with IsRTL() == " << IsRTL();
300    }
301  }
302
303  EXPECT_EQ(was_rtl, IsRTL());
304}
305
306}  // namespace i18n
307}  // namespace base
308