rtl_unittest.cc revision ca12bfac764ba476d6cd062bf1dde12cc64c3f40
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include <algorithm>
8
9#include "base/files/file_path.h"
10#include "base/strings/string_util.h"
11#include "base/strings/sys_string_conversions.h"
12#include "base/strings/utf_string_conversions.h"
13#include "testing/gtest/include/gtest/gtest.h"
14#include "testing/platform_test.h"
15#include "third_party/icu/source/i18n/unicode/usearch.h"
16
17#if defined(TOOLKIT_GTK)
18#include <gtk/gtk.h>
19#endif
20
21namespace base {
22namespace i18n {
23
24namespace {
25
26// A test utility function to set the application default text direction.
27void SetRTL(bool rtl) {
28  // Override the current locale/direction.
29  SetICUDefaultLocale(rtl ? "he" : "en");
30#if defined(TOOLKIT_GTK)
31  // Do the same for GTK, which does not rely on the ICU default locale.
32  gtk_widget_set_default_direction(rtl ? GTK_TEXT_DIR_RTL : GTK_TEXT_DIR_LTR);
33#endif
34  EXPECT_EQ(rtl, IsRTL());
35}
36
37}  // namespace
38
39class RTLTest : public PlatformTest {
40};
41
42TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
43  struct {
44    const wchar_t* text;
45    TextDirection direction;
46  } cases[] = {
47    // Test pure LTR string.
48    { L"foo bar", LEFT_TO_RIGHT },
49    // Test bidi string in which the first character with strong directionality
50    // is a character with type L.
51    { L"foo \x05d0 bar", LEFT_TO_RIGHT },
52    // Test bidi string in which the first character with strong directionality
53    // is a character with type R.
54    { L"\x05d0 foo bar", RIGHT_TO_LEFT },
55    // Test bidi string which starts with a character with weak directionality
56    // and in which the first character with strong directionality is a
57    // character with type L.
58    { L"!foo \x05d0 bar", LEFT_TO_RIGHT },
59    // Test bidi string which starts with a character with weak directionality
60    // and in which the first character with strong directionality is a
61    // character with type R.
62    { L",\x05d0 foo bar", RIGHT_TO_LEFT },
63    // Test bidi string in which the first character with strong directionality
64    // is a character with type LRE.
65    { L"\x202a \x05d0 foo  bar", LEFT_TO_RIGHT },
66    // Test bidi string in which the first character with strong directionality
67    // is a character with type LRO.
68    { L"\x202d \x05d0 foo  bar", LEFT_TO_RIGHT },
69    // Test bidi string in which the first character with strong directionality
70    // is a character with type RLE.
71    { L"\x202b foo \x05d0 bar", RIGHT_TO_LEFT },
72    // Test bidi string in which the first character with strong directionality
73    // is a character with type RLO.
74    { L"\x202e foo \x05d0 bar", RIGHT_TO_LEFT },
75    // Test bidi string in which the first character with strong directionality
76    // is a character with type AL.
77    { L"\x0622 foo \x05d0 bar", RIGHT_TO_LEFT },
78    // Test a string without strong directionality characters.
79    { L",!.{}", LEFT_TO_RIGHT },
80    // Test empty string.
81    { L"", LEFT_TO_RIGHT },
82    // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
83    // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
84    // information).
85    {
86#if defined(WCHAR_T_IS_UTF32)
87      L" ! \x10910" L"abc 123",
88#elif defined(WCHAR_T_IS_UTF16)
89      L" ! \xd802\xdd10" L"abc 123",
90#else
91#error wchar_t should be either UTF-16 or UTF-32
92#endif
93      RIGHT_TO_LEFT },
94    {
95#if defined(WCHAR_T_IS_UTF32)
96      L" ! \x10401" L"abc 123",
97#elif defined(WCHAR_T_IS_UTF16)
98      L" ! \xd801\xdc01" L"abc 123",
99#else
100#error wchar_t should be either UTF-16 or UTF-32
101#endif
102      LEFT_TO_RIGHT },
103   };
104
105  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
106    EXPECT_EQ(cases[i].direction,
107              GetFirstStrongCharacterDirection(WideToUTF16(cases[i].text)));
108}
109
110TEST_F(RTLTest, GetStringDirection) {
111  struct {
112    const wchar_t* text;
113    TextDirection direction;
114  } cases[] = {
115    // Test pure LTR string.
116    { L"foobar", LEFT_TO_RIGHT },
117    { L".foobar", LEFT_TO_RIGHT },
118    { L"foo, bar", LEFT_TO_RIGHT },
119    // Test pure LTR with strong directionality characters of type LRE.
120    { L"\x202a\x202a", LEFT_TO_RIGHT },
121    { L".\x202a\x202a", LEFT_TO_RIGHT },
122    { L"\x202a, \x202a", LEFT_TO_RIGHT },
123    // Test pure LTR with strong directionality characters of type LRO.
124    { L"\x202d\x202d", LEFT_TO_RIGHT },
125    { L".\x202d\x202d", LEFT_TO_RIGHT },
126    { L"\x202d, \x202d", LEFT_TO_RIGHT },
127    // Test pure LTR with various types of strong directionality characters.
128    { L"foo \x202a\x202d", LEFT_TO_RIGHT },
129    { L".\x202d foo \x202a", LEFT_TO_RIGHT },
130    { L"\x202a, \x202d foo", LEFT_TO_RIGHT },
131    // Test pure RTL with strong directionality characters of type R.
132    { L"\x05d0\x05d0", RIGHT_TO_LEFT },
133    { L".\x05d0\x05d0", RIGHT_TO_LEFT },
134    { L"\x05d0, \x05d0", RIGHT_TO_LEFT },
135    // Test pure RTL with strong directionality characters of type RLE.
136    { L"\x202b\x202b", RIGHT_TO_LEFT },
137    { L".\x202b\x202b", RIGHT_TO_LEFT },
138    { L"\x202b, \x202b", RIGHT_TO_LEFT },
139    // Test pure RTL with strong directionality characters of type RLO.
140    { L"\x202e\x202e", RIGHT_TO_LEFT },
141    { L".\x202e\x202e", RIGHT_TO_LEFT },
142    { L"\x202e, \x202e", RIGHT_TO_LEFT },
143    // Test pure RTL with strong directionality characters of type AL.
144    { L"\x0622\x0622", RIGHT_TO_LEFT },
145    { L".\x0622\x0622", RIGHT_TO_LEFT },
146    { L"\x0622, \x0622", RIGHT_TO_LEFT },
147    // Test pure RTL with various types of strong directionality characters.
148    { L"\x05d0\x202b\x202e\x0622", RIGHT_TO_LEFT },
149    { L".\x202b\x202e\x0622\x05d0", RIGHT_TO_LEFT },
150    { L"\x0622\x202e, \x202b\x05d0", RIGHT_TO_LEFT },
151    // Test bidi strings.
152    { L"foo \x05d0 bar", UNKNOWN_DIRECTION },
153    { L"\x202b foo bar", UNKNOWN_DIRECTION },
154    { L"!foo \x0622 bar", UNKNOWN_DIRECTION },
155    { L"\x202a\x202b", UNKNOWN_DIRECTION },
156    { L"\x202e\x202d", UNKNOWN_DIRECTION },
157    { L"\x0622\x202a", UNKNOWN_DIRECTION },
158    { L"\x202d\x05d0", UNKNOWN_DIRECTION },
159    // Test a string without strong directionality characters.
160    { L",!.{}", LEFT_TO_RIGHT },
161    // Test empty string.
162    { L"", LEFT_TO_RIGHT },
163    {
164#if defined(WCHAR_T_IS_UTF32)
165      L" ! \x10910" L"abc 123",
166#elif defined(WCHAR_T_IS_UTF16)
167      L" ! \xd802\xdd10" L"abc 123",
168#else
169#error wchar_t should be either UTF-16 or UTF-32
170#endif
171      UNKNOWN_DIRECTION },
172    {
173#if defined(WCHAR_T_IS_UTF32)
174      L" ! \x10401" L"abc 123",
175#elif defined(WCHAR_T_IS_UTF16)
176      L" ! \xd801\xdc01" L"abc 123",
177#else
178#error wchar_t should be either UTF-16 or UTF-32
179#endif
180      LEFT_TO_RIGHT },
181   };
182
183  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
184    EXPECT_EQ(cases[i].direction,
185              GetStringDirection(WideToUTF16(cases[i].text)));
186}
187
188TEST_F(RTLTest, WrapPathWithLTRFormatting) {
189  const wchar_t* cases[] = {
190    // Test common path, such as "c:\foo\bar".
191    L"c:/foo/bar",
192    // Test path with file name, such as "c:\foo\bar\test.jpg".
193    L"c:/foo/bar/test.jpg",
194    // Test path ending with punctuation, such as "c:\(foo)\bar.".
195    L"c:/(foo)/bar.",
196    // Test path ending with separator, such as "c:\foo\bar\".
197    L"c:/foo/bar/",
198    // Test path with RTL character.
199    L"c:/\x05d0",
200    // Test path with 2 level RTL directory names.
201    L"c:/\x05d0/\x0622",
202    // Test path with mixed RTL/LTR directory names and ending with punctuation.
203    L"c:/\x05d0/\x0622/(foo)/b.a.r.",
204    // Test path without driver name, such as "/foo/bar/test/jpg".
205    L"/foo/bar/test.jpg",
206    // Test path start with current directory, such as "./foo".
207    L"./foo",
208    // Test path start with parent directory, such as "../foo/bar.jpg".
209    L"../foo/bar.jpg",
210    // Test absolute path, such as "//foo/bar.jpg".
211    L"//foo/bar.jpg",
212    // Test path with mixed RTL/LTR directory names.
213    L"c:/foo/\x05d0/\x0622/\x05d1.jpg",
214    // Test empty path.
215    L""
216  };
217
218  for (size_t i = 0; i < arraysize(cases); ++i) {
219    FilePath path;
220#if defined(OS_WIN)
221    std::wstring win_path(cases[i]);
222    std::replace(win_path.begin(), win_path.end(), '/', '\\');
223    path = FilePath(win_path);
224    std::wstring wrapped_expected =
225        std::wstring(L"\x202a") + win_path + L"\x202c";
226#else
227    path = FilePath(base::SysWideToNativeMB(cases[i]));
228    std::wstring wrapped_expected =
229        std::wstring(L"\x202a") + cases[i] + L"\x202c";
230#endif
231    string16 localized_file_path_string;
232    WrapPathWithLTRFormatting(path, &localized_file_path_string);
233
234    std::wstring wrapped_actual = UTF16ToWide(localized_file_path_string);
235    EXPECT_EQ(wrapped_expected, wrapped_actual);
236  }
237}
238
239TEST_F(RTLTest, WrapString) {
240  const wchar_t* cases[] = {
241    L" . ",
242    L"abc",
243    L"a" L"\x5d0\x5d1",
244    L"a" L"\x5d1" L"b",
245    L"\x5d0\x5d1\x5d2",
246    L"\x5d0\x5d1" L"a",
247    L"\x5d0" L"a" L"\x5d1",
248  };
249
250  const bool was_rtl = IsRTL();
251
252  for (size_t i = 0; i < 2; ++i) {
253    // Toggle the application default text direction (to try each direction).
254    SetRTL(!IsRTL());
255
256    string16 empty;
257    WrapStringWithLTRFormatting(&empty);
258    EXPECT_TRUE(empty.empty());
259    WrapStringWithRTLFormatting(&empty);
260    EXPECT_TRUE(empty.empty());
261
262    for (size_t i = 0; i < arraysize(cases); ++i) {
263      string16 input = WideToUTF16(cases[i]);
264      string16 ltr_wrap = input;
265      WrapStringWithLTRFormatting(&ltr_wrap);
266      EXPECT_EQ(ltr_wrap[0], kLeftToRightEmbeddingMark);
267      EXPECT_EQ(ltr_wrap.substr(1, ltr_wrap.length() - 2), input);
268      EXPECT_EQ(ltr_wrap[ltr_wrap.length() -1], kPopDirectionalFormatting);
269
270      string16 rtl_wrap = input;
271      WrapStringWithRTLFormatting(&rtl_wrap);
272      EXPECT_EQ(rtl_wrap[0], kRightToLeftEmbeddingMark);
273      EXPECT_EQ(rtl_wrap.substr(1, rtl_wrap.length() - 2), input);
274      EXPECT_EQ(rtl_wrap[rtl_wrap.length() -1], kPopDirectionalFormatting);
275    }
276  }
277
278  EXPECT_EQ(was_rtl, IsRTL());
279}
280
281TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
282  struct {
283    const wchar_t* path;
284    bool wrap_ltr;
285    bool wrap_rtl;
286  } cases[] = {
287    { L"test",                   false, true },
288    { L"test.html",              false, true },
289    { L"\x05d0\x05d1\x05d2",     true,  true },
290    { L"\x05d0\x05d1\x05d2.txt", true,  true },
291    { L"\x05d0" L"abc",          true,  true },
292    { L"\x05d0" L"abc.txt",      true,  true },
293    { L"abc\x05d0\x05d1",        false, true },
294    { L"abc\x05d0\x05d1.jpg",    false, true },
295  };
296
297  const bool was_rtl = IsRTL();
298
299  for (size_t i = 0; i < 2; ++i) {
300    // Toggle the application default text direction (to try each direction).
301    SetRTL(!IsRTL());
302    for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
303      string16 input = WideToUTF16(cases[i].path);
304      string16 output = GetDisplayStringInLTRDirectionality(input);
305      // Test the expected wrapping behavior for the current UI directionality.
306      if (IsRTL() ? cases[i].wrap_rtl : cases[i].wrap_ltr)
307        EXPECT_NE(output, input);
308      else
309        EXPECT_EQ(output, input);
310    }
311  }
312
313  EXPECT_EQ(was_rtl, IsRTL());
314}
315
316TEST_F(RTLTest, GetTextDirection) {
317  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar"));
318  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ar_EG"));
319  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he"));
320  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("he_IL"));
321  // iw is an obsolete code for Hebrew.
322  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("iw"));
323  // Although we're not yet localized to Farsi and Urdu, we
324  // do have the text layout direction information for them.
325  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("fa"));
326  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("ur"));
327#if 0
328  // Enable these when we include the minimal locale data for Azerbaijani
329  // written in Arabic and Dhivehi. At the moment, our copy of
330  // ICU data does not have entries for them.
331  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("az_Arab"));
332  // Dhivehi that uses Thaana script.
333  EXPECT_EQ(RIGHT_TO_LEFT, GetTextDirectionForLocale("dv"));
334#endif
335  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("en"));
336  // Chinese in China with '-'.
337  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("zh-CN"));
338  // Filipino : 3-letter code
339  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("fil"));
340  // Russian
341  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ru"));
342  // Japanese that uses multiple scripts
343  EXPECT_EQ(LEFT_TO_RIGHT, GetTextDirectionForLocale("ja"));
344}
345
346TEST_F(RTLTest, UnadjustStringForLocaleDirection) {
347  // These test strings are borrowed from WrapPathWithLTRFormatting
348  const wchar_t* cases[] = {
349    L"foo bar",
350    L"foo \x05d0 bar",
351    L"\x05d0 foo bar",
352    L"!foo \x05d0 bar",
353    L",\x05d0 foo bar",
354    L"\x202a \x05d0 foo  bar",
355    L"\x202d \x05d0 foo  bar",
356    L"\x202b foo \x05d0 bar",
357    L"\x202e foo \x05d0 bar",
358    L"\x0622 foo \x05d0 bar",
359  };
360
361  const bool was_rtl = IsRTL();
362
363  for (size_t i = 0; i < 2; ++i) {
364    // Toggle the application default text direction (to try each direction).
365    SetRTL(!IsRTL());
366
367    for (size_t i = 0; i < arraysize(cases); ++i) {
368      string16 test_case = WideToUTF16(cases[i]);
369      string16 adjusted_string = test_case;
370
371      if (!AdjustStringForLocaleDirection(&adjusted_string))
372        continue;
373
374      EXPECT_NE(test_case, adjusted_string);
375      EXPECT_TRUE(UnadjustStringForLocaleDirection(&adjusted_string));
376      EXPECT_EQ(test_case, adjusted_string) << " for test case [" << test_case
377                                            << "] with IsRTL() == " << IsRTL();
378    }
379  }
380
381  EXPECT_EQ(was_rtl, IsRTL());
382}
383
384}  // namespace i18n
385}  // namespace base
386