rtl_unittest.cc revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/i18n/rtl.h"
6
7#include "base/file_path.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversions.h"
10#include "testing/gtest/include/gtest/gtest.h"
11#include "testing/platform_test.h"
12
13namespace {
14base::i18n::TextDirection GetTextDirection(const char* locale_name) {
15  return base::i18n::GetTextDirectionForLocale(locale_name);
16}
17}
18
19class RTLTest : public PlatformTest {
20};
21
22TEST_F(RTLTest, GetFirstStrongCharacterDirection) {
23  // Test pure LTR string.
24  std::wstring string(L"foo bar");
25  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
26            base::i18n::GetFirstStrongCharacterDirection(string));
27
28  // Test bidi string in which the first character with strong directionality
29  // is a character with type L.
30  string.assign(L"foo \x05d0 bar");
31  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
32            base::i18n::GetFirstStrongCharacterDirection(string));
33
34  // Test bidi string in which the first character with strong directionality
35  // is a character with type R.
36  string.assign(L"\x05d0 foo bar");
37  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT,
38            base::i18n::GetFirstStrongCharacterDirection(string));
39
40  // Test bidi string which starts with a character with weak directionality
41  // and in which the first character with strong directionality is a character
42  // with type L.
43  string.assign(L"!foo \x05d0 bar");
44  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
45            base::i18n::GetFirstStrongCharacterDirection(string));
46
47  // Test bidi string which starts with a character with weak directionality
48  // and in which the first character with strong directionality is a character
49  // with type R.
50  string.assign(L",\x05d0 foo bar");
51  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT,
52            base::i18n::GetFirstStrongCharacterDirection(string));
53
54  // Test bidi string in which the first character with strong directionality
55  // is a character with type LRE.
56  string.assign(L"\x202a \x05d0 foo  bar");
57  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
58            base::i18n::GetFirstStrongCharacterDirection(string));
59
60  // Test bidi string in which the first character with strong directionality
61  // is a character with type LRO.
62  string.assign(L"\x202d \x05d0 foo  bar");
63  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
64            base::i18n::GetFirstStrongCharacterDirection(string));
65
66  // Test bidi string in which the first character with strong directionality
67  // is a character with type RLE.
68  string.assign(L"\x202b foo \x05d0 bar");
69  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT,
70            base::i18n::GetFirstStrongCharacterDirection(string));
71
72  // Test bidi string in which the first character with strong directionality
73  // is a character with type RLO.
74  string.assign(L"\x202e foo \x05d0 bar");
75  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT,
76            base::i18n::GetFirstStrongCharacterDirection(string));
77
78  // Test bidi string in which the first character with strong directionality
79  // is a character with type AL.
80  string.assign(L"\x0622 foo \x05d0 bar");
81  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT,
82            base::i18n::GetFirstStrongCharacterDirection(string));
83
84  // Test a string without strong directionality characters.
85  string.assign(L",!.{}");
86  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
87            base::i18n::GetFirstStrongCharacterDirection(string));
88
89  // Test empty string.
90  string.assign(L"");
91  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
92            base::i18n::GetFirstStrongCharacterDirection(string));
93
94  // Test characters in non-BMP (e.g. Phoenician letters. Please refer to
95  // http://demo.icu-project.org/icu-bin/ubrowse?scr=151&b=10910 for more
96  // information).
97#if defined(WCHAR_T_IS_UTF32)
98  string.assign(L" ! \x10910" L"abc 123");
99#elif defined(WCHAR_T_IS_UTF16)
100  string.assign(L" ! \xd802\xdd10" L"abc 123");
101#else
102#error wchar_t should be either UTF-16 or UTF-32
103#endif
104  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT,
105            base::i18n::GetFirstStrongCharacterDirection(string));
106
107#if defined(WCHAR_T_IS_UTF32)
108  string.assign(L" ! \x10401" L"abc 123");
109#elif defined(WCHAR_T_IS_UTF16)
110  string.assign(L" ! \xd801\xdc01" L"abc 123");
111#else
112#error wchar_t should be either UTF-16 or UTF-32
113#endif
114  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT,
115            base::i18n::GetFirstStrongCharacterDirection(string));
116}
117
// One WrapPathWithLTRFormatting test case: an input path paired with the
// string the test expects after wrapping.
struct PathAndWrappedPath {
  std::wstring path;          // Input handed to the function under test.
  std::wstring wrapped_path;  // Expected wrapped output.
};
122
123TEST_F(RTLTest, WrapPathWithLTRFormatting) {
124  std::wstring kSeparator;
125  kSeparator.push_back(static_cast<wchar_t>(FilePath::kSeparators[0]));
126  const PathAndWrappedPath test_data[] = {
127    // Test common path, such as "c:\foo\bar".
128    { L"c:" + kSeparator + L"foo" + kSeparator + L"bar",
129      L"\x202a"L"c:" + kSeparator + L"foo" + kSeparator +
130      L"bar\x202c"
131    },
132    // Test path with file name, such as "c:\foo\bar\test.jpg".
133    { L"c:" + kSeparator + L"foo" + kSeparator + L"bar" + kSeparator +
134      L"test.jpg",
135      L"\x202a"L"c:" + kSeparator + L"foo" + kSeparator +
136      L"bar" + kSeparator + L"test.jpg\x202c"
137    },
138    // Test path ending with punctuation, such as "c:\(foo)\bar.".
139    { L"c:" + kSeparator + L"(foo)" + kSeparator + L"bar.",
140      L"\x202a"L"c:" + kSeparator + L"(foo)" + kSeparator +
141      L"bar.\x202c"
142    },
143    // Test path ending with separator, such as "c:\foo\bar\".
144    { L"c:" + kSeparator + L"foo" + kSeparator + L"bar" + kSeparator,
145      L"\x202a"L"c:" + kSeparator + L"foo" + kSeparator +
146      L"bar" + kSeparator + L"\x202c",
147    },
148    // Test path with RTL character.
149    { L"c:" + kSeparator + L"\x05d0",
150      L"\x202a"L"c:" + kSeparator + L"\x05d0\x202c",
151    },
152    // Test path with 2 level RTL directory names.
153    { L"c:" + kSeparator + L"\x05d0" + kSeparator + L"\x0622",
154      L"\x202a"L"c:" + kSeparator + L"\x05d0" + kSeparator +
155      L"\x0622\x202c",
156    },
157    // Test path with mixed RTL/LTR directory names and ending with punctuation.
158    { L"c:" + kSeparator + L"\x05d0" + kSeparator + L"\x0622" + kSeparator +
159      L"(foo)" + kSeparator + L"b.a.r.",
160      L"\x202a"L"c:" + kSeparator + L"\x05d0" + kSeparator +
161      L"\x0622" + kSeparator + L"(foo)" + kSeparator +
162      L"b.a.r.\x202c",
163    },
164    // Test path without driver name, such as "/foo/bar/test/jpg".
165    { kSeparator + L"foo" + kSeparator + L"bar" + kSeparator + L"test.jpg",
166      L"\x202a" + kSeparator + L"foo" + kSeparator + L"bar" +
167      kSeparator + L"test.jpg" + L"\x202c"
168    },
169    // Test path start with current directory, such as "./foo".
170    { L"." + kSeparator + L"foo",
171      L"\x202a"L"." + kSeparator + L"foo" + L"\x202c"
172    },
173    // Test path start with parent directory, such as "../foo/bar.jpg".
174    { L".." + kSeparator + L"foo" + kSeparator + L"bar.jpg",
175      L"\x202a"L".." + kSeparator + L"foo" + kSeparator +
176      L"bar.jpg" + L"\x202c"
177    },
178    // Test absolute path, such as "//foo/bar.jpg".
179    { kSeparator + kSeparator + L"foo" + kSeparator + L"bar.jpg",
180      L"\x202a" + kSeparator + kSeparator + L"foo" + kSeparator +
181      L"bar.jpg" + L"\x202c"
182    },
183    // Test path with mixed RTL/LTR directory names.
184    { L"c:" + kSeparator + L"foo" + kSeparator + L"\x05d0" + kSeparator +
185      L"\x0622" + kSeparator + L"\x05d1.jpg",
186      L"\x202a"L"c:" + kSeparator + L"foo" + kSeparator + L"\x05d0" +
187      kSeparator + L"\x0622" + kSeparator + L"\x05d1.jpg" + L"\x202c",
188    },
189    // Test empty path.
190    { L"",
191      L"\x202a\x202c"
192    }
193  };
194  for (unsigned int i = 0; i < arraysize(test_data); ++i) {
195    string16 localized_file_path_string;
196    FilePath path = FilePath::FromWStringHack(test_data[i].path);
197    base::i18n::WrapPathWithLTRFormatting(path, &localized_file_path_string);
198    std::wstring wrapped_path = UTF16ToWide(localized_file_path_string);
199    EXPECT_EQ(wrapped_path, test_data[i].wrapped_path);
200  }
201}
202
// One GetDisplayStringInLTRDirectionality test case: a raw file name paired
// with the display string the test expects in an RTL UI.
struct StringAndLTRString {
  std::wstring raw_filename;    // Input handed to the function under test.
  std::wstring display_string;  // Expected output when base::i18n::IsRTL().
};
207
208TEST_F(RTLTest, GetDisplayStringInLTRDirectionality) {
209  const StringAndLTRString test_data[] = {
210    { L"test", L"\x202atest\x202c" },
211    { L"test.html", L"\x202atest.html\x202c" },
212    { L"\x05d0\x05d1\x05d2", L"\x202a\x05d0\x05d1\x05d2\x202c" },
213    { L"\x05d0\x05d1\x05d2.txt", L"\x202a\x05d0\x05d1\x05d2.txt\x202c" },
214    { L"\x05d0"L"abc", L"\x202a\x05d0"L"abc\x202c" },
215    { L"\x05d0"L"abc.txt", L"\x202a\x05d0"L"abc.txt\x202c" },
216    { L"abc\x05d0\x05d1", L"\x202a"L"abc\x05d0\x05d1\x202c" },
217    { L"abc\x05d0\x05d1.jpg", L"\x202a"L"abc\x05d0\x05d1.jpg\x202c" },
218  };
219  for (unsigned int i = 0; i < arraysize(test_data); ++i) {
220    string16 input = WideToUTF16(test_data[i].raw_filename);
221    string16 expected = base::i18n::GetDisplayStringInLTRDirectionality(input);
222    if (base::i18n::IsRTL())
223      EXPECT_EQ(expected, WideToUTF16(test_data[i].display_string));
224    else
225      EXPECT_EQ(expected, input);
226  }
227}
228
229TEST_F(RTLTest, GetTextDirection) {
230  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("ar"));
231  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("ar_EG"));
232  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("he"));
233  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("he_IL"));
234  // iw is an obsolete code for Hebrew.
235  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("iw"));
236  // Although we're not yet localized to Farsi and Urdu, we
237  // do have the text layout direction information for them.
238  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("fa"));
239  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("ur"));
240#if 0
241  // Enable these when we include the minimal locale data for Azerbaijani
242  // written in Arabic and Dhivehi. At the moment, our copy of
243  // ICU data does not have entries for them.
244  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("az_Arab"));
245  // Dhivehi that uses Thaana script.
246  EXPECT_EQ(base::i18n::RIGHT_TO_LEFT, GetTextDirection("dv"));
247#endif
248  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT, GetTextDirection("en"));
249  // Chinese in China with '-'.
250  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT, GetTextDirection("zh-CN"));
251  // Filipino : 3-letter code
252  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT, GetTextDirection("fil"));
253  // Russian
254  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT, GetTextDirection("ru"));
255  // Japanese that uses multiple scripts
256  EXPECT_EQ(base::i18n::LEFT_TO_RIGHT, GetTextDirection("ja"));
257}
258
259