string_search_unittest.cc revision 5821806d5e7f356e8fa4b058a389a808ea183019
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <string>
6
7#include "base/i18n/rtl.h"
8#include "base/i18n/string_search.h"
9#include "base/string16.h"
10#include "base/utf_string_conversions.h"
11#include "testing/gtest/include/gtest/gtest.h"
12#include "unicode/usearch.h"
13
14namespace base {
15namespace i18n {
16
// Note on setting the default locale for testing: the default locale on the
// Mac trybot is currently en_US_POSIX. With that locale, string search at
// primary-level collation strength is case-sensitive, whereas it should
// normally be case-insensitive. In other locales (including en_US, which
// English speakers in the U.S. use), the search is case-insensitive as
// expected. The ASCII and UnicodeLocaleIndependent tests therefore switch the
// ICU default locale to en_US while they run if they detect en_US_POSIX.
22
23TEST(StringSearchTest, ASCII) {
24  std::string default_locale(uloc_getDefault());
25  bool locale_is_posix = (default_locale == "en_US_POSIX");
26  if (locale_is_posix)
27    SetICUDefaultLocale("en_US");
28
29  size_t index = 0;
30  size_t length = 0;
31
32  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
33      ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"), &index, &length));
34  EXPECT_EQ(0U, index);
35  EXPECT_EQ(5U, length);
36
37  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
38      ASCIIToUTF16("h    e l l o"), ASCIIToUTF16("h   e l l o"),
39      &index, &length));
40
41  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
42      ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"), &index, &length));
43  EXPECT_EQ(4U, index);
44  EXPECT_EQ(6U, length);
45
46  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
47      ASCIIToUTF16("searching within empty string"), string16(),
48      &index, &length));
49
50  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
51      string16(), ASCIIToUTF16("searching for empty string"), &index, &length));
52  EXPECT_EQ(0U, index);
53  EXPECT_EQ(0U, length);
54
55  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
56      ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"),
57      &index, &length));
58  EXPECT_EQ(0U, index);
59  EXPECT_EQ(18U, length);
60
61  if (locale_is_posix)
62    SetICUDefaultLocale(default_locale.data());
63}
64
65TEST(StringSearchTest, UnicodeLocaleIndependent) {
66  // Base characters
67  const string16 e_base = WideToUTF16(L"e");
68  const string16 E_base = WideToUTF16(L"E");
69  const string16 a_base = WideToUTF16(L"a");
70
71  // Composed characters
72  const string16 e_with_acute_accent = WideToUTF16(L"\u00e9");
73  const string16 E_with_acute_accent = WideToUTF16(L"\u00c9");
74  const string16 e_with_grave_accent = WideToUTF16(L"\u00e8");
75  const string16 E_with_grave_accent = WideToUTF16(L"\u00c8");
76  const string16 a_with_acute_accent = WideToUTF16(L"\u00e1");
77
78  // Decomposed characters
79  const string16 e_with_acute_combining_mark = WideToUTF16(L"e\u0301");
80  const string16 E_with_acute_combining_mark = WideToUTF16(L"E\u0301");
81  const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300");
82  const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300");
83  const string16 a_with_acute_combining_mark = WideToUTF16(L"a\u0301");
84
85  std::string default_locale(uloc_getDefault());
86  bool locale_is_posix = (default_locale == "en_US_POSIX");
87  if (locale_is_posix)
88    SetICUDefaultLocale("en_US");
89
90  size_t index = 0;
91  size_t length = 0;
92
93  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
94      e_base, e_with_acute_accent, &index, &length));
95  EXPECT_EQ(0U, index);
96  EXPECT_EQ(e_with_acute_accent.size(), length);
97
98  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
99      e_with_acute_accent, e_base, &index, &length));
100  EXPECT_EQ(0U, index);
101  EXPECT_EQ(e_base.size(), length);
102
103  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
104      e_base, e_with_acute_combining_mark, &index, &length));
105  EXPECT_EQ(0U, index);
106  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
107
108  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
109      e_with_acute_combining_mark, e_base, &index, &length));
110  EXPECT_EQ(0U, index);
111  EXPECT_EQ(e_base.size(), length);
112
113  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
114      e_with_acute_combining_mark, e_with_acute_accent,
115      &index, &length));
116  EXPECT_EQ(0U, index);
117  EXPECT_EQ(e_with_acute_accent.size(), length);
118
119  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
120      e_with_acute_accent, e_with_acute_combining_mark,
121      &index, &length));
122  EXPECT_EQ(0U, index);
123  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
124
125  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
126      e_with_acute_combining_mark, e_with_grave_combining_mark,
127      &index, &length));
128  EXPECT_EQ(0U, index);
129  EXPECT_EQ(e_with_grave_combining_mark.size(), length);
130
131  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
132      e_with_grave_combining_mark, e_with_acute_combining_mark,
133      &index, &length));
134  EXPECT_EQ(0U, index);
135  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
136
137  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
138      e_with_acute_combining_mark, e_with_grave_accent, &index, &length));
139  EXPECT_EQ(0U, index);
140  EXPECT_EQ(e_with_grave_accent.size(), length);
141
142  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
143      e_with_grave_accent, e_with_acute_combining_mark, &index, &length));
144  EXPECT_EQ(0U, index);
145  EXPECT_EQ(e_with_acute_combining_mark.size(), length);
146
147  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
148      E_with_acute_accent, e_with_acute_accent, &index, &length));
149  EXPECT_EQ(0U, index);
150  EXPECT_EQ(e_with_acute_accent.size(), length);
151
152  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
153      E_with_grave_accent, e_with_acute_accent, &index, &length));
154  EXPECT_EQ(0U, index);
155  EXPECT_EQ(e_with_acute_accent.size(), length);
156
157  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
158      E_with_acute_combining_mark, e_with_grave_accent, &index, &length));
159  EXPECT_EQ(0U, index);
160  EXPECT_EQ(e_with_grave_accent.size(), length);
161
162  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
163      E_with_grave_combining_mark, e_with_acute_accent, &index, &length));
164  EXPECT_EQ(0U, index);
165  EXPECT_EQ(e_with_acute_accent.size(), length);
166
167  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
168      E_base, e_with_grave_accent, &index, &length));
169  EXPECT_EQ(0U, index);
170  EXPECT_EQ(e_with_grave_accent.size(), length);
171
172  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
173      a_with_acute_accent, e_with_acute_accent, &index, &length));
174
175  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
176      a_with_acute_combining_mark, e_with_acute_combining_mark,
177      &index, &length));
178
179  if (locale_is_posix)
180    SetICUDefaultLocale(default_locale.data());
181}
182
183TEST(StringSearchTest, UnicodeLocaleDependent) {
184  // Base characters
185  const string16 a_base = WideToUTF16(L"a");
186
187  // Composed characters
188  const string16 a_with_ring = WideToUTF16(L"\u00e5");
189
190  EXPECT_TRUE(StringSearchIgnoringCaseAndAccents(
191      a_base, a_with_ring, NULL, NULL));
192
193  const char* default_locale = uloc_getDefault();
194  SetICUDefaultLocale("da");
195
196  EXPECT_FALSE(StringSearchIgnoringCaseAndAccents(
197      a_base, a_with_ring, NULL, NULL));
198
199  SetICUDefaultLocale(default_locale);
200}
201
202}  // namespace i18n
203}  // namespace base
204