break_iterator_unittest.cc revision 21d179b334e59e9a3bfcaed4c4430bef1bc5759d
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include "base/i18n/break_iterator.h"
6
7#include "base/string_piece.h"
8#include "base/string_util.h"
9#include "base/utf_string_conversions.h"
10#include "testing/gtest/include/gtest/gtest.h"
11
12TEST(BreakIteratorTest, BreakWordEmpty) {
13  string16 empty;
14  base::BreakIterator iter(&empty, base::BreakIterator::BREAK_WORD);
15  ASSERT_TRUE(iter.Init());
16  EXPECT_FALSE(iter.Advance());
17  EXPECT_FALSE(iter.IsWord());
18  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
19  EXPECT_FALSE(iter.IsWord());
20}
21
22TEST(BreakIteratorTest, BreakWord) {
23  string16 space(UTF8ToUTF16(" "));
24  string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
25  base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
26  ASSERT_TRUE(iter.Init());
27  EXPECT_TRUE(iter.Advance());
28  EXPECT_FALSE(iter.IsWord());
29  EXPECT_EQ(space, iter.GetString());
30  EXPECT_TRUE(iter.Advance());
31  EXPECT_TRUE(iter.IsWord());
32  EXPECT_EQ(UTF8ToUTF16("foo"), iter.GetString());
33  EXPECT_TRUE(iter.Advance());
34  EXPECT_FALSE(iter.IsWord());
35  EXPECT_EQ(space, iter.GetString());
36  EXPECT_TRUE(iter.Advance());
37  EXPECT_TRUE(iter.IsWord());
38  EXPECT_EQ(UTF8ToUTF16("bar"), iter.GetString());
39  EXPECT_TRUE(iter.Advance());
40  EXPECT_FALSE(iter.IsWord());
41  EXPECT_EQ(UTF8ToUTF16("!"), iter.GetString());
42  EXPECT_TRUE(iter.Advance());
43  EXPECT_FALSE(iter.IsWord());
44  EXPECT_EQ(space, iter.GetString());
45  EXPECT_TRUE(iter.Advance());
46  EXPECT_FALSE(iter.IsWord());
47  EXPECT_EQ(UTF8ToUTF16("\n"), iter.GetString());
48  EXPECT_TRUE(iter.Advance());
49  EXPECT_TRUE(iter.IsWord());
50  EXPECT_EQ(UTF8ToUTF16("pouet"), iter.GetString());
51  EXPECT_TRUE(iter.Advance());
52  EXPECT_FALSE(iter.IsWord());
53  EXPECT_EQ(space, iter.GetString());
54  EXPECT_TRUE(iter.Advance());
55  EXPECT_TRUE(iter.IsWord());
56  EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
57  EXPECT_FALSE(iter.Advance());
58  EXPECT_FALSE(iter.IsWord());
59  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
60  EXPECT_FALSE(iter.IsWord());
61}
62
63TEST(BreakIteratorTest, BreakWide16) {
64  // Two greek words separated by space.
65  const string16 str(WideToUTF16(
66      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
67      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
68  const string16 word1(str.substr(0, 10));
69  const string16 word2(str.substr(11, 5));
70  base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
71  ASSERT_TRUE(iter.Init());
72  EXPECT_TRUE(iter.Advance());
73  EXPECT_TRUE(iter.IsWord());
74  EXPECT_EQ(word1, iter.GetString());
75  EXPECT_TRUE(iter.Advance());
76  EXPECT_FALSE(iter.IsWord());
77  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
78  EXPECT_TRUE(iter.Advance());
79  EXPECT_TRUE(iter.IsWord());
80  EXPECT_EQ(word2, iter.GetString());
81  EXPECT_FALSE(iter.Advance());
82  EXPECT_FALSE(iter.IsWord());
83  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
84  EXPECT_FALSE(iter.IsWord());
85}
86
87TEST(BreakIteratorTest, BreakWide32) {
88  // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
89  const char* very_wide_char = "\xF0\x9D\x92\x9C";
90  const string16 str(
91      UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
92  const string16 very_wide_word(str.substr(0, 2));
93
94  base::BreakIterator iter(&str, base::BreakIterator::BREAK_WORD);
95  ASSERT_TRUE(iter.Init());
96  EXPECT_TRUE(iter.Advance());
97  EXPECT_TRUE(iter.IsWord());
98  EXPECT_EQ(very_wide_word, iter.GetString());
99  EXPECT_TRUE(iter.Advance());
100  EXPECT_FALSE(iter.IsWord());
101  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
102  EXPECT_TRUE(iter.Advance());
103  EXPECT_TRUE(iter.IsWord());
104  EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
105  EXPECT_FALSE(iter.Advance());
106  EXPECT_FALSE(iter.IsWord());
107  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
108  EXPECT_FALSE(iter.IsWord());
109}
110
111TEST(BreakIteratorTest, BreakSpaceEmpty) {
112  string16 empty;
113  base::BreakIterator iter(&empty, base::BreakIterator::BREAK_SPACE);
114  ASSERT_TRUE(iter.Init());
115  EXPECT_FALSE(iter.Advance());
116  EXPECT_FALSE(iter.IsWord());
117  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
118  EXPECT_FALSE(iter.IsWord());
119}
120
121TEST(BreakIteratorTest, BreakSpace) {
122  string16 str(UTF8ToUTF16(" foo bar! \npouet boom"));
123  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
124  ASSERT_TRUE(iter.Init());
125  EXPECT_TRUE(iter.Advance());
126  EXPECT_FALSE(iter.IsWord());
127  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
128  EXPECT_TRUE(iter.Advance());
129  EXPECT_FALSE(iter.IsWord());
130  EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
131  EXPECT_TRUE(iter.Advance());
132  EXPECT_FALSE(iter.IsWord());
133  EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
134  EXPECT_TRUE(iter.Advance());
135  EXPECT_FALSE(iter.IsWord());
136  EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
137  EXPECT_TRUE(iter.Advance());
138  EXPECT_FALSE(iter.IsWord());
139  EXPECT_EQ(UTF8ToUTF16("boom"), iter.GetString());
140  EXPECT_FALSE(iter.Advance());
141  EXPECT_FALSE(iter.IsWord());
142  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
143  EXPECT_FALSE(iter.IsWord());
144}
145
146TEST(BreakIteratorTest, BreakSpaceSP) {
147  string16 str(UTF8ToUTF16(" foo bar! \npouet boom "));
148  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
149  ASSERT_TRUE(iter.Init());
150  EXPECT_TRUE(iter.Advance());
151  EXPECT_FALSE(iter.IsWord());
152  EXPECT_EQ(UTF8ToUTF16(" "), iter.GetString());
153  EXPECT_TRUE(iter.Advance());
154  EXPECT_FALSE(iter.IsWord());
155  EXPECT_EQ(UTF8ToUTF16("foo "), iter.GetString());
156  EXPECT_TRUE(iter.Advance());
157  EXPECT_FALSE(iter.IsWord());
158  EXPECT_EQ(UTF8ToUTF16("bar! \n"), iter.GetString());
159  EXPECT_TRUE(iter.Advance());
160  EXPECT_FALSE(iter.IsWord());
161  EXPECT_EQ(UTF8ToUTF16("pouet "), iter.GetString());
162  EXPECT_TRUE(iter.Advance());
163  EXPECT_FALSE(iter.IsWord());
164  EXPECT_EQ(UTF8ToUTF16("boom "), iter.GetString());
165  EXPECT_FALSE(iter.Advance());
166  EXPECT_FALSE(iter.IsWord());
167  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
168  EXPECT_FALSE(iter.IsWord());
169}
170
171TEST(BreakIteratorTest, BreakSpacekWide16) {
172  // Two Greek words.
173  const string16 str(WideToUTF16(
174      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
175      L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2"));
176  const string16 word1(str.substr(0, 11));
177  const string16 word2(str.substr(11, 5));
178  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
179  ASSERT_TRUE(iter.Init());
180  EXPECT_TRUE(iter.Advance());
181  EXPECT_FALSE(iter.IsWord());
182  EXPECT_EQ(word1, iter.GetString());
183  EXPECT_TRUE(iter.Advance());
184  EXPECT_FALSE(iter.IsWord());
185  EXPECT_EQ(word2, iter.GetString());
186  EXPECT_FALSE(iter.Advance());
187  EXPECT_FALSE(iter.IsWord());
188  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
189  EXPECT_FALSE(iter.IsWord());
190}
191
192TEST(BreakIteratorTest, BreakSpaceWide32) {
193  // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
194  const char* very_wide_char = "\xF0\x9D\x92\x9C";
195  const string16 str(
196      UTF8ToUTF16(StringPrintf("%s a", very_wide_char)));
197  const string16 very_wide_word(str.substr(0, 3));
198
199  base::BreakIterator iter(&str, base::BreakIterator::BREAK_SPACE);
200  ASSERT_TRUE(iter.Init());
201  EXPECT_TRUE(iter.Advance());
202  EXPECT_FALSE(iter.IsWord());
203  EXPECT_EQ(very_wide_word, iter.GetString());
204  EXPECT_TRUE(iter.Advance());
205  EXPECT_FALSE(iter.IsWord());
206  EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
207  EXPECT_FALSE(iter.Advance());
208  EXPECT_FALSE(iter.IsWord());
209  EXPECT_FALSE(iter.Advance());  // Test unexpected advance after end.
210  EXPECT_FALSE(iter.IsWord());
211}
212
213TEST(BreakIteratorTest, BreakLineEmpty) {
214  string16 empty;
215  base::BreakIterator iter(&empty, base::BreakIterator::BREAK_NEWLINE);
216  ASSERT_TRUE(iter.Init());
217  EXPECT_FALSE(iter.Advance());
218  EXPECT_FALSE(iter.IsWord());
219  EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
220  EXPECT_FALSE(iter.IsWord());
221}
222
223TEST(BreakIteratorTest, BreakLine) {
224  string16 nl(UTF8ToUTF16("\n"));
225  string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom"));
226  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
227  ASSERT_TRUE(iter.Init());
228  EXPECT_TRUE(iter.Advance());
229  EXPECT_FALSE(iter.IsWord());
230  EXPECT_EQ(nl, iter.GetString());
231  EXPECT_TRUE(iter.Advance());
232  EXPECT_FALSE(iter.IsWord());
233  EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString());
234  EXPECT_TRUE(iter.Advance());
235  EXPECT_FALSE(iter.IsWord());
236  EXPECT_EQ(nl, iter.GetString());
237  EXPECT_TRUE(iter.Advance());
238  EXPECT_FALSE(iter.IsWord());
239  EXPECT_EQ(UTF8ToUTF16("pouet boom"), iter.GetString());
240  EXPECT_FALSE(iter.Advance());
241  EXPECT_FALSE(iter.IsWord());
242  EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
243  EXPECT_FALSE(iter.IsWord());
244}
245
246TEST(BreakIteratorTest, BreakLineNL) {
247  string16 nl(UTF8ToUTF16("\n"));
248  string16 str(UTF8ToUTF16("\nfoo bar!\n\npouet boom\n"));
249  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
250  ASSERT_TRUE(iter.Init());
251  EXPECT_TRUE(iter.Advance());
252  EXPECT_FALSE(iter.IsWord());
253  EXPECT_EQ(nl, iter.GetString());
254  EXPECT_TRUE(iter.Advance());
255  EXPECT_FALSE(iter.IsWord());
256  EXPECT_EQ(UTF8ToUTF16("foo bar!\n"), iter.GetString());
257  EXPECT_TRUE(iter.Advance());
258  EXPECT_FALSE(iter.IsWord());
259  EXPECT_EQ(nl, iter.GetString());
260  EXPECT_TRUE(iter.Advance());
261  EXPECT_FALSE(iter.IsWord());
262  EXPECT_EQ(UTF8ToUTF16("pouet boom\n"), iter.GetString());
263  EXPECT_FALSE(iter.Advance());
264  EXPECT_FALSE(iter.IsWord());
265  EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
266  EXPECT_FALSE(iter.IsWord());
267}
268
269TEST(BreakIteratorTest, BreakLineWide16) {
270  // Two Greek words separated by newline.
271  const string16 str(WideToUTF16(
272      L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
273      L"\x03bf\x03c2\x000a\x0399\x03c3\x03c4\x03cc\x03c2"));
274  const string16 line1(str.substr(0, 11));
275  const string16 line2(str.substr(11, 5));
276  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
277  ASSERT_TRUE(iter.Init());
278  EXPECT_TRUE(iter.Advance());
279  EXPECT_FALSE(iter.IsWord());
280  EXPECT_EQ(line1, iter.GetString());
281  EXPECT_TRUE(iter.Advance());
282  EXPECT_FALSE(iter.IsWord());
283  EXPECT_EQ(line2, iter.GetString());
284  EXPECT_FALSE(iter.Advance());
285  EXPECT_FALSE(iter.IsWord());
286  EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
287  EXPECT_FALSE(iter.IsWord());
288}
289
290TEST(BreakIteratorTest, BreakLineWide32) {
291  // U+1D49C MATHEMATICAL SCRIPT CAPITAL A
292  const char* very_wide_char = "\xF0\x9D\x92\x9C";
293  const string16 str(
294      UTF8ToUTF16(StringPrintf("%s\na", very_wide_char)));
295  const string16 very_wide_line(str.substr(0, 3));
296  base::BreakIterator iter(&str, base::BreakIterator::BREAK_NEWLINE);
297  ASSERT_TRUE(iter.Init());
298  EXPECT_TRUE(iter.Advance());
299  EXPECT_FALSE(iter.IsWord());
300  EXPECT_EQ(very_wide_line, iter.GetString());
301  EXPECT_TRUE(iter.Advance());
302  EXPECT_FALSE(iter.IsWord());
303  EXPECT_EQ(UTF8ToUTF16("a"), iter.GetString());
304  EXPECT_FALSE(iter.Advance());
305  EXPECT_FALSE(iter.IsWord());
306  EXPECT_FALSE(iter.Advance());   // Test unexpected advance after end.
307  EXPECT_FALSE(iter.IsWord());
308}
309