1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_tokenizer.h"
6
7#include "testing/gtest/include/gtest/gtest.h"
8
9using std::string;
10
11namespace base {
12
13namespace {
14
15TEST(StringTokenizerTest, Simple) {
16  string input = "this is a test";
17  StringTokenizer t(input, " ");
18
19  EXPECT_TRUE(t.GetNext());
20  EXPECT_EQ(string("this"), t.token());
21
22  EXPECT_TRUE(t.GetNext());
23  EXPECT_EQ(string("is"), t.token());
24
25  EXPECT_TRUE(t.GetNext());
26  EXPECT_EQ(string("a"), t.token());
27
28  EXPECT_TRUE(t.GetNext());
29  EXPECT_EQ(string("test"), t.token());
30
31  EXPECT_FALSE(t.GetNext());
32}
33
34TEST(StringTokenizerTest, Reset) {
35  string input = "this is a test";
36  StringTokenizer t(input, " ");
37
38  for (int i = 0; i < 2; ++i) {
39    EXPECT_TRUE(t.GetNext());
40    EXPECT_EQ(string("this"), t.token());
41
42    EXPECT_TRUE(t.GetNext());
43    EXPECT_EQ(string("is"), t.token());
44
45    EXPECT_TRUE(t.GetNext());
46    EXPECT_EQ(string("a"), t.token());
47
48    EXPECT_TRUE(t.GetNext());
49    EXPECT_EQ(string("test"), t.token());
50
51    EXPECT_FALSE(t.GetNext());
52    t.Reset();
53  }
54}
55
56TEST(StringTokenizerTest, RetDelims) {
57  string input = "this is a test";
58  StringTokenizer t(input, " ");
59  t.set_options(StringTokenizer::RETURN_DELIMS);
60
61  EXPECT_TRUE(t.GetNext());
62  EXPECT_EQ(string("this"), t.token());
63
64  EXPECT_TRUE(t.GetNext());
65  EXPECT_EQ(string(" "), t.token());
66
67  EXPECT_TRUE(t.GetNext());
68  EXPECT_EQ(string("is"), t.token());
69
70  EXPECT_TRUE(t.GetNext());
71  EXPECT_EQ(string(" "), t.token());
72
73  EXPECT_TRUE(t.GetNext());
74  EXPECT_EQ(string("a"), t.token());
75
76  EXPECT_TRUE(t.GetNext());
77  EXPECT_EQ(string(" "), t.token());
78
79  EXPECT_TRUE(t.GetNext());
80  EXPECT_EQ(string("test"), t.token());
81
82  EXPECT_FALSE(t.GetNext());
83}
84
85TEST(StringTokenizerTest, ManyDelims) {
86  string input = "this: is, a-test";
87  StringTokenizer t(input, ": ,-");
88
89  EXPECT_TRUE(t.GetNext());
90  EXPECT_EQ(string("this"), t.token());
91
92  EXPECT_TRUE(t.GetNext());
93  EXPECT_EQ(string("is"), t.token());
94
95  EXPECT_TRUE(t.GetNext());
96  EXPECT_EQ(string("a"), t.token());
97
98  EXPECT_TRUE(t.GetNext());
99  EXPECT_EQ(string("test"), t.token());
100
101  EXPECT_FALSE(t.GetNext());
102}
103
104TEST(StringTokenizerTest, ParseHeader) {
105  string input = "Content-Type: text/html ; charset=UTF-8";
106  StringTokenizer t(input, ": ;=");
107  t.set_options(StringTokenizer::RETURN_DELIMS);
108
109  EXPECT_TRUE(t.GetNext());
110  EXPECT_FALSE(t.token_is_delim());
111  EXPECT_EQ(string("Content-Type"), t.token());
112
113  EXPECT_TRUE(t.GetNext());
114  EXPECT_TRUE(t.token_is_delim());
115  EXPECT_EQ(string(":"), t.token());
116
117  EXPECT_TRUE(t.GetNext());
118  EXPECT_TRUE(t.token_is_delim());
119  EXPECT_EQ(string(" "), t.token());
120
121  EXPECT_TRUE(t.GetNext());
122  EXPECT_FALSE(t.token_is_delim());
123  EXPECT_EQ(string("text/html"), t.token());
124
125  EXPECT_TRUE(t.GetNext());
126  EXPECT_TRUE(t.token_is_delim());
127  EXPECT_EQ(string(" "), t.token());
128
129  EXPECT_TRUE(t.GetNext());
130  EXPECT_TRUE(t.token_is_delim());
131  EXPECT_EQ(string(";"), t.token());
132
133  EXPECT_TRUE(t.GetNext());
134  EXPECT_TRUE(t.token_is_delim());
135  EXPECT_EQ(string(" "), t.token());
136
137  EXPECT_TRUE(t.GetNext());
138  EXPECT_FALSE(t.token_is_delim());
139  EXPECT_EQ(string("charset"), t.token());
140
141  EXPECT_TRUE(t.GetNext());
142  EXPECT_TRUE(t.token_is_delim());
143  EXPECT_EQ(string("="), t.token());
144
145  EXPECT_TRUE(t.GetNext());
146  EXPECT_FALSE(t.token_is_delim());
147  EXPECT_EQ(string("UTF-8"), t.token());
148
149  EXPECT_FALSE(t.GetNext());
150  EXPECT_FALSE(t.token_is_delim());
151}
152
153TEST(StringTokenizerTest, ParseQuotedString) {
154  string input = "foo bar 'hello world' baz";
155  StringTokenizer t(input, " ");
156  t.set_quote_chars("'");
157
158  EXPECT_TRUE(t.GetNext());
159  EXPECT_EQ(string("foo"), t.token());
160
161  EXPECT_TRUE(t.GetNext());
162  EXPECT_EQ(string("bar"), t.token());
163
164  EXPECT_TRUE(t.GetNext());
165  EXPECT_EQ(string("'hello world'"), t.token());
166
167  EXPECT_TRUE(t.GetNext());
168  EXPECT_EQ(string("baz"), t.token());
169
170  EXPECT_FALSE(t.GetNext());
171}
172
173TEST(StringTokenizerTest, ParseQuotedString_Malformed) {
174  string input = "bar 'hello wo";
175  StringTokenizer t(input, " ");
176  t.set_quote_chars("'");
177
178  EXPECT_TRUE(t.GetNext());
179  EXPECT_EQ(string("bar"), t.token());
180
181  EXPECT_TRUE(t.GetNext());
182  EXPECT_EQ(string("'hello wo"), t.token());
183
184  EXPECT_FALSE(t.GetNext());
185}
186
187TEST(StringTokenizerTest, ParseQuotedString_Multiple) {
188  string input = "bar 'hel\"lo\" wo' baz\"";
189  StringTokenizer t(input, " ");
190  t.set_quote_chars("'\"");
191
192  EXPECT_TRUE(t.GetNext());
193  EXPECT_EQ(string("bar"), t.token());
194
195  EXPECT_TRUE(t.GetNext());
196  EXPECT_EQ(string("'hel\"lo\" wo'"), t.token());
197
198  EXPECT_TRUE(t.GetNext());
199  EXPECT_EQ(string("baz\""), t.token());
200
201  EXPECT_FALSE(t.GetNext());
202}
203
204TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes) {
205  string input = "foo 'don\\'t do that'";
206  StringTokenizer t(input, " ");
207  t.set_quote_chars("'");
208
209  EXPECT_TRUE(t.GetNext());
210  EXPECT_EQ(string("foo"), t.token());
211
212  EXPECT_TRUE(t.GetNext());
213  EXPECT_EQ(string("'don\\'t do that'"), t.token());
214
215  EXPECT_FALSE(t.GetNext());
216}
217
218TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes2) {
219  string input = "foo='a, b', bar";
220  StringTokenizer t(input, ", ");
221  t.set_quote_chars("'");
222
223  EXPECT_TRUE(t.GetNext());
224  EXPECT_EQ(string("foo='a, b'"), t.token());
225
226  EXPECT_TRUE(t.GetNext());
227  EXPECT_EQ(string("bar"), t.token());
228
229  EXPECT_FALSE(t.GetNext());
230}
231
232}  // namespace
233
234}  // namespace base
235