1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "components/omnibox/autocomplete_input.h"
6
7#include "base/basictypes.h"
8#include "base/strings/string16.h"
9#include "base/strings/utf_string_conversions.h"
10#include "build/build_config.h"
11#include "components/metrics/proto/omnibox_event.pb.h"
12#include "components/metrics/proto/omnibox_input_type.pb.h"
13#include "components/omnibox/test_scheme_classifier.h"
14#include "testing/gtest/include/gtest/gtest.h"
15#include "url/url_parse.h"
16
17using base::ASCIIToUTF16;
18using metrics::OmniboxEventProto;
19
20TEST(AutocompleteInputTest, InputType) {
21  struct test_data {
22    const base::string16 input;
23    const metrics::OmniboxInputType::Type type;
24  } input_cases[] = {
25    { base::string16(), metrics::OmniboxInputType::INVALID },
26    { ASCIIToUTF16("?"), metrics::OmniboxInputType::FORCED_QUERY },
27    { ASCIIToUTF16("?foo"), metrics::OmniboxInputType::FORCED_QUERY },
28    { ASCIIToUTF16("?foo bar"), metrics::OmniboxInputType::FORCED_QUERY },
29    { ASCIIToUTF16("?http://foo.com/bar"),
30      metrics::OmniboxInputType::FORCED_QUERY },
31    { ASCIIToUTF16("foo"), metrics::OmniboxInputType::UNKNOWN },
32    { ASCIIToUTF16("localhost"), metrics::OmniboxInputType::URL },
33    { ASCIIToUTF16("foo.c"), metrics::OmniboxInputType::UNKNOWN },
34    { ASCIIToUTF16("foo.com"), metrics::OmniboxInputType::URL },
35    { ASCIIToUTF16("-foo.com"), metrics::OmniboxInputType::URL },
36    { ASCIIToUTF16("foo-.com"), metrics::OmniboxInputType::URL },
37    { ASCIIToUTF16("foo_.com"), metrics::OmniboxInputType::UNKNOWN },
38    { ASCIIToUTF16("foo.-com"), metrics::OmniboxInputType::QUERY },
39    { ASCIIToUTF16("foo/"), metrics::OmniboxInputType::URL },
40    { ASCIIToUTF16("foo/bar"), metrics::OmniboxInputType::UNKNOWN },
41    { ASCIIToUTF16("foo/bar%00"), metrics::OmniboxInputType::QUERY },
42    { ASCIIToUTF16("foo/bar/"), metrics::OmniboxInputType::URL },
43    { ASCIIToUTF16("foo/bar baz\\"), metrics::OmniboxInputType::URL },
44    { ASCIIToUTF16("foo.com/bar"), metrics::OmniboxInputType::URL },
45    { ASCIIToUTF16("foo;bar"), metrics::OmniboxInputType::UNKNOWN },
46    { ASCIIToUTF16("foo/bar baz"), metrics::OmniboxInputType::UNKNOWN },
47    { ASCIIToUTF16("foo bar.com"), metrics::OmniboxInputType::QUERY },
48    { ASCIIToUTF16("foo bar"), metrics::OmniboxInputType::QUERY },
49    { ASCIIToUTF16("foo+bar"), metrics::OmniboxInputType::QUERY },
50    { ASCIIToUTF16("foo+bar.com"), metrics::OmniboxInputType::UNKNOWN },
51    { ASCIIToUTF16("\"foo:bar\""), metrics::OmniboxInputType::QUERY },
52    { ASCIIToUTF16("link:foo.com"), metrics::OmniboxInputType::UNKNOWN },
53    { ASCIIToUTF16("foo:81"), metrics::OmniboxInputType::URL },
54    { ASCIIToUTF16("localhost:8080"), metrics::OmniboxInputType::URL },
55    { ASCIIToUTF16("www.foo.com:81"), metrics::OmniboxInputType::URL },
56    { ASCIIToUTF16("foo.com:123456"), metrics::OmniboxInputType::QUERY },
57    { ASCIIToUTF16("foo.com:abc"), metrics::OmniboxInputType::QUERY },
58    { ASCIIToUTF16("1.2.3.4:abc"), metrics::OmniboxInputType::QUERY },
59    { ASCIIToUTF16("user@foo.com"), metrics::OmniboxInputType::UNKNOWN },
60    { ASCIIToUTF16("user@foo/z"), metrics::OmniboxInputType::URL },
61    { ASCIIToUTF16("user@foo/z z"), metrics::OmniboxInputType::URL },
62    { ASCIIToUTF16("user@foo.com/z"), metrics::OmniboxInputType::URL },
63    { ASCIIToUTF16("user:pass@"), metrics::OmniboxInputType::UNKNOWN },
64    { ASCIIToUTF16("user:pass@!foo.com"), metrics::OmniboxInputType::UNKNOWN },
65    { ASCIIToUTF16("user:pass@foo"), metrics::OmniboxInputType::URL },
66    { ASCIIToUTF16("user:pass@foo.c"), metrics::OmniboxInputType::URL },
67    { ASCIIToUTF16("user:pass@foo.com"), metrics::OmniboxInputType::URL },
68    { ASCIIToUTF16("user:pass@foo.com:81"), metrics::OmniboxInputType::URL },
69    { ASCIIToUTF16("user:pass@foo:81"), metrics::OmniboxInputType::URL },
70    { ASCIIToUTF16("1.2"), metrics::OmniboxInputType::UNKNOWN },
71    { ASCIIToUTF16("1.2/45"), metrics::OmniboxInputType::UNKNOWN },
72    { ASCIIToUTF16("1.2:45"), metrics::OmniboxInputType::UNKNOWN },
73    { ASCIIToUTF16("user@1.2:45"), metrics::OmniboxInputType::URL },
74    { ASCIIToUTF16("user@foo:45"), metrics::OmniboxInputType::URL },
75    { ASCIIToUTF16("user:pass@1.2:45"), metrics::OmniboxInputType::URL },
76    { ASCIIToUTF16("host?query"), metrics::OmniboxInputType::UNKNOWN },
77    { ASCIIToUTF16("host#ref"), metrics::OmniboxInputType::UNKNOWN },
78    { ASCIIToUTF16("host/path?query"), metrics::OmniboxInputType::URL },
79    { ASCIIToUTF16("host/path#ref"), metrics::OmniboxInputType::URL },
80    { ASCIIToUTF16("en.wikipedia.org/wiki/Jim Beam"),
81      metrics::OmniboxInputType::URL },
82    // In Chrome itself, mailto: will get handled by ShellExecute, but in
83    // unittest mode, we don't have the data loaded in the external protocol
84    // handler to know this.
85    // { ASCIIToUTF16("mailto:abuse@foo.com"), metrics::OmniboxInputType::URL },
86    { ASCIIToUTF16("view-source:http://www.foo.com/"),
87      metrics::OmniboxInputType::URL },
88    { ASCIIToUTF16("javascript:alert(\"Hi there\");"),
89      metrics::OmniboxInputType::URL },
90#if defined(OS_WIN)
91    { ASCIIToUTF16("C:\\Program Files"), metrics::OmniboxInputType::URL },
92    { ASCIIToUTF16("\\\\Server\\Folder\\File"),
93      metrics::OmniboxInputType::URL },
94#endif  // defined(OS_WIN)
95    { ASCIIToUTF16("http:foo"), metrics::OmniboxInputType::URL },
96    { ASCIIToUTF16("http://foo"), metrics::OmniboxInputType::URL },
97    { ASCIIToUTF16("http://foo.c"), metrics::OmniboxInputType::URL },
98    { ASCIIToUTF16("http://foo.com"), metrics::OmniboxInputType::URL },
99    { ASCIIToUTF16("http://foo_bar.com"), metrics::OmniboxInputType::URL },
100    { ASCIIToUTF16("http://foo/bar%00"), metrics::OmniboxInputType::QUERY },
101    { ASCIIToUTF16("http://foo/bar baz"), metrics::OmniboxInputType::URL },
102    { ASCIIToUTF16("http://-foo.com"), metrics::OmniboxInputType::URL },
103    { ASCIIToUTF16("http://foo-.com"), metrics::OmniboxInputType::URL },
104    { ASCIIToUTF16("http://foo_.com"), metrics::OmniboxInputType::UNKNOWN },
105    { ASCIIToUTF16("http://foo.-com"), metrics::OmniboxInputType::UNKNOWN },
106    { ASCIIToUTF16("http://_foo_.com"), metrics::OmniboxInputType::UNKNOWN },
107    { ASCIIToUTF16("http://foo.com:abc"), metrics::OmniboxInputType::QUERY },
108    { ASCIIToUTF16("http://foo.com:123456"), metrics::OmniboxInputType::QUERY },
109    { ASCIIToUTF16("http://1.2.3.4:abc"), metrics::OmniboxInputType::QUERY },
110    { ASCIIToUTF16("http:user@foo.com"), metrics::OmniboxInputType::URL },
111    { ASCIIToUTF16("http://user@foo.com"), metrics::OmniboxInputType::URL },
112    { ASCIIToUTF16("http:user:pass@foo.com"), metrics::OmniboxInputType::URL },
113    { ASCIIToUTF16("http://user:pass@foo.com"),
114      metrics::OmniboxInputType::URL },
115    { ASCIIToUTF16("http://1.2"), metrics::OmniboxInputType::URL },
116    { ASCIIToUTF16("http://1.2/45"), metrics::OmniboxInputType::URL },
117    { ASCIIToUTF16("http:ps/2 games"), metrics::OmniboxInputType::URL },
118    { ASCIIToUTF16("https://foo.com"), metrics::OmniboxInputType::URL },
119    { ASCIIToUTF16("127.0.0.1"), metrics::OmniboxInputType::URL },
120    { ASCIIToUTF16("127.0.1"), metrics::OmniboxInputType::UNKNOWN },
121    { ASCIIToUTF16("127.0.1/"), metrics::OmniboxInputType::URL },
122    { ASCIIToUTF16("browser.tabs.closeButtons"),
123      metrics::OmniboxInputType::UNKNOWN },
124    { base::WideToUTF16(L"\u6d4b\u8bd5"), metrics::OmniboxInputType::UNKNOWN },
125    { ASCIIToUTF16("[2001:]"), metrics::OmniboxInputType::QUERY },
126    { ASCIIToUTF16("[2001:dB8::1]"), metrics::OmniboxInputType::URL },
127    { ASCIIToUTF16("192.168.0.256"), metrics::OmniboxInputType::QUERY },
128    { ASCIIToUTF16("[foo.com]"), metrics::OmniboxInputType::QUERY },
129    { ASCIIToUTF16("filesystem:http://a.com/t/bar"),
130      metrics::OmniboxInputType::URL },
131    { ASCIIToUTF16("filesystem:http://a.com/"),
132      metrics::OmniboxInputType::QUERY },
133    { ASCIIToUTF16("filesystem:file://"), metrics::OmniboxInputType::QUERY },
134    { ASCIIToUTF16("filesystem:http"), metrics::OmniboxInputType::QUERY },
135    { ASCIIToUTF16("filesystem:"), metrics::OmniboxInputType::QUERY },
136    { ASCIIToUTF16("chrome-search://"), metrics::OmniboxInputType::QUERY },
137    { ASCIIToUTF16("chrome-devtools:"), metrics::OmniboxInputType::QUERY },
138    { ASCIIToUTF16("about://f;"), metrics::OmniboxInputType::QUERY },
139    { ASCIIToUTF16("://w"), metrics::OmniboxInputType::QUERY },
140    { ASCIIToUTF16(":w"), metrics::OmniboxInputType::QUERY },
141  };
142
143  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
144    SCOPED_TRACE(input_cases[i].input);
145    AutocompleteInput input(input_cases[i].input, base::string16::npos,
146                            base::string16(), GURL(),
147                            OmniboxEventProto::INVALID_SPEC, true, false, true,
148                            true, TestSchemeClassifier());
149    EXPECT_EQ(input_cases[i].type, input.type());
150  }
151}
152
153TEST(AutocompleteInputTest, InputTypeWithDesiredTLD) {
154  struct test_data {
155    const base::string16 input;
156    const metrics::OmniboxInputType::Type type;
157    const std::string spec;  // Unused if not a URL.
158  } input_cases[] = {
159    { ASCIIToUTF16("401k"), metrics::OmniboxInputType::URL,
160        std::string("http://www.401k.com/") },
161    { ASCIIToUTF16("999999999999999"), metrics::OmniboxInputType::URL,
162        std::string("http://www.999999999999999.com/") },
163    { ASCIIToUTF16("x@y"), metrics::OmniboxInputType::URL,
164        std::string("http://x@www.y.com/") },
165    { ASCIIToUTF16("y/z z"), metrics::OmniboxInputType::URL,
166        std::string("http://www.y.com/z%20z") },
167    { ASCIIToUTF16("abc.com"), metrics::OmniboxInputType::URL,
168        std::string("http://abc.com/") },
169    { ASCIIToUTF16("foo bar"), metrics::OmniboxInputType::QUERY,
170        std::string() },
171  };
172
173  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
174    SCOPED_TRACE(input_cases[i].input);
175    AutocompleteInput input(input_cases[i].input, base::string16::npos,
176                            ASCIIToUTF16("com"), GURL(),
177                            OmniboxEventProto::INVALID_SPEC, true, false, true,
178                            true, TestSchemeClassifier());
179    EXPECT_EQ(input_cases[i].type, input.type());
180    if (input_cases[i].type == metrics::OmniboxInputType::URL)
181      EXPECT_EQ(input_cases[i].spec, input.canonicalized_url().spec());
182  }
183}
184
185// This tests for a regression where certain input in the omnibox caused us to
186// crash. As long as the test completes without crashing, we're fine.
187TEST(AutocompleteInputTest, InputCrash) {
188  AutocompleteInput input(base::WideToUTF16(L"\uff65@s"), base::string16::npos,
189                          base::string16(), GURL(),
190                          OmniboxEventProto::INVALID_SPEC, true, false,
191                          true, true, TestSchemeClassifier());
192}
193
194TEST(AutocompleteInputTest, ParseForEmphasizeComponent) {
195  using url::Component;
196  Component kInvalidComponent(0, -1);
197  struct test_data {
198    const base::string16 input;
199    const Component scheme;
200    const Component host;
201  } input_cases[] = {
202    { base::string16(), kInvalidComponent, kInvalidComponent },
203    { ASCIIToUTF16("?"), kInvalidComponent, kInvalidComponent },
204    { ASCIIToUTF16("?http://foo.com/bar"), kInvalidComponent,
205        kInvalidComponent },
206    { ASCIIToUTF16("foo/bar baz"), kInvalidComponent, Component(0, 3) },
207    { ASCIIToUTF16("http://foo/bar baz"), Component(0, 4), Component(7, 3) },
208    { ASCIIToUTF16("link:foo.com"), Component(0, 4), kInvalidComponent },
209    { ASCIIToUTF16("www.foo.com:81"), kInvalidComponent, Component(0, 11) },
210    { base::WideToUTF16(L"\u6d4b\u8bd5"), kInvalidComponent, Component(0, 2) },
211    { ASCIIToUTF16("view-source:http://www.foo.com/"), Component(12, 4),
212        Component(19, 11) },
213    { ASCIIToUTF16("view-source:https://example.com/"),
214      Component(12, 5), Component(20, 11) },
215    { ASCIIToUTF16("view-source:www.foo.com"), kInvalidComponent,
216        Component(12, 11) },
217    { ASCIIToUTF16("view-source:"), Component(0, 11), kInvalidComponent },
218    { ASCIIToUTF16("view-source:garbage"), kInvalidComponent,
219        Component(12, 7) },
220    { ASCIIToUTF16("view-source:http://http://foo"), Component(12, 4),
221        Component(19, 4) },
222    { ASCIIToUTF16("view-source:view-source:http://example.com/"),
223        Component(12, 11), kInvalidComponent }
224  };
225
226  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
227    SCOPED_TRACE(input_cases[i].input);
228    Component scheme, host;
229    AutocompleteInput::ParseForEmphasizeComponents(input_cases[i].input,
230                                                   TestSchemeClassifier(),
231                                                   &scheme,
232                                                   &host);
233    AutocompleteInput input(input_cases[i].input, base::string16::npos,
234                            base::string16(), GURL(),
235                            OmniboxEventProto::INVALID_SPEC, true,
236                            false, true, true, TestSchemeClassifier());
237    EXPECT_EQ(input_cases[i].scheme.begin, scheme.begin);
238    EXPECT_EQ(input_cases[i].scheme.len, scheme.len);
239    EXPECT_EQ(input_cases[i].host.begin, host.begin);
240    EXPECT_EQ(input_cases[i].host.len, host.len);
241  }
242}
243
244TEST(AutocompleteInputTest, InputTypeWithCursorPosition) {
245  struct test_data {
246    const base::string16 input;
247    size_t cursor_position;
248    const base::string16 normalized_input;
249    size_t normalized_cursor_position;
250  } input_cases[] = {
251    { ASCIIToUTF16("foo bar"), base::string16::npos,
252      ASCIIToUTF16("foo bar"), base::string16::npos },
253
254    // regular case, no changes.
255    { ASCIIToUTF16("foo bar"), 3, ASCIIToUTF16("foo bar"), 3 },
256
257    // extra leading space.
258    { ASCIIToUTF16("  foo bar"), 3, ASCIIToUTF16("foo bar"), 1 },
259    { ASCIIToUTF16("      foo bar"), 3, ASCIIToUTF16("foo bar"), 0 },
260    { ASCIIToUTF16("      foo bar   "), 2, ASCIIToUTF16("foo bar   "), 0 },
261
262    // forced query.
263    { ASCIIToUTF16("?foo bar"), 2, ASCIIToUTF16("foo bar"), 1 },
264    { ASCIIToUTF16("  ?foo bar"), 4, ASCIIToUTF16("foo bar"), 1 },
265    { ASCIIToUTF16("?  foo bar"), 4, ASCIIToUTF16("foo bar"), 1 },
266    { ASCIIToUTF16("  ?  foo bar"), 6, ASCIIToUTF16("foo bar"), 1 },
267  };
268
269  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(input_cases); ++i) {
270    SCOPED_TRACE(input_cases[i].input);
271    AutocompleteInput input(input_cases[i].input,
272                            input_cases[i].cursor_position,
273                            base::string16(), GURL(),
274                            OmniboxEventProto::INVALID_SPEC,
275                            true, false, true, true, TestSchemeClassifier());
276    EXPECT_EQ(input_cases[i].normalized_input, input.text());
277    EXPECT_EQ(input_cases[i].normalized_cursor_position,
278              input.cursor_position());
279  }
280}
281