keyword_provider_unittest.cc revision 03b57e008b61dfcb1fbad3aea950ae0e001748b0
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/command_line.h"
6#include "base/message_loop/message_loop.h"
7#include "base/strings/utf_string_conversions.h"
8#include "components/metrics/proto/omnibox_event.pb.h"
9#include "components/omnibox/autocomplete_match.h"
10#include "components/omnibox/autocomplete_scheme_classifier.h"
11#include "components/omnibox/keyword_provider.h"
12#include "components/search_engines/search_engines_switches.h"
13#include "components/search_engines/template_url.h"
14#include "components/search_engines/template_url_service.h"
15#include "testing/gtest/include/gtest/gtest.h"
16#include "url/gurl.h"
17
18using base::ASCIIToUTF16;
19
20namespace {
21
22class TestingSchemeClassifier : public AutocompleteSchemeClassifier {
23 public:
24  virtual metrics::OmniboxInputType::Type GetInputTypeForScheme(
25      const std::string& scheme) const OVERRIDE {
26    if (net::URLRequest::IsHandledProtocol(scheme))
27      return metrics::OmniboxInputType::URL;
28    return metrics::OmniboxInputType::INVALID;
29  }
30};
31
32}  // namespace
33
34class KeywordProviderTest : public testing::Test {
35 protected:
36  template<class ResultType>
37  struct MatchType {
38    const ResultType member;
39    bool allowed_to_be_default_match;
40  };
41
42  template<class ResultType>
43  struct TestData {
44    const base::string16 input;
45    const size_t num_results;
46    const MatchType<ResultType> output[3];
47  };
48
49  KeywordProviderTest() : kw_provider_(NULL) { }
50  virtual ~KeywordProviderTest() { }
51
52  virtual void SetUp();
53  virtual void TearDown();
54
55  template<class ResultType>
56  void RunTest(TestData<ResultType>* keyword_cases,
57               int num_cases,
58               ResultType AutocompleteMatch::* member);
59
60 protected:
61  static const TemplateURLService::Initializer kTestData[];
62
63  scoped_refptr<KeywordProvider> kw_provider_;
64  scoped_ptr<TemplateURLService> model_;
65};
66
67// static
68const TemplateURLService::Initializer KeywordProviderTest::kTestData[] = {
69  { "aa", "aa.com?foo={searchTerms}", "aa" },
70  { "aaaa", "http://aaaa/?aaaa=1&b={searchTerms}&c", "aaaa" },
71  { "aaaaa", "{searchTerms}", "aaaaa" },
72  { "ab", "bogus URL {searchTerms}", "ab" },
73  { "weasel", "weasel{searchTerms}weasel", "weasel" },
74  { "www", " +%2B?={searchTerms}foo ", "www" },
75  { "nonsub", "http://nonsubstituting-keyword.com/", "nonsub" },
76  { "z", "{searchTerms}=z", "z" },
77};
78
79void KeywordProviderTest::SetUp() {
80  model_.reset(new TemplateURLService(kTestData, arraysize(kTestData)));
81  kw_provider_ = new KeywordProvider(NULL, model_.get());
82}
83
84void KeywordProviderTest::TearDown() {
85  model_.reset();
86  kw_provider_ = NULL;
87}
88
89template<class ResultType>
90void KeywordProviderTest::RunTest(TestData<ResultType>* keyword_cases,
91                                  int num_cases,
92                                  ResultType AutocompleteMatch::* member) {
93  ACMatches matches;
94  for (int i = 0; i < num_cases; ++i) {
95    SCOPED_TRACE(keyword_cases[i].input);
96    AutocompleteInput input(keyword_cases[i].input, base::string16::npos,
97                            base::string16(), GURL(),
98                            metrics::OmniboxEventProto::INVALID_SPEC, true,
99                            false, true, true, TestingSchemeClassifier());
100    kw_provider_->Start(input, false);
101    EXPECT_TRUE(kw_provider_->done());
102    matches = kw_provider_->matches();
103    ASSERT_EQ(keyword_cases[i].num_results, matches.size());
104    for (size_t j = 0; j < matches.size(); ++j) {
105      EXPECT_EQ(keyword_cases[i].output[j].member, matches[j].*member);
106      EXPECT_EQ(keyword_cases[i].output[j].allowed_to_be_default_match,
107                matches[j].allowed_to_be_default_match);
108    }
109  }
110}
111
112TEST_F(KeywordProviderTest, Edit) {
113  const MatchType<base::string16> kEmptyMatch = { base::string16(), false };
114  TestData<base::string16> edit_cases[] = {
115    // Searching for a nonexistent prefix should give nothing.
116    { ASCIIToUTF16("Not Found"), 0,
117      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
118    { ASCIIToUTF16("aaaaaNot Found"), 0,
119      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
120
121    // Check that tokenization only collapses whitespace between first tokens,
122    // no-query-input cases have a space appended, and action is not escaped.
123    { ASCIIToUTF16("z"), 1,
124      { { ASCIIToUTF16("z "), true }, kEmptyMatch, kEmptyMatch } },
125    { ASCIIToUTF16("z    \t"), 1,
126      { { ASCIIToUTF16("z "), true }, kEmptyMatch, kEmptyMatch } },
127
128    // Check that exact, substituting keywords with a verbatim search term
129    // don't generate a result.  (These are handled by SearchProvider.)
130    { ASCIIToUTF16("z foo"), 0,
131      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
132    { ASCIIToUTF16("z   a   b   c++"), 0,
133      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
134
135    // Matches should be limited to three, and sorted in quality order, not
136    // alphabetical.
137    { ASCIIToUTF16("aaa"), 2,
138      { { ASCIIToUTF16("aaaa "), false },
139        { ASCIIToUTF16("aaaaa "), false },
140        kEmptyMatch } },
141    { ASCIIToUTF16("a 1 2 3"), 3,
142     { { ASCIIToUTF16("aa 1 2 3"), false },
143       { ASCIIToUTF16("ab 1 2 3"), false },
144       { ASCIIToUTF16("aaaa 1 2 3"), false } } },
145    { ASCIIToUTF16("www.a"), 3,
146      { { ASCIIToUTF16("aa "), false },
147        { ASCIIToUTF16("ab "), false },
148        { ASCIIToUTF16("aaaa "), false } } },
149    // Exact matches should prevent returning inexact matches.  Also, the
150    // verbatim query for this keyword match should not be returned.  (It's
151    // returned by SearchProvider.)
152    { ASCIIToUTF16("aaaa foo"), 0,
153      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
154    { ASCIIToUTF16("www.aaaa foo"), 0,
155      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
156
157    // Clean up keyword input properly.  "http" and "https" are the only
158    // allowed schemes.
159    { ASCIIToUTF16("www"), 1,
160      { { ASCIIToUTF16("www "), true }, kEmptyMatch, kEmptyMatch }},
161    { ASCIIToUTF16("www."), 0,
162      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
163    { ASCIIToUTF16("www.w w"), 2,
164      { { ASCIIToUTF16("www w"), false },
165        { ASCIIToUTF16("weasel w"), false },
166        kEmptyMatch } },
167    { ASCIIToUTF16("http://www"), 1,
168      { { ASCIIToUTF16("www "), true }, kEmptyMatch, kEmptyMatch } },
169    { ASCIIToUTF16("http://www."), 0,
170      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
171    { ASCIIToUTF16("ftp: blah"), 0,
172      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
173    { ASCIIToUTF16("mailto:z"), 0,
174      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
175    { ASCIIToUTF16("ftp://z"), 0,
176      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
177    { ASCIIToUTF16("https://z"), 1,
178      { { ASCIIToUTF16("z "), true }, kEmptyMatch, kEmptyMatch } },
179
180    // Non-substituting keywords, whether typed fully or not
181    // should not add a space.
182    { ASCIIToUTF16("nonsu"), 1,
183      { { ASCIIToUTF16("nonsub"), false }, kEmptyMatch, kEmptyMatch } },
184    { ASCIIToUTF16("nonsub"), 1,
185      { { ASCIIToUTF16("nonsub"), true }, kEmptyMatch, kEmptyMatch } },
186  };
187
188  RunTest<base::string16>(edit_cases, arraysize(edit_cases),
189                    &AutocompleteMatch::fill_into_edit);
190}
191
192TEST_F(KeywordProviderTest, URL) {
193  const MatchType<GURL> kEmptyMatch = { GURL(), false };
194  TestData<GURL> url_cases[] = {
195    // No query input -> empty destination URL.
196    { ASCIIToUTF16("z"), 1,
197      { { GURL(), true }, kEmptyMatch, kEmptyMatch } },
198    { ASCIIToUTF16("z    \t"), 1,
199      { { GURL(), true }, kEmptyMatch, kEmptyMatch } },
200
201    // Check that tokenization only collapses whitespace between first tokens
202    // and query input, but not rest of URL, is escaped.
203    { ASCIIToUTF16("w  bar +baz"), 2,
204      { { GURL(" +%2B?=bar+%2Bbazfoo "), false },
205        { GURL("bar+%2Bbaz=z"), false },
206        kEmptyMatch } },
207
208    // Substitution should work with various locations of the "%s".
209    { ASCIIToUTF16("aaa 1a2b"), 2,
210      { { GURL("http://aaaa/?aaaa=1&b=1a2b&c"), false },
211        { GURL("1a2b"), false },
212        kEmptyMatch } },
213    { ASCIIToUTF16("a 1 2 3"), 3,
214      { { GURL("aa.com?foo=1+2+3"), false },
215        { GURL("bogus URL 1+2+3"), false },
216        { GURL("http://aaaa/?aaaa=1&b=1+2+3&c"), false } } },
217    { ASCIIToUTF16("www.w w"), 2,
218      { { GURL(" +%2B?=wfoo "), false },
219        { GURL("weaselwweasel"), false },
220        kEmptyMatch } },
221  };
222
223  RunTest<GURL>(url_cases, arraysize(url_cases),
224                &AutocompleteMatch::destination_url);
225}
226
227TEST_F(KeywordProviderTest, Contents) {
228  const MatchType<base::string16> kEmptyMatch = { base::string16(), false };
229  TestData<base::string16> contents_cases[] = {
230    // No query input -> substitute "<enter query>" into contents.
231    { ASCIIToUTF16("z"), 1,
232      { { ASCIIToUTF16("Search z for <enter query>"), true },
233        kEmptyMatch, kEmptyMatch } },
234    { ASCIIToUTF16("z    \t"), 1,
235      { { ASCIIToUTF16("Search z for <enter query>"), true },
236        kEmptyMatch, kEmptyMatch } },
237
238    // Exact keyword matches with remaining text should return nothing.
239    { ASCIIToUTF16("www.www www"), 0,
240      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
241    { ASCIIToUTF16("z   a   b   c++"), 0,
242      { kEmptyMatch, kEmptyMatch, kEmptyMatch } },
243
244    // Exact keyword matches with remaining text when the keyword is an
245    // extension keyword should return something.  This is tested in
246    // chrome/browser/extensions/api/omnibox/omnibox_apitest.cc's
247    // in OmniboxApiTest's Basic test.
248
249    // Substitution should work with various locations of the "%s".
250    { ASCIIToUTF16("aaa"), 2,
251      { { ASCIIToUTF16("Search aaaa for <enter query>"), false },
252        { ASCIIToUTF16("Search aaaaa for <enter query>"), false },
253        kEmptyMatch} },
254    { ASCIIToUTF16("www.w w"), 2,
255      { { ASCIIToUTF16("Search www for w"), false },
256        { ASCIIToUTF16("Search weasel for w"), false },
257        kEmptyMatch } },
258    // Also, check that tokenization only collapses whitespace between first
259    // tokens and contents are not escaped or unescaped.
260    { ASCIIToUTF16("a   1 2+ 3"), 3,
261      { { ASCIIToUTF16("Search aa for 1 2+ 3"), false },
262        { ASCIIToUTF16("Search ab for 1 2+ 3"), false },
263        { ASCIIToUTF16("Search aaaa for 1 2+ 3"), false } } },
264  };
265
266  RunTest<base::string16>(contents_cases, arraysize(contents_cases),
267                    &AutocompleteMatch::contents);
268}
269
270TEST_F(KeywordProviderTest, AddKeyword) {
271  TemplateURLData data;
272  data.short_name = ASCIIToUTF16("Test");
273  base::string16 keyword(ASCIIToUTF16("foo"));
274  data.SetKeyword(keyword);
275  data.SetURL("http://www.google.com/foo?q={searchTerms}");
276  TemplateURL* template_url = new TemplateURL(data);
277  model_->Add(template_url);
278  ASSERT_TRUE(template_url == model_->GetTemplateURLForKeyword(keyword));
279}
280
281TEST_F(KeywordProviderTest, RemoveKeyword) {
282  base::string16 url(ASCIIToUTF16("http://aaaa/?aaaa=1&b={searchTerms}&c"));
283  model_->Remove(model_->GetTemplateURLForKeyword(ASCIIToUTF16("aaaa")));
284  ASSERT_TRUE(model_->GetTemplateURLForKeyword(ASCIIToUTF16("aaaa")) == NULL);
285}
286
287TEST_F(KeywordProviderTest, GetKeywordForInput) {
288  EXPECT_EQ(ASCIIToUTF16("aa"),
289      kw_provider_->GetKeywordForText(ASCIIToUTF16("aa")));
290  EXPECT_EQ(base::string16(),
291      kw_provider_->GetKeywordForText(ASCIIToUTF16("aafoo")));
292  EXPECT_EQ(base::string16(),
293      kw_provider_->GetKeywordForText(ASCIIToUTF16("aa foo")));
294}
295
296TEST_F(KeywordProviderTest, GetSubstitutingTemplateURLForInput) {
297  struct {
298    const std::string text;
299    const size_t cursor_position;
300    const bool allow_exact_keyword_match;
301    const std::string expected_url;
302    const std::string updated_text;
303    const size_t updated_cursor_position;
304  } cases[] = {
305    { "foo", base::string16::npos, true, "", "foo", base::string16::npos },
306    { "aa foo", base::string16::npos, true, "aa.com?foo={searchTerms}", "foo",
307      base::string16::npos },
308
309    // Cursor adjustment.
310    { "aa foo", base::string16::npos, true, "aa.com?foo={searchTerms}", "foo",
311      base::string16::npos },
312    { "aa foo", 4u, true, "aa.com?foo={searchTerms}", "foo", 1u },
313    // Cursor at the end.
314    { "aa foo", 6u, true, "aa.com?foo={searchTerms}", "foo", 3u },
315    // Cursor before the first character of the remaining text.
316    { "aa foo", 3u, true, "aa.com?foo={searchTerms}", "foo", 0u },
317
318    // Trailing space.
319    { "aa foo ", 7u, true, "aa.com?foo={searchTerms}", "foo ", 4u },
320    // Trailing space without remaining text, cursor in the middle.
321    { "aa  ", 3u, true, "aa.com?foo={searchTerms}", "", base::string16::npos },
322    // Trailing space without remaining text, cursor at the end.
323    { "aa  ", 4u, true, "aa.com?foo={searchTerms}", "", base::string16::npos },
324    // Extra space after keyword, cursor at the end.
325    { "aa  foo ", 8u, true, "aa.com?foo={searchTerms}", "foo ", 4u },
326    // Extra space after keyword, cursor in the middle.
327    { "aa  foo ", 3u, true, "aa.com?foo={searchTerms}", "foo ", 0 },
328    // Extra space after keyword, no trailing space, cursor at the end.
329    { "aa  foo", 7u, true, "aa.com?foo={searchTerms}", "foo", 3u },
330    // Extra space after keyword, no trailing space, cursor in the middle.
331    { "aa  foo", 5u, true, "aa.com?foo={searchTerms}", "foo", 1u },
332
333    // Disallow exact keyword match.
334    { "aa foo", base::string16::npos, false, "", "aa foo",
335      base::string16::npos },
336  };
337  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
338    AutocompleteInput input(
339        ASCIIToUTF16(cases[i].text), cases[i].cursor_position, base::string16(),
340        GURL(), metrics::OmniboxEventProto::INVALID_SPEC, false, false,
341        cases[i].allow_exact_keyword_match, true, TestingSchemeClassifier());
342    const TemplateURL* url =
343        KeywordProvider::GetSubstitutingTemplateURLForInput(model_.get(),
344                                                            &input);
345    if (cases[i].expected_url.empty())
346      EXPECT_FALSE(url);
347    else
348      EXPECT_EQ(cases[i].expected_url, url->url());
349    EXPECT_EQ(ASCIIToUTF16(cases[i].updated_text), input.text());
350    EXPECT_EQ(cases[i].updated_cursor_position, input.cursor_position());
351  }
352}
353
354// If extra query params are specified on the command line, they should be
355// reflected (only) in the default search provider's destination URL.
356TEST_F(KeywordProviderTest, ExtraQueryParams) {
357  CommandLine::ForCurrentProcess()->AppendSwitchASCII(
358      switches::kExtraSearchQueryParams, "a=b");
359
360  TestData<GURL> url_cases[] = {
361    { ASCIIToUTF16("a 1 2 3"), 3,
362      { { GURL("aa.com?a=b&foo=1+2+3"), false },
363        { GURL("bogus URL 1+2+3"), false },
364        { GURL("http://aaaa/?aaaa=1&b=1+2+3&c"), false } } },
365  };
366
367  RunTest<GURL>(url_cases, arraysize(url_cases),
368                &AutocompleteMatch::destination_url);
369}
370