history_url_provider_unittest.cc revision 5f1c94371a64b3196d4be9466099bb892df9b88e
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/history_url_provider.h"
6
7#include <algorithm>
8
9#include "base/message_loop/message_loop.h"
10#include "base/path_service.h"
11#include "base/prefs/pref_service.h"
12#include "base/strings/string_util.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "chrome/browser/autocomplete/autocomplete_result.h"
16#include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
17#include "chrome/browser/autocomplete/history_quick_provider.h"
18#include "chrome/browser/history/history_service.h"
19#include "chrome/browser/history/history_service_factory.h"
20#include "chrome/browser/search_engines/chrome_template_url_service_client.h"
21#include "chrome/browser/search_engines/template_url_service_factory.h"
22#include "chrome/common/pref_names.h"
23#include "chrome/test/base/testing_browser_process.h"
24#include "chrome/test/base/testing_profile.h"
25#include "components/history/core/browser/url_database.h"
26#include "components/metrics/proto/omnibox_event.pb.h"
27#include "components/metrics/proto/omnibox_input_type.pb.h"
28#include "components/omnibox/autocomplete_match.h"
29#include "components/omnibox/autocomplete_provider.h"
30#include "components/omnibox/autocomplete_provider_listener.h"
31#include "components/search_engines/search_terms_data.h"
32#include "components/search_engines/template_url.h"
33#include "components/search_engines/template_url_service.h"
34#include "components/url_fixer/url_fixer.h"
35#include "content/public/test/test_browser_thread_bundle.h"
36#include "testing/gtest/include/gtest/gtest.h"
37
38using base::ASCIIToUTF16;
39using base::Time;
40using base::TimeDelta;
41
42using content::TestBrowserThreadBundle;
43
// One history row seeded by HistoryURLProviderTest::FillData().
struct TestURLInfo {
  const char* url;    // URL of the page.
  const char* title;  // Page title, UTF-8 encoded.
  int visit_count;    // Visit count handed to AddPageWithDetails().
  int typed_count;    // Typed count handed to AddPageWithDetails().
  int age_in_days;    // How many days before "now" the visit is dated.
};

// The history contents every test in this file starts from.
TestURLInfo test_db[] = {
  {"http://www.google.com/", "Google", 3, 3, 80},

  // A high-quality page: its synthesized host should rank below it.
  {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100, 80},

  // A less popular page: its synthesized host should rank above it.
  {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0, 80},

  // An unpopular page: it should be absent from the results altogether.
  {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 0, 80},

  // Host synthesis should pick up an existing match for the host.
  {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 80},
  {"http://news.google.com/", "Google News", 1, 1, 80},

  // Matches that normally are not inline-autocompletable should become so
  // when they are shorter substitutes for longer matches that would have been
  // inline autocompleted.
  {"http://synthesisatest.com/foo/", "Test A", 1, 1, 80},
  {"http://synthesisbtest.com/foo/", "Test B", 1, 1, 80},
  {"http://synthesisbtest.com/foo/bar.html", "Test B Bar", 2, 2, 80},

  // A suggested short URL has to be "good enough" and has to match what the
  // user typed.
  {"http://foo.com/", "Dir", 5, 5, 80},
  {"http://foo.com/dir/", "Dir", 2, 2, 80},
  {"http://foo.com/dir/another/", "Dir", 5, 1, 80},
  {"http://foo.com/dir/another/again/", "Dir", 10, 0, 80},
  {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 80},

  // Plenty of additional URLs, so that the history database's query is
  // exercised and not merely the autocomplete provider.
  {"http://startest.com/y/a", "A", 2, 2, 80},
  {"http://startest.com/y/b", "B", 5, 2, 80},
  {"http://startest.com/x/c", "C", 5, 2, 80},
  {"http://startest.com/x/d", "D", 5, 5, 80},
  {"http://startest.com/y/e", "E", 4, 2, 80},
  {"http://startest.com/y/f", "F", 3, 2, 80},
  {"http://startest.com/y/g", "G", 3, 2, 80},
  {"http://startest.com/y/h", "H", 3, 2, 80},
  {"http://startest.com/y/i", "I", 3, 2, 80},
  {"http://startest.com/y/j", "J", 3, 2, 80},
  {"http://startest.com/y/k", "K", 3, 2, 80},
  {"http://startest.com/y/l", "L", 3, 2, 80},
  {"http://startest.com/y/m", "M", 3, 2, 80},

  // A file: URL, handy for checking that fixup treats the number of trailing
  // slashes in the user's input correctly.
  {"file:///C:/foo.txt", "", 2, 2, 80},

  // Entries with absurdly high typed_counts, so that very generic queries
  // such as "http" keep giving consistent results even when more data is
  // added above.
  {"http://bogussite.com/a", "Bogus A", 10002, 10000, 80},
  {"http://bogussite.com/b", "Bogus B", 10001, 10000, 80},
  {"http://bogussite.com/c", "Bogus C", 10000, 10000, 80},

  // A domain name containing a number.
  {"http://www.17173.com/", "Domain with number", 3, 3, 80},

  // URLs for exercising exact-matching behavior.
  {"http://go/", "Intranet URL", 1, 1, 80},
  {"http://gooey/", "Intranet URL 2", 5, 5, 80},

  // URLs for exercising offset adjustment.
  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2, 80},
  {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 80},
  {"http://ms/c++%20style%20guide", "Style guide", 2, 2, 80},

  // URLs for exercising ctrl-enter behavior.
  {"http://binky/", "Intranet binky", 2, 2, 80},
  {"http://winky/", "Intranet winky", 2, 2, 80},
  {"http://www.winky.com/", "Internet winky", 5, 0, 80},

  // URLs consumed by the EmptyVisits test.
  {"http://pandora.com/", "Pandora", 2, 2, 80},
  // The entry below is deliberately not part of this table: FillData() adds
  // it separately with a visit time more recent than
  // history::kLowQualityMatchAgeLimitInDays.
  // {"http://p/", "p", 0, 0, 80},

  // Entries for the intranet-based tests.
  {"http://intra/one", "Intranet", 2, 2, 80},
  {"http://intra/two", "Intranet two", 1, 1, 80},
  {"http://intra/three", "Intranet three", 2, 2, 80},
  {"http://moo/bar", "Intranet moo", 1, 1, 80},
  {"http://typedhost/typedpath", "Intranet typed", 1, 1, 80},
  {"http://typedhost/untypedpath", "Intranet untyped", 1, 0, 80},

  {"http://x.com/one", "Internet", 2, 2, 80},
  {"http://x.com/two", "Internet two", 1, 1, 80},
  {"http://x.com/three", "Internet three", 2, 2, 80},

  // Entries for the experimental HUP scoring test.
  {"http://7.com/1a", "One", 8, 4, 4},
  {"http://7.com/2a", "Two A", 4, 2, 8},
  {"http://7.com/2b", "Two B", 4, 1, 8},
  {"http://7.com/3a", "Three", 2, 1, 16},
  {"http://7.com/4a", "Four A", 1, 1, 32},
  {"http://7.com/4b", "Four B", 1, 1, 64},
  {"http://7.com/5a", "Five A", 8, 0, 64},  // never typed.
};
151
152class HistoryURLProviderTest : public testing::Test,
153                               public AutocompleteProviderListener {
154 public:
155  struct UrlAndLegalDefault {
156    std::string url;
157    bool allowed_to_be_default_match;
158  };
159
160  HistoryURLProviderTest()
161      : sort_matches_(false) {
162    HistoryQuickProvider::set_disabled(true);
163  }
164
165  virtual ~HistoryURLProviderTest() {
166    HistoryQuickProvider::set_disabled(false);
167  }
168
169  // AutocompleteProviderListener:
170  virtual void OnProviderUpdate(bool updated_matches) OVERRIDE;
171
172 protected:
173  static KeyedService* CreateTemplateURLService(
174      content::BrowserContext* context) {
175    Profile* profile = static_cast<Profile*>(context);
176    return new TemplateURLService(
177        profile->GetPrefs(), make_scoped_ptr(new SearchTermsData), NULL,
178        scoped_ptr<TemplateURLServiceClient>(
179            new ChromeTemplateURLServiceClient(profile)),
180        NULL, NULL, base::Closure());
181  }
182
183  // testing::Test
184  virtual void SetUp() {
185    ASSERT_TRUE(SetUpImpl(false));
186  }
187  virtual void TearDown();
188
189  // Does the real setup.
190  bool SetUpImpl(bool no_db) WARN_UNUSED_RESULT;
191
192  // Fills test data into the history system.
193  void FillData();
194
195  // Runs an autocomplete query on |text| and checks to see that the returned
196  // results' destination URLs match those provided.  Also allows checking
197  // that the input type was identified correctly.
198  void RunTest(const base::string16 text,
199               const base::string16& desired_tld,
200               bool prevent_inline_autocomplete,
201               const UrlAndLegalDefault* expected_urls,
202               size_t num_results,
203               metrics::OmniboxInputType::Type* identified_input_type);
204
205  // A version of the above without the final |type| output parameter.
206  void RunTest(const base::string16 text,
207               const base::string16& desired_tld,
208               bool prevent_inline_autocomplete,
209               const UrlAndLegalDefault* expected_urls,
210               size_t num_results) {
211    metrics::OmniboxInputType::Type type;
212    return RunTest(text, desired_tld, prevent_inline_autocomplete,
213                   expected_urls, num_results, &type);
214  }
215
216  content::TestBrowserThreadBundle thread_bundle_;
217  ACMatches matches_;
218  scoped_ptr<TestingProfile> profile_;
219  HistoryService* history_service_;
220  scoped_refptr<HistoryURLProvider> autocomplete_;
221  // Should the matches be sorted and duplicates removed?
222  bool sort_matches_;
223};
224
225class HistoryURLProviderTestNoDB : public HistoryURLProviderTest {
226 protected:
227  virtual void SetUp() {
228    ASSERT_TRUE(SetUpImpl(true));
229  }
230};
231
232void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) {
233  if (autocomplete_->done())
234    base::MessageLoop::current()->Quit();
235}
236
237bool HistoryURLProviderTest::SetUpImpl(bool no_db) {
238  profile_.reset(new TestingProfile());
239  if (!(profile_->CreateHistoryService(true, no_db)))
240    return false;
241  if (!no_db) {
242    profile_->BlockUntilHistoryProcessesPendingRequests();
243    profile_->BlockUntilHistoryIndexIsRefreshed();
244  }
245  profile_->GetPrefs()->SetString(prefs::kAcceptLanguages, "en-US,en,ko");
246  history_service_ = HistoryServiceFactory::GetForProfile(
247      profile_.get(), Profile::EXPLICIT_ACCESS);
248
249  autocomplete_ = new HistoryURLProvider(this, profile_.get());
250  TemplateURLServiceFactory::GetInstance()->SetTestingFactoryAndUse(
251      profile_.get(), &HistoryURLProviderTest::CreateTemplateURLService);
252  FillData();
253  return true;
254}
255
256void HistoryURLProviderTest::TearDown() {
257  autocomplete_ = NULL;
258}
259
260void HistoryURLProviderTest::FillData() {
261  // Most visits are a long time ago (some tests require this since we do some
262  // special logic for things visited very recently). Note that this time must
263  // be more recent than the "expire history" threshold for the data to be kept
264  // in the main database.
265  //
266  // TODO(brettw) It would be nice if we could test this behavior, in which
267  // case the time would be specifed in the test_db structure.
268  const Time now = Time::Now();
269
270  for (size_t i = 0; i < arraysize(test_db); ++i) {
271    const TestURLInfo& cur = test_db[i];
272    const GURL current_url(cur.url);
273    history_service_->AddPageWithDetails(
274        current_url, base::UTF8ToUTF16(cur.title), cur.visit_count,
275        cur.typed_count, now - TimeDelta::FromDays(cur.age_in_days), false,
276        history::SOURCE_BROWSED);
277  }
278
279  history_service_->AddPageWithDetails(
280      GURL("http://p/"), base::UTF8ToUTF16("p"), 0, 0,
281      Time::Now() -
282      TimeDelta::FromDays(history::kLowQualityMatchAgeLimitInDays - 1),
283      false, history::SOURCE_BROWSED);
284}
285
286void HistoryURLProviderTest::RunTest(
287    const base::string16 text,
288    const base::string16& desired_tld,
289    bool prevent_inline_autocomplete,
290    const UrlAndLegalDefault* expected_urls,
291    size_t num_results,
292    metrics::OmniboxInputType::Type* identified_input_type) {
293  AutocompleteInput input(text, base::string16::npos, desired_tld, GURL(),
294                          metrics::OmniboxEventProto::INVALID_SPEC,
295                          prevent_inline_autocomplete, false, true, true,
296                          ChromeAutocompleteSchemeClassifier(profile_.get()));
297  *identified_input_type = input.type();
298  autocomplete_->Start(input, false);
299  if (!autocomplete_->done())
300    base::MessageLoop::current()->Run();
301
302  matches_ = autocomplete_->matches();
303  if (sort_matches_) {
304    TemplateURLService* service =
305        TemplateURLServiceFactory::GetForProfile(profile_.get());
306    for (ACMatches::iterator i = matches_.begin(); i != matches_.end(); ++i)
307      i->ComputeStrippedDestinationURL(service);
308    AutocompleteResult::DedupMatchesByDestination(
309        input.current_page_classification(), false, &matches_);
310    std::sort(matches_.begin(), matches_.end(),
311              &AutocompleteMatch::MoreRelevant);
312  }
313  ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text
314                                          << "\nTLD: \"" << desired_tld << "\"";
315  for (size_t i = 0; i < num_results; ++i) {
316    EXPECT_EQ(expected_urls[i].url, matches_[i].destination_url.spec());
317    EXPECT_EQ(expected_urls[i].allowed_to_be_default_match,
318              matches_[i].allowed_to_be_default_match);
319  }
320}
321
322TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
323  // Test that hosts get synthesized below popular pages.
324  const UrlAndLegalDefault expected_nonsynth[] = {
325    { "http://slashdot.org/favorite_page.html", false },
326    { "http://slashdot.org/", false }
327  };
328  RunTest(ASCIIToUTF16("slash"), base::string16(), true, expected_nonsynth,
329          arraysize(expected_nonsynth));
330
331  // Test that hosts get synthesized above less popular pages.
332  const UrlAndLegalDefault expected_synth[] = {
333    { "http://kerneltrap.org/", false },
334    { "http://kerneltrap.org/not_very_popular.html", false }
335  };
336  RunTest(ASCIIToUTF16("kernel"), base::string16(), true, expected_synth,
337          arraysize(expected_synth));
338
339  // Test that unpopular pages are ignored completely.
340  RunTest(ASCIIToUTF16("fresh"), base::string16(), true, NULL, 0);
341
342  // Test that if we create or promote shorter suggestions that would not
343  // normally be inline autocompletable, we make them inline autocompletable if
344  // the original suggestion (that we replaced as "top") was inline
345  // autocompletable.
346  const UrlAndLegalDefault expected_synthesisa[] = {
347    { "http://synthesisatest.com/", true },
348    { "http://synthesisatest.com/foo/", true }
349  };
350  RunTest(ASCIIToUTF16("synthesisa"), base::string16(), false,
351          expected_synthesisa, arraysize(expected_synthesisa));
352  EXPECT_LT(matches_.front().relevance, 1200);
353  const UrlAndLegalDefault expected_synthesisb[] = {
354    { "http://synthesisbtest.com/foo/", true },
355    { "http://synthesisbtest.com/foo/bar.html", true }
356  };
357  RunTest(ASCIIToUTF16("synthesisb"), base::string16(), false,
358          expected_synthesisb, arraysize(expected_synthesisb));
359  EXPECT_GE(matches_.front().relevance, 1410);
360
361  // Test that if we have a synthesized host that matches a suggestion, they
362  // get combined into one.
363  const UrlAndLegalDefault expected_combine[] = {
364    { "http://news.google.com/", false },
365    { "http://news.google.com/?ned=us&topic=n", false },
366  };
367  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), base::string16(), true,
368      expected_combine, arraysize(expected_combine)));
369  // The title should also have gotten set properly on the host for the
370  // synthesized one, since it was also in the results.
371  EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description);
372
373  // Test that short URL matching works correctly as the user types more
374  // (several tests):
375  // The entry for foo.com is the best of all five foo.com* entries.
376  const UrlAndLegalDefault short_1[] = {
377    { "http://foo.com/", false },
378    { "http://foo.com/dir/another/again/myfile.html", false },
379    { "http://foo.com/dir/", false }
380  };
381  RunTest(ASCIIToUTF16("foo"), base::string16(), true,
382          short_1, arraysize(short_1));
383
384  // When the user types the whole host, make sure we don't get two results for
385  // it.
386  const UrlAndLegalDefault short_2[] = {
387    { "http://foo.com/", true },
388    { "http://foo.com/dir/another/again/myfile.html", false },
389    { "http://foo.com/dir/", false },
390    { "http://foo.com/dir/another/", false }
391  };
392  RunTest(ASCIIToUTF16("foo.com"), base::string16(), true, short_2,
393          arraysize(short_2));
394  RunTest(ASCIIToUTF16("foo.com/"), base::string16(), true, short_2,
395          arraysize(short_2));
396
397  // The filename is the second best of the foo.com* entries, but there is a
398  // shorter URL that's "good enough".  The host doesn't match the user input
399  // and so should not appear.
400  const UrlAndLegalDefault short_3[] = {
401    { "http://foo.com/d", true },
402    { "http://foo.com/dir/another/", false },
403    { "http://foo.com/dir/another/again/myfile.html", false },
404    { "http://foo.com/dir/", false }
405  };
406  RunTest(ASCIIToUTF16("foo.com/d"), base::string16(), true, short_3,
407          arraysize(short_3));
408
409  // We shouldn't promote shorter URLs than the best if they're not good
410  // enough.
411  const UrlAndLegalDefault short_4[] = {
412    { "http://foo.com/dir/another/a", true },
413    { "http://foo.com/dir/another/again/myfile.html", false },
414    { "http://foo.com/dir/another/again/", false }
415  };
416  RunTest(ASCIIToUTF16("foo.com/dir/another/a"), base::string16(), true,
417          short_4, arraysize(short_4));
418
419  // Exact matches should always be best no matter how much more another match
420  // has been typed.
421  const UrlAndLegalDefault short_5a[] = {
422    { "http://gooey/", true },
423    { "http://www.google.com/", true },
424    { "http://go/", true }
425  };
426  const UrlAndLegalDefault short_5b[] = {
427    { "http://go/", true },
428    { "http://gooey/", true },
429    { "http://www.google.com/", true }
430  };
431  RunTest(ASCIIToUTF16("g"), base::string16(), false,
432          short_5a, arraysize(short_5a));
433  RunTest(ASCIIToUTF16("go"), base::string16(), false,
434          short_5b, arraysize(short_5b));
435}
436
437TEST_F(HistoryURLProviderTest, CullRedirects) {
438  // URLs we will be using, plus the visit counts they will initially get
439  // (the redirect set below will also increment the visit counts). We want
440  // the results to be in A,B,C order. Note also that our visit counts are
441  // all high enough so that domain synthesizing won't get triggered.
442  struct TestCase {
443    const char* url;
444    int count;
445  } test_cases[] = {
446    {"http://redirects/A", 30},
447    {"http://redirects/B", 20},
448    {"http://redirects/C", 10}
449  };
450  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
451    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
452        ASCIIToUTF16("Title"), test_cases[i].count, test_cases[i].count,
453        Time::Now(), false, history::SOURCE_BROWSED);
454  }
455
456  // Create a B->C->A redirect chain, but set the visit counts such that they
457  // will appear in A,B,C order in the results. The autocomplete query will
458  // search for the most recent visit when looking for redirects, so this will
459  // be found even though the previous visits had no redirects.
460  history::RedirectList redirects_to_a;
461  redirects_to_a.push_back(GURL(test_cases[1].url));
462  redirects_to_a.push_back(GURL(test_cases[2].url));
463  redirects_to_a.push_back(GURL(test_cases[0].url));
464  history_service_->AddPage(GURL(test_cases[0].url), base::Time::Now(),
465      NULL, 0, GURL(), redirects_to_a, content::PAGE_TRANSITION_TYPED,
466      history::SOURCE_BROWSED, true);
467
468  // Because all the results are part of a redirect chain with other results,
469  // all but the first one (A) should be culled. We should get the default
470  // "what you typed" result, plus this one.
471  const base::string16 typing(ASCIIToUTF16("http://redirects/"));
472  const UrlAndLegalDefault expected_results[] = {
473    { base::UTF16ToUTF8(typing), true },
474    { test_cases[0].url, false }
475  };
476  RunTest(typing, base::string16(), true, expected_results,
477          arraysize(expected_results));
478}
479
480TEST_F(HistoryURLProviderTest, WhatYouTyped) {
481  // Make sure we suggest a What You Typed match at the right times.
482  RunTest(ASCIIToUTF16("wytmatch"), base::string16(), false, NULL, 0);
483  RunTest(ASCIIToUTF16("wytmatch foo bar"), base::string16(), false, NULL, 0);
484  RunTest(ASCIIToUTF16("wytmatch+foo+bar"), base::string16(), false, NULL, 0);
485  RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), base::string16(), false,
486          NULL, 0);
487
488  const UrlAndLegalDefault results_1[] = {
489    { "http://www.wytmatch.com/", true }
490  };
491  RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1,
492          arraysize(results_1));
493
494  const UrlAndLegalDefault results_2[] = {
495    { "http://wytmatch%20foo%20bar/", true }
496  };
497  RunTest(ASCIIToUTF16("http://wytmatch foo bar"), base::string16(), false,
498          results_2, arraysize(results_2));
499
500  const UrlAndLegalDefault results_3[] = {
501    { "https://wytmatch%20foo%20bar/", true }
502  };
503  RunTest(ASCIIToUTF16("https://wytmatch foo bar"), base::string16(), false,
504          results_3, arraysize(results_3));
505}
506
507TEST_F(HistoryURLProviderTest, Fixup) {
508  // Test for various past crashes we've had.
509  RunTest(ASCIIToUTF16("\\"), base::string16(), false, NULL, 0);
510  RunTest(ASCIIToUTF16("#"), base::string16(), false, NULL, 0);
511  RunTest(ASCIIToUTF16("%20"), base::string16(), false, NULL, 0);
512  const UrlAndLegalDefault fixup_crash[] = {
513    { "http://%EF%BD%A5@s/", true }
514  };
515  RunTest(base::WideToUTF16(L"\uff65@s"), base::string16(), false, fixup_crash,
516          arraysize(fixup_crash));
517  RunTest(base::WideToUTF16(L"\u2015\u2015@ \uff7c"), base::string16(), false,
518          NULL, 0);
519
520  // Fixing up "file:" should result in an inline autocomplete offset of just
521  // after "file:", not just after "file://".
522  const base::string16 input_1(ASCIIToUTF16("file:"));
523  const UrlAndLegalDefault fixup_1[] = {
524    { "file:///C:/foo.txt", true }
525  };
526  ASSERT_NO_FATAL_FAILURE(RunTest(input_1, base::string16(), false, fixup_1,
527                                  arraysize(fixup_1)));
528  EXPECT_EQ(ASCIIToUTF16("///C:/foo.txt"),
529            matches_.front().inline_autocompletion);
530
531  // Fixing up "http:/" should result in an inline autocomplete offset of just
532  // after "http:/", not just after "http:".
533  const base::string16 input_2(ASCIIToUTF16("http:/"));
534  const UrlAndLegalDefault fixup_2[] = {
535    { "http://bogussite.com/a", true },
536    { "http://bogussite.com/b", true },
537    { "http://bogussite.com/c", true }
538  };
539  ASSERT_NO_FATAL_FAILURE(RunTest(input_2, base::string16(), false, fixup_2,
540                                  arraysize(fixup_2)));
541  EXPECT_EQ(ASCIIToUTF16("/bogussite.com/a"),
542            matches_.front().inline_autocompletion);
543
544  // Adding a TLD to a small number like "56" should result in "www.56.com"
545  // rather than "0.0.0.56.com".
546  const UrlAndLegalDefault fixup_3[] = {
547    { "http://www.56.com/", true }
548  };
549  RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3,
550          arraysize(fixup_3));
551
552  // An input looks like a IP address like "127.0.0.1" should result in
553  // "http://127.0.0.1/".
554  const UrlAndLegalDefault fixup_4[] = {
555    { "http://127.0.0.1/", true }
556  };
557  RunTest(ASCIIToUTF16("127.0.0.1"), base::string16(), false, fixup_4,
558          arraysize(fixup_4));
559
560  // An number "17173" should result in "http://www.17173.com/" in db.
561  const UrlAndLegalDefault fixup_5[] = {
562    { "http://www.17173.com/", true }
563  };
564  RunTest(ASCIIToUTF16("17173"), base::string16(), false, fixup_5,
565          arraysize(fixup_5));
566}
567
568// Make sure the results for the input 'p' don't change between the first and
569// second passes.
570TEST_F(HistoryURLProviderTest, EmptyVisits) {
571  // Wait for history to create the in memory DB.
572  profile_->BlockUntilHistoryProcessesPendingRequests();
573
574  AutocompleteInput input(ASCIIToUTF16("p"), base::string16::npos,
575                          base::string16(), GURL(),
576                          metrics::OmniboxEventProto::INVALID_SPEC, false,
577                          false, true, true,
578                          ChromeAutocompleteSchemeClassifier(profile_.get()));
579  autocomplete_->Start(input, false);
580  // HistoryURLProvider shouldn't be done (waiting on async results).
581  EXPECT_FALSE(autocomplete_->done());
582
583  // We should get back an entry for pandora.
584  matches_ = autocomplete_->matches();
585  ASSERT_GT(matches_.size(), 0u);
586  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
587  int pandora_relevance = matches_[0].relevance;
588
589  // Run the message loop. When |autocomplete_| finishes the loop is quit.
590  base::MessageLoop::current()->Run();
591  EXPECT_TRUE(autocomplete_->done());
592  matches_ = autocomplete_->matches();
593  ASSERT_GT(matches_.size(), 0u);
594  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
595  EXPECT_EQ(pandora_relevance, matches_[0].relevance);
596}
597
598TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
599  // Ensure that we will still produce matches for navigation when there is no
600  // database.
601  UrlAndLegalDefault navigation_1[] = {
602    { "http://test.com/", true }
603  };
604  RunTest(ASCIIToUTF16("test.com"), base::string16(), false, navigation_1,
605          arraysize(navigation_1));
606
607  UrlAndLegalDefault navigation_2[] = {
608    { "http://slash/", true }
609  };
610  RunTest(ASCIIToUTF16("slash"), base::string16(), false, navigation_2,
611          arraysize(navigation_2));
612
613  RunTest(ASCIIToUTF16("this is a query"), base::string16(), false, NULL, 0);
614}
615
616TEST_F(HistoryURLProviderTest, DontAutocompleteOnTrailingWhitespace) {
617  AutocompleteInput input(ASCIIToUTF16("slash "), base::string16::npos,
618                          base::string16(), GURL(),
619                          metrics::OmniboxEventProto::INVALID_SPEC, false,
620                          false, true, true,
621                          ChromeAutocompleteSchemeClassifier(profile_.get()));
622  autocomplete_->Start(input, false);
623  if (!autocomplete_->done())
624    base::MessageLoop::current()->Run();
625
626  // None of the matches should attempt to autocomplete.
627  matches_ = autocomplete_->matches();
628  for (size_t i = 0; i < matches_.size(); ++i) {
629    EXPECT_TRUE(matches_[i].inline_autocompletion.empty());
630    EXPECT_FALSE(matches_[i].allowed_to_be_default_match);
631  }
632}
633
634TEST_F(HistoryURLProviderTest, TreatEmailsAsSearches) {
635  // Visiting foo.com should not make this string be treated as a navigation.
636  // That means the result should be scored around 1200 ("what you typed")
637  // and not 1400+.
638  const UrlAndLegalDefault expected[] = {
639    { "http://user@foo.com/", true }
640  };
641  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("user@foo.com"),
642                                  base::string16(), false, expected,
643                                  arraysize(expected)));
644  EXPECT_LE(1200, matches_[0].relevance);
645  EXPECT_LT(matches_[0].relevance, 1210);
646}
647
648TEST_F(HistoryURLProviderTest, IntranetURLsWithPaths) {
649  struct TestCase {
650    const char* input;
651    int relevance;
652  } test_cases[] = {
653    { "fooey", 0 },
654    { "fooey/", 1200 },     // 1200 for URL would still navigate by default.
655    { "fooey/a", 1200 },    // 1200 for UNKNOWN would not.
656    { "fooey/a b", 1200 },  // Also UNKNOWN.
657    { "gooey", 1410 },
658    { "gooey/", 1410 },
659    { "gooey/a", 1400 },
660    { "gooey/a b", 1400 },
661  };
662  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
663    SCOPED_TRACE(test_cases[i].input);
664    if (test_cases[i].relevance == 0) {
665      RunTest(ASCIIToUTF16(test_cases[i].input), base::string16(), false,
666              NULL, 0);
667    } else {
668      const UrlAndLegalDefault output[] = {
669          {url_fixer::FixupURL(test_cases[i].input, std::string()).spec(),
670           true}};
671      ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16(test_cases[i].input),
672                              base::string16(), false,
673                              output, arraysize(output)));
674      // Actual relevance should be at least what test_cases expects and
675      // and no more than 10 more.
676      EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
677      EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
678    }
679  }
680}
681
682TEST_F(HistoryURLProviderTest, IntranetURLsWithRefs) {
683  struct TestCase {
684    const char* input;
685    int relevance;
686    metrics::OmniboxInputType::Type type;
687  } test_cases[] = {
688    { "gooey", 1410, metrics::OmniboxInputType::UNKNOWN },
689    { "gooey/", 1410, metrics::OmniboxInputType::URL },
690    { "gooey#", 1200, metrics::OmniboxInputType::UNKNOWN },
691    { "gooey/#", 1200, metrics::OmniboxInputType::URL },
692    { "gooey#foo", 1200, metrics::OmniboxInputType::UNKNOWN },
693    { "gooey/#foo", 1200, metrics::OmniboxInputType::URL },
694    { "gooey# foo", 1200, metrics::OmniboxInputType::UNKNOWN },
695    { "gooey/# foo", 1200, metrics::OmniboxInputType::URL },
696  };
697  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
698    SCOPED_TRACE(test_cases[i].input);
699    const UrlAndLegalDefault output[] = {
700        {url_fixer::FixupURL(test_cases[i].input, std::string()).spec(), true}};
701    metrics::OmniboxInputType::Type type;
702    ASSERT_NO_FATAL_FAILURE(
703        RunTest(ASCIIToUTF16(test_cases[i].input),
704                base::string16(), false, output, arraysize(output), &type));
705    // Actual relevance should be at least what test_cases expects and
706    // and no more than 10 more.
707    EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
708    EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
709    // Input type should be what we expect.  This is important because
710    // this provider counts on SearchProvider to give queries a relevance
711    // score >1200 for UNKNOWN inputs and <1200 for URL inputs.  (That's
712    // already tested in search_provider_unittest.cc.)  For this test
713    // here to test that the user sees the correct behavior, it needs
714    // to check that the input type was identified correctly.
715    EXPECT_EQ(test_cases[i].type, type);
716  }
717}
718
// Makes sure autocompletion happens for intranet sites that have been
// previously visited.
721TEST_F(HistoryURLProviderTest, IntranetURLCompletion) {
722  sort_matches_ = true;
723
724  const UrlAndLegalDefault expected1[] = {
725    { "http://intra/three", true },
726    { "http://intra/two", true }
727  };
728  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/t"), base::string16(),
729                                  false, expected1, arraysize(expected1)));
730  EXPECT_LE(1410, matches_[0].relevance);
731  EXPECT_LT(matches_[0].relevance, 1420);
732  EXPECT_EQ(matches_[0].relevance - 1, matches_[1].relevance);
733
734  const UrlAndLegalDefault expected2[] = {
735    { "http://moo/b", true },
736    { "http://moo/bar", true }
737  };
738  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("moo/b"), base::string16(),
739                                  false, expected2, arraysize(expected2)));
740  // The url what you typed match should be around 1400, otherwise the
741  // search what you typed match is going to be first.
742  EXPECT_LE(1400, matches_[0].relevance);
743  EXPECT_LT(matches_[0].relevance, 1410);
744
745  const UrlAndLegalDefault expected3[] = {
746    { "http://intra/one", true },
747    { "http://intra/three", true },
748    { "http://intra/two", true }
749  };
750  RunTest(ASCIIToUTF16("intra"), base::string16(), false, expected3,
751          arraysize(expected3));
752
753  const UrlAndLegalDefault expected4[] = {
754    { "http://intra/one", true },
755    { "http://intra/three", true },
756    { "http://intra/two", true }
757  };
758  RunTest(ASCIIToUTF16("intra/"), base::string16(), false, expected4,
759          arraysize(expected4));
760
761  const UrlAndLegalDefault expected5[] = {
762    { "http://intra/one", true }
763  };
764  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/o"), base::string16(),
765                                  false, expected5, arraysize(expected5)));
766  EXPECT_LE(1410, matches_[0].relevance);
767  EXPECT_LT(matches_[0].relevance, 1420);
768
769  const UrlAndLegalDefault expected6[] = {
770    { "http://intra/x", true }
771  };
772  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/x"), base::string16(),
773                                  false, expected6, arraysize(expected6)));
774  EXPECT_LE(1400, matches_[0].relevance);
775  EXPECT_LT(matches_[0].relevance, 1410);
776
777  const UrlAndLegalDefault expected7[] = {
778    { "http://typedhost/untypedpath", true }
779  };
780  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("typedhost/untypedpath"),
781      base::string16(), false, expected7, arraysize(expected7)));
782  EXPECT_LE(1400, matches_[0].relevance);
783  EXPECT_LT(matches_[0].relevance, 1410);
784}
785
786TEST_F(HistoryURLProviderTest, CrashDueToFixup) {
787  // This test passes if we don't crash.  The results don't matter.
788  const char* const test_cases[] = {
789    "//c",
790    "\\@st",
791    "view-source:x",
792  };
793  for (size_t i = 0; i < arraysize(test_cases); ++i) {
794    AutocompleteInput input(ASCIIToUTF16(test_cases[i]), base::string16::npos,
795                            base::string16(), GURL(),
796                            metrics::OmniboxEventProto::INVALID_SPEC,
797                            false, false, true, true,
798                            ChromeAutocompleteSchemeClassifier(profile_.get()));
799    autocomplete_->Start(input, false);
800    if (!autocomplete_->done())
801      base::MessageLoop::current()->Run();
802  }
803}
804
805TEST_F(HistoryURLProviderTest, CullSearchResults) {
806  // Set up a default search engine.
807  TemplateURLData data;
808  data.SetKeyword(ASCIIToUTF16("TestEngine"));
809  data.SetURL("http://testsearch.com/?q={searchTerms}");
810  TemplateURLService* template_url_service =
811      TemplateURLServiceFactory::GetForProfile(profile_.get());
812  TemplateURL* template_url = new TemplateURL(data);
813  template_url_service->Add(template_url);
814  template_url_service->SetUserSelectedDefaultSearchProvider(template_url);
815  template_url_service->Load();
816
817  // URLs we will be using, plus the visit counts they will initially get
818  // (the redirect set below will also increment the visit counts). We want
819  // the results to be in A,B,C order. Note also that our visit counts are
820  // all high enough so that domain synthesizing won't get triggered.
821  struct TestCase {
822    const char* url;
823    int count;
824  } test_cases[] = {
825    {"https://testsearch.com/", 30},
826    {"https://testsearch.com/?q=foobar", 20},
827    {"http://foobar.com/", 10}
828  };
829  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
830    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
831        base::UTF8ToUTF16("Title"), test_cases[i].count, test_cases[i].count,
832        Time::Now(), false, history::SOURCE_BROWSED);
833  }
834
835  // We should not see search URLs when typing a previously used query.
836  const UrlAndLegalDefault expected_when_searching_query[] = {
837    { test_cases[2].url, false }
838  };
839  RunTest(ASCIIToUTF16("foobar"), base::string16(), true,
840      expected_when_searching_query, arraysize(expected_when_searching_query));
841
842  // We should not see search URLs when typing the search engine name.
843  const UrlAndLegalDefault expected_when_searching_site[] = {
844    { test_cases[0].url, false }
845  };
846  RunTest(ASCIIToUTF16("testsearch"), base::string16(), true,
847      expected_when_searching_site, arraysize(expected_when_searching_site));
848}
849
850TEST_F(HistoryURLProviderTest, SuggestExactInput) {
851  const size_t npos = std::string::npos;
852  struct TestCase {
853    // Inputs:
854    const char* input;
855    bool trim_http;
856    // Expected Outputs:
857    const char* contents;
858    // Offsets of the ACMatchClassifications, terminated by npos.
859    size_t offsets[3];
860    // The index of the ACMatchClassification that should have the MATCH bit
861    // set, npos if no ACMatchClassification should have the MATCH bit set.
862    size_t match_classification_index;
863  } test_cases[] = {
864    { "http://www.somesite.com", false,
865      "http://www.somesite.com", {0, npos, npos}, 0 },
866    { "www.somesite.com", true,
867      "www.somesite.com", {0, npos, npos}, 0 },
868    { "www.somesite.com", false,
869      "http://www.somesite.com", {0, 7, npos}, 1 },
870    { "somesite.com", true,
871      "somesite.com", {0, npos, npos}, 0 },
872    { "somesite.com", false,
873      "http://somesite.com", {0, 7, npos}, 1 },
874    { "w", true,
875      "w", {0, npos, npos}, 0 },
876    { "w", false,
877      "http://w", {0, 7, npos}, 1 },
878    { "w.com", true,
879      "w.com", {0, npos, npos}, 0 },
880    { "w.com", false,
881      "http://w.com", {0, 7, npos}, 1 },
882    { "www.w.com", true,
883      "www.w.com", {0, npos, npos}, 0 },
884    { "www.w.com", false,
885      "http://www.w.com", {0, 7, npos}, 1 },
886    { "view-source:w", true,
887      "view-source:w", {0, npos, npos}, 0 },
888    { "view-source:www.w.com/", true,
889      "view-source:www.w.com", {0, npos, npos}, npos },
890    { "view-source:www.w.com/", false,
891      "view-source:http://www.w.com", {0, npos, npos}, npos },
892    { "view-source:http://www.w.com/", false,
893      "view-source:http://www.w.com", {0, npos, npos}, 0 },
894    { "   view-source:", true,
895      "view-source:", {0, npos, npos}, 0 },
896    { "http:////////w.com", false,
897      "http://w.com", {0, npos, npos}, npos },
898    { "    http:////////www.w.com", false,
899      "http://www.w.com", {0, npos, npos}, npos },
900    { "http:a///www.w.com", false,
901      "http://a///www.w.com", {0, npos, npos}, npos },
902    { "mailto://a@b.com", true,
903      "mailto://a@b.com", {0, npos, npos}, 0 },
904    { "mailto://a@b.com", false,
905      "mailto://a@b.com", {0, npos, npos}, 0 },
906  };
907  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
908    SCOPED_TRACE(testing::Message() << "Index " << i << " input: "
909                                    << test_cases[i].input << ", trim_http: "
910                                    << test_cases[i].trim_http);
911
912    AutocompleteInput input(ASCIIToUTF16(test_cases[i].input),
913                            base::string16::npos, base::string16(),
914                            GURL("about:blank"),
915                            metrics::OmniboxEventProto::INVALID_SPEC, false,
916                            false, true, true,
917                            ChromeAutocompleteSchemeClassifier(profile_.get()));
918    AutocompleteMatch match(autocomplete_->SuggestExactInput(
919        input.text(), input.canonicalized_url(), test_cases[i].trim_http));
920    EXPECT_EQ(ASCIIToUTF16(test_cases[i].contents), match.contents);
921    for (size_t match_index = 0; match_index < match.contents_class.size();
922         ++match_index) {
923      EXPECT_EQ(test_cases[i].offsets[match_index],
924                match.contents_class[match_index].offset);
925      EXPECT_EQ(ACMatchClassification::URL |
926                (match_index == test_cases[i].match_classification_index ?
927                 ACMatchClassification::MATCH : 0),
928                match.contents_class[match_index].style);
929    }
930    EXPECT_EQ(npos, test_cases[i].offsets[match.contents_class.size()]);
931  }
932}
933
934TEST_F(HistoryURLProviderTest, HUPScoringExperiment) {
935  HUPScoringParams max_2000_no_time_decay;
936  max_2000_no_time_decay.typed_count_buckets.buckets().push_back(
937      std::make_pair(0.0, 2000));
938  HUPScoringParams max_1250_no_time_decay;
939  max_1250_no_time_decay.typed_count_buckets.buckets().push_back(
940      std::make_pair(0.0, 1250));
941  HUPScoringParams max_1000_no_time_decay;
942  max_1000_no_time_decay.typed_count_buckets.buckets().push_back(
943      std::make_pair(0.0, 1000));
944
945  HUPScoringParams max_1100_with_time_decay_and_max_cap;
946  max_1100_with_time_decay_and_max_cap.typed_count_buckets.
947      set_relevance_cap(1400);
948  max_1100_with_time_decay_and_max_cap.typed_count_buckets.
949      set_half_life_days(16);
950  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
951      std::make_pair(0.5, 1100));
952  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
953      std::make_pair(0.24, 200));
954  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
955      std::make_pair(0.0, 100));
956
957  HUPScoringParams max_1100_visit_typed_decays;
958  max_1100_visit_typed_decays.typed_count_buckets.set_half_life_days(16);
959  max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back(
960      std::make_pair(0.5, 1100));
961  max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back(
962      std::make_pair(0.0, 100));
963  max_1100_visit_typed_decays.visited_count_buckets.set_half_life_days(16);
964  max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back(
965      std::make_pair(0.5, 550));
966  max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back(
967      std::make_pair(0.0, 50));
968
969  const int kMaxMatches = 3;
970  struct TestCase {
971    const char* input;
972    HUPScoringParams scoring_params;
973    struct ExpectedMatch {
974      const char* url;
975      int control_relevance;
976      int experiment_relevance;
977    };
978    ExpectedMatch matches[kMaxMatches];
979  } test_cases[] = {
980    // Max score 2000 -> no demotion.
981    { "7.com/1", max_2000_no_time_decay,
982      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
983
984    // Limit score to 1250/1000 and make sure that the top match is unchanged.
985    { "7.com/1", max_1250_no_time_decay,
986      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
987    { "7.com/2", max_1250_no_time_decay,
988      {{"7.com/2a", 1413, 1413}, {"7.com/2b", 1412, 1250}, {NULL, 0, 0}} },
989    { "7.com/4", max_1000_no_time_decay,
990      {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 1000},
991       {"7.com/4b", 1201, 999}} },
992
993    // Max relevance cap is 1400 and half-life is 16 days.
994    { "7.com/1", max_1100_with_time_decay_and_max_cap,
995      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
996    { "7.com/4", max_1100_with_time_decay_and_max_cap,
997      {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 200},
998       {"7.com/4b", 1201, 100}} },
999
1000    // Max relevance cap is 1400 and half-life is 16 days for both visit/typed.
1001    { "7.com/5", max_1100_visit_typed_decays,
1002      {{"7.com/5", 1203, 1203}, {"7.com/5a", 1202, 50}, {NULL, 0, 0}} },
1003  };
1004  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
1005    SCOPED_TRACE(test_cases[i].input);
1006    UrlAndLegalDefault output[kMaxMatches];
1007    int max_matches;
1008    for (max_matches = 0; max_matches < kMaxMatches; ++max_matches) {
1009      if (test_cases[i].matches[max_matches].url == NULL)
1010        break;
1011      output[max_matches].url =
1012          url_fixer::FixupURL(test_cases[i].matches[max_matches].url,
1013                              std::string()).spec();
1014      output[max_matches].allowed_to_be_default_match = true;
1015    }
1016    autocomplete_->scoring_params_ = test_cases[i].scoring_params;
1017
1018    // Test the control (scoring disabled).
1019    autocomplete_->scoring_params_.experimental_scoring_enabled = false;
1020    ASSERT_NO_FATAL_FAILURE(
1021        RunTest(ASCIIToUTF16(test_cases[i].input),
1022                base::string16(), false, output, max_matches));
1023    for (int j = 0; j < max_matches; ++j) {
1024      EXPECT_EQ(test_cases[i].matches[j].control_relevance,
1025                matches_[j].relevance);
1026    }
1027
1028    // Test the experiment (scoring enabled).
1029    autocomplete_->scoring_params_.experimental_scoring_enabled = true;
1030    ASSERT_NO_FATAL_FAILURE(
1031        RunTest(ASCIIToUTF16(test_cases[i].input),
1032                base::string16(), false, output, max_matches));
1033    for (int j = 0; j < max_matches; ++j) {
1034      EXPECT_EQ(test_cases[i].matches[j].experiment_relevance,
1035                matches_[j].relevance);
1036    }
1037  }
1038}
1039