history_url_provider_unittest.cc revision 6d86b77056ed63eb6871182f42a9fd5f07550f90
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/history_url_provider.h"
6
7#include <algorithm>
8
9#include "base/message_loop/message_loop.h"
10#include "base/path_service.h"
11#include "base/prefs/pref_service.h"
12#include "base/strings/string_util.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "chrome/browser/autocomplete/autocomplete_match.h"
16#include "chrome/browser/autocomplete/autocomplete_provider.h"
17#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
18#include "chrome/browser/autocomplete/autocomplete_result.h"
19#include "chrome/browser/autocomplete/history_quick_provider.h"
20#include "chrome/browser/history/history_service.h"
21#include "chrome/browser/history/history_service_factory.h"
22#include "chrome/browser/search_engines/template_url.h"
23#include "chrome/browser/search_engines/template_url_service.h"
24#include "chrome/browser/search_engines/template_url_service_factory.h"
25#include "chrome/common/pref_names.h"
26#include "chrome/test/base/testing_browser_process.h"
27#include "chrome/test/base/testing_profile.h"
28#include "components/metrics/proto/omnibox_event.pb.h"
29#include "components/metrics/proto/omnibox_input_type.pb.h"
30#include "components/url_fixer/url_fixer.h"
31#include "content/public/test/test_browser_thread_bundle.h"
32#include "testing/gtest/include/gtest/gtest.h"
33
34using base::ASCIIToUTF16;
35using base::Time;
36using base::TimeDelta;
37
38using content::TestBrowserThreadBundle;
39
// One row of seed data.  FillData() hands every row to
// HistoryService::AddPageWithDetails().
struct TestURLInfo {
  const char* url;    // URL of the page.
  const char* title;  // Title of the page (UTF-8).
  int visit_count;    // Visit count passed to the history service.
  int typed_count;    // Typed count passed to the history service.
  int age_in_days;    // FillData() stamps the page this many days before now.
};

// The seed rows inserted into the history database before each test.
TestURLInfo test_db[] = {
  {"http://www.google.com/", "Google", 3, 3, 80},

  // High-quality pages should get a host synthesized as a lower-quality match.
  {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100, 80},

  // Less popular pages should have hosts synthesized as higher-quality
  // matches.
  {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0, 80},

  // Unpopular pages should not appear in the results at all.
  {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 0, 80},

  // If a host has a match, we should pick it up during host synthesis.
  {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 80},
  {"http://news.google.com/", "Google News", 1, 1, 80},

  // Matches that are normally not inline-autocompletable should be
  // autocompleted if they are shorter substitutes for longer matches that would
  // have been inline autocompleted.
  {"http://synthesisatest.com/foo/", "Test A", 1, 1, 80},
  {"http://synthesisbtest.com/foo/", "Test B", 1, 1, 80},
  {"http://synthesisbtest.com/foo/bar.html", "Test B Bar", 2, 2, 80},

  // Suggested short URLs must be "good enough" and must match user input.
  {"http://foo.com/", "Dir", 5, 5, 80},
  {"http://foo.com/dir/", "Dir", 2, 2, 80},
  {"http://foo.com/dir/another/", "Dir", 5, 1, 80},
  {"http://foo.com/dir/another/again/", "Dir", 10, 0, 80},
  {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 80},

  // We throw in a lot of extra URLs here to make sure we're testing the
  // history database's query, not just the autocomplete provider.
  {"http://startest.com/y/a", "A", 2, 2, 80},
  {"http://startest.com/y/b", "B", 5, 2, 80},
  {"http://startest.com/x/c", "C", 5, 2, 80},
  {"http://startest.com/x/d", "D", 5, 5, 80},
  {"http://startest.com/y/e", "E", 4, 2, 80},
  {"http://startest.com/y/f", "F", 3, 2, 80},
  {"http://startest.com/y/g", "G", 3, 2, 80},
  {"http://startest.com/y/h", "H", 3, 2, 80},
  {"http://startest.com/y/i", "I", 3, 2, 80},
  {"http://startest.com/y/j", "J", 3, 2, 80},
  {"http://startest.com/y/k", "K", 3, 2, 80},
  {"http://startest.com/y/l", "L", 3, 2, 80},
  {"http://startest.com/y/m", "M", 3, 2, 80},

  // A file: URL is useful for testing that fixup does the right thing w.r.t.
  // the number of trailing slashes on the user's input.
  {"file:///C:/foo.txt", "", 2, 2, 80},

  // Results with absurdly high typed_counts so that very generic queries like
  // "http" will give consistent results even if more data is added above.
  {"http://bogussite.com/a", "Bogus A", 10002, 10000, 80},
  {"http://bogussite.com/b", "Bogus B", 10001, 10000, 80},
  {"http://bogussite.com/c", "Bogus C", 10000, 10000, 80},

  // Domain name with number.
  {"http://www.17173.com/", "Domain with number", 3, 3, 80},

  // URLs to test exact-matching behavior.
  {"http://go/", "Intranet URL", 1, 1, 80},
  {"http://gooey/", "Intranet URL 2", 5, 5, 80},

  // URLs for testing offset adjustment.
  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2, 80},
  {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 80},
  {"http://ms/c++%20style%20guide", "Style guide", 2, 2, 80},

  // URLs for testing ctrl-enter behavior.
  {"http://binky/", "Intranet binky", 2, 2, 80},
  {"http://winky/", "Intranet winky", 2, 2, 80},
  {"http://www.winky.com/", "Internet winky", 5, 0, 80},

  // URLs used by EmptyVisits.
  {"http://pandora.com/", "Pandora", 2, 2, 80},
  // This entry is explicitly added more recently than
  // history::kLowQualityMatchAgeLimitInDays.
  // {"http://p/", "p", 0, 0, 80},

  // For intranet based tests.
  {"http://intra/one", "Intranet", 2, 2, 80},
  {"http://intra/two", "Intranet two", 1, 1, 80},
  {"http://intra/three", "Intranet three", 2, 2, 80},
  {"http://moo/bar", "Intranet moo", 1, 1, 80},
  {"http://typedhost/typedpath", "Intranet typed", 1, 1, 80},
  {"http://typedhost/untypedpath", "Intranet untyped", 1, 0, 80},

  {"http://x.com/one", "Internet", 2, 2, 80},
  {"http://x.com/two", "Internet two", 1, 1, 80},
  {"http://x.com/three", "Internet three", 2, 2, 80},

  // For experimental HUP scoring test.
  {"http://7.com/1a", "One", 8, 4, 4},
  {"http://7.com/2a", "Two A", 4, 2, 8},
  {"http://7.com/2b", "Two B", 4, 1, 8},
  {"http://7.com/3a", "Three", 2, 1, 16},
  {"http://7.com/4a", "Four A", 1, 1, 32},
  {"http://7.com/4b", "Four B", 1, 1, 64},
  {"http://7.com/5a", "Five A", 8, 0, 64},  // never typed.
};
147
148class HistoryURLProviderTest : public testing::Test,
149                               public AutocompleteProviderListener {
150 public:
151  struct UrlAndLegalDefault {
152    std::string url;
153    bool allowed_to_be_default_match;
154  };
155
156  HistoryURLProviderTest()
157      : sort_matches_(false) {
158    HistoryQuickProvider::set_disabled(true);
159  }
160
161  virtual ~HistoryURLProviderTest() {
162    HistoryQuickProvider::set_disabled(false);
163  }
164
165  // AutocompleteProviderListener:
166  virtual void OnProviderUpdate(bool updated_matches) OVERRIDE;
167
168 protected:
169  static KeyedService* CreateTemplateURLService(
170      content::BrowserContext* profile) {
171    return new TemplateURLService(static_cast<Profile*>(profile));
172  }
173
174  // testing::Test
175  virtual void SetUp() {
176    ASSERT_TRUE(SetUpImpl(false));
177  }
178  virtual void TearDown();
179
180  // Does the real setup.
181  bool SetUpImpl(bool no_db) WARN_UNUSED_RESULT;
182
183  // Fills test data into the history system.
184  void FillData();
185
186  // Runs an autocomplete query on |text| and checks to see that the returned
187  // results' destination URLs match those provided.  Also allows checking
188  // that the input type was identified correctly.
189  void RunTest(const base::string16 text,
190               const base::string16& desired_tld,
191               bool prevent_inline_autocomplete,
192               const UrlAndLegalDefault* expected_urls,
193               size_t num_results,
194               metrics::OmniboxInputType::Type* identified_input_type);
195
196  // A version of the above without the final |type| output parameter.
197  void RunTest(const base::string16 text,
198               const base::string16& desired_tld,
199               bool prevent_inline_autocomplete,
200               const UrlAndLegalDefault* expected_urls,
201               size_t num_results) {
202    metrics::OmniboxInputType::Type type;
203    return RunTest(text, desired_tld, prevent_inline_autocomplete,
204                   expected_urls, num_results, &type);
205  }
206
207  content::TestBrowserThreadBundle thread_bundle_;
208  ACMatches matches_;
209  scoped_ptr<TestingProfile> profile_;
210  HistoryService* history_service_;
211  scoped_refptr<HistoryURLProvider> autocomplete_;
212  // Should the matches be sorted and duplicates removed?
213  bool sort_matches_;
214};
215
216class HistoryURLProviderTestNoDB : public HistoryURLProviderTest {
217 protected:
218  virtual void SetUp() {
219    ASSERT_TRUE(SetUpImpl(true));
220  }
221};
222
223void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) {
224  if (autocomplete_->done())
225    base::MessageLoop::current()->Quit();
226}
227
228bool HistoryURLProviderTest::SetUpImpl(bool no_db) {
229  profile_.reset(new TestingProfile());
230  if (!(profile_->CreateHistoryService(true, no_db)))
231    return false;
232  if (!no_db) {
233    profile_->BlockUntilHistoryProcessesPendingRequests();
234    profile_->BlockUntilHistoryIndexIsRefreshed();
235  }
236  profile_->GetPrefs()->SetString(prefs::kAcceptLanguages, "en-US,en,ko");
237  history_service_ = HistoryServiceFactory::GetForProfile(
238      profile_.get(), Profile::EXPLICIT_ACCESS);
239
240  autocomplete_ = new HistoryURLProvider(this, profile_.get());
241  TemplateURLServiceFactory::GetInstance()->SetTestingFactoryAndUse(
242      profile_.get(), &HistoryURLProviderTest::CreateTemplateURLService);
243  FillData();
244  return true;
245}
246
247void HistoryURLProviderTest::TearDown() {
248  autocomplete_ = NULL;
249}
250
251void HistoryURLProviderTest::FillData() {
252  // Most visits are a long time ago (some tests require this since we do some
253  // special logic for things visited very recently). Note that this time must
254  // be more recent than the "expire history" threshold for the data to be kept
255  // in the main database.
256  //
257  // TODO(brettw) It would be nice if we could test this behavior, in which
258  // case the time would be specifed in the test_db structure.
259  const Time now = Time::Now();
260
261  for (size_t i = 0; i < arraysize(test_db); ++i) {
262    const TestURLInfo& cur = test_db[i];
263    const GURL current_url(cur.url);
264    history_service_->AddPageWithDetails(
265        current_url, base::UTF8ToUTF16(cur.title), cur.visit_count,
266        cur.typed_count, now - TimeDelta::FromDays(cur.age_in_days), false,
267        history::SOURCE_BROWSED);
268  }
269
270  history_service_->AddPageWithDetails(
271      GURL("http://p/"), base::UTF8ToUTF16("p"), 0, 0,
272      Time::Now() -
273      TimeDelta::FromDays(history::kLowQualityMatchAgeLimitInDays - 1),
274      false, history::SOURCE_BROWSED);
275}
276
277void HistoryURLProviderTest::RunTest(
278    const base::string16 text,
279    const base::string16& desired_tld,
280    bool prevent_inline_autocomplete,
281    const UrlAndLegalDefault* expected_urls,
282    size_t num_results,
283    metrics::OmniboxInputType::Type* identified_input_type) {
284  AutocompleteInput input(text, base::string16::npos, desired_tld, GURL(),
285                          metrics::OmniboxEventProto::INVALID_SPEC,
286                          prevent_inline_autocomplete, false, true, true);
287  *identified_input_type = input.type();
288  autocomplete_->Start(input, false);
289  if (!autocomplete_->done())
290    base::MessageLoop::current()->Run();
291
292  matches_ = autocomplete_->matches();
293  if (sort_matches_) {
294    for (ACMatches::iterator i = matches_.begin(); i != matches_.end(); ++i)
295      i->ComputeStrippedDestinationURL(profile_.get());
296    AutocompleteResult::DedupMatchesByDestination(
297        input.current_page_classification(), false, &matches_);
298    std::sort(matches_.begin(), matches_.end(),
299              &AutocompleteMatch::MoreRelevant);
300  }
301  ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text
302                                          << "\nTLD: \"" << desired_tld << "\"";
303  for (size_t i = 0; i < num_results; ++i) {
304    EXPECT_EQ(expected_urls[i].url, matches_[i].destination_url.spec());
305    EXPECT_EQ(expected_urls[i].allowed_to_be_default_match,
306              matches_[i].allowed_to_be_default_match);
307  }
308}
309
310TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
311  // Test that hosts get synthesized below popular pages.
312  const UrlAndLegalDefault expected_nonsynth[] = {
313    { "http://slashdot.org/favorite_page.html", false },
314    { "http://slashdot.org/", false }
315  };
316  RunTest(ASCIIToUTF16("slash"), base::string16(), true, expected_nonsynth,
317          arraysize(expected_nonsynth));
318
319  // Test that hosts get synthesized above less popular pages.
320  const UrlAndLegalDefault expected_synth[] = {
321    { "http://kerneltrap.org/", false },
322    { "http://kerneltrap.org/not_very_popular.html", false }
323  };
324  RunTest(ASCIIToUTF16("kernel"), base::string16(), true, expected_synth,
325          arraysize(expected_synth));
326
327  // Test that unpopular pages are ignored completely.
328  RunTest(ASCIIToUTF16("fresh"), base::string16(), true, NULL, 0);
329
330  // Test that if we create or promote shorter suggestions that would not
331  // normally be inline autocompletable, we make them inline autocompletable if
332  // the original suggestion (that we replaced as "top") was inline
333  // autocompletable.
334  const UrlAndLegalDefault expected_synthesisa[] = {
335    { "http://synthesisatest.com/", true },
336    { "http://synthesisatest.com/foo/", true }
337  };
338  RunTest(ASCIIToUTF16("synthesisa"), base::string16(), false,
339          expected_synthesisa, arraysize(expected_synthesisa));
340  EXPECT_LT(matches_.front().relevance, 1200);
341  const UrlAndLegalDefault expected_synthesisb[] = {
342    { "http://synthesisbtest.com/foo/", true },
343    { "http://synthesisbtest.com/foo/bar.html", true }
344  };
345  RunTest(ASCIIToUTF16("synthesisb"), base::string16(), false,
346          expected_synthesisb, arraysize(expected_synthesisb));
347  EXPECT_GE(matches_.front().relevance, 1410);
348
349  // Test that if we have a synthesized host that matches a suggestion, they
350  // get combined into one.
351  const UrlAndLegalDefault expected_combine[] = {
352    { "http://news.google.com/", false },
353    { "http://news.google.com/?ned=us&topic=n", false },
354  };
355  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), base::string16(), true,
356      expected_combine, arraysize(expected_combine)));
357  // The title should also have gotten set properly on the host for the
358  // synthesized one, since it was also in the results.
359  EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description);
360
361  // Test that short URL matching works correctly as the user types more
362  // (several tests):
363  // The entry for foo.com is the best of all five foo.com* entries.
364  const UrlAndLegalDefault short_1[] = {
365    { "http://foo.com/", false },
366    { "http://foo.com/dir/another/again/myfile.html", false },
367    { "http://foo.com/dir/", false }
368  };
369  RunTest(ASCIIToUTF16("foo"), base::string16(), true,
370          short_1, arraysize(short_1));
371
372  // When the user types the whole host, make sure we don't get two results for
373  // it.
374  const UrlAndLegalDefault short_2[] = {
375    { "http://foo.com/", true },
376    { "http://foo.com/dir/another/again/myfile.html", false },
377    { "http://foo.com/dir/", false },
378    { "http://foo.com/dir/another/", false }
379  };
380  RunTest(ASCIIToUTF16("foo.com"), base::string16(), true, short_2,
381          arraysize(short_2));
382  RunTest(ASCIIToUTF16("foo.com/"), base::string16(), true, short_2,
383          arraysize(short_2));
384
385  // The filename is the second best of the foo.com* entries, but there is a
386  // shorter URL that's "good enough".  The host doesn't match the user input
387  // and so should not appear.
388  const UrlAndLegalDefault short_3[] = {
389    { "http://foo.com/d", true },
390    { "http://foo.com/dir/another/", false },
391    { "http://foo.com/dir/another/again/myfile.html", false },
392    { "http://foo.com/dir/", false }
393  };
394  RunTest(ASCIIToUTF16("foo.com/d"), base::string16(), true, short_3,
395          arraysize(short_3));
396
397  // We shouldn't promote shorter URLs than the best if they're not good
398  // enough.
399  const UrlAndLegalDefault short_4[] = {
400    { "http://foo.com/dir/another/a", true },
401    { "http://foo.com/dir/another/again/myfile.html", false },
402    { "http://foo.com/dir/another/again/", false }
403  };
404  RunTest(ASCIIToUTF16("foo.com/dir/another/a"), base::string16(), true,
405          short_4, arraysize(short_4));
406
407  // Exact matches should always be best no matter how much more another match
408  // has been typed.
409  const UrlAndLegalDefault short_5a[] = {
410    { "http://gooey/", true },
411    { "http://www.google.com/", true },
412    { "http://go/", true }
413  };
414  const UrlAndLegalDefault short_5b[] = {
415    { "http://go/", true },
416    { "http://gooey/", true },
417    { "http://www.google.com/", true }
418  };
419  RunTest(ASCIIToUTF16("g"), base::string16(), false,
420          short_5a, arraysize(short_5a));
421  RunTest(ASCIIToUTF16("go"), base::string16(), false,
422          short_5b, arraysize(short_5b));
423}
424
425TEST_F(HistoryURLProviderTest, CullRedirects) {
426  // URLs we will be using, plus the visit counts they will initially get
427  // (the redirect set below will also increment the visit counts). We want
428  // the results to be in A,B,C order. Note also that our visit counts are
429  // all high enough so that domain synthesizing won't get triggered.
430  struct TestCase {
431    const char* url;
432    int count;
433  } test_cases[] = {
434    {"http://redirects/A", 30},
435    {"http://redirects/B", 20},
436    {"http://redirects/C", 10}
437  };
438  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
439    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
440        ASCIIToUTF16("Title"), test_cases[i].count, test_cases[i].count,
441        Time::Now(), false, history::SOURCE_BROWSED);
442  }
443
444  // Create a B->C->A redirect chain, but set the visit counts such that they
445  // will appear in A,B,C order in the results. The autocomplete query will
446  // search for the most recent visit when looking for redirects, so this will
447  // be found even though the previous visits had no redirects.
448  history::RedirectList redirects_to_a;
449  redirects_to_a.push_back(GURL(test_cases[1].url));
450  redirects_to_a.push_back(GURL(test_cases[2].url));
451  redirects_to_a.push_back(GURL(test_cases[0].url));
452  history_service_->AddPage(GURL(test_cases[0].url), base::Time::Now(),
453      NULL, 0, GURL(), redirects_to_a, content::PAGE_TRANSITION_TYPED,
454      history::SOURCE_BROWSED, true);
455
456  // Because all the results are part of a redirect chain with other results,
457  // all but the first one (A) should be culled. We should get the default
458  // "what you typed" result, plus this one.
459  const base::string16 typing(ASCIIToUTF16("http://redirects/"));
460  const UrlAndLegalDefault expected_results[] = {
461    { base::UTF16ToUTF8(typing), true },
462    { test_cases[0].url, false }
463  };
464  RunTest(typing, base::string16(), true, expected_results,
465          arraysize(expected_results));
466}
467
468TEST_F(HistoryURLProviderTest, WhatYouTyped) {
469  // Make sure we suggest a What You Typed match at the right times.
470  RunTest(ASCIIToUTF16("wytmatch"), base::string16(), false, NULL, 0);
471  RunTest(ASCIIToUTF16("wytmatch foo bar"), base::string16(), false, NULL, 0);
472  RunTest(ASCIIToUTF16("wytmatch+foo+bar"), base::string16(), false, NULL, 0);
473  RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), base::string16(), false,
474          NULL, 0);
475
476  const UrlAndLegalDefault results_1[] = {
477    { "http://www.wytmatch.com/", true }
478  };
479  RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1,
480          arraysize(results_1));
481
482  const UrlAndLegalDefault results_2[] = {
483    { "http://wytmatch%20foo%20bar/", true }
484  };
485  RunTest(ASCIIToUTF16("http://wytmatch foo bar"), base::string16(), false,
486          results_2, arraysize(results_2));
487
488  const UrlAndLegalDefault results_3[] = {
489    { "https://wytmatch%20foo%20bar/", true }
490  };
491  RunTest(ASCIIToUTF16("https://wytmatch foo bar"), base::string16(), false,
492          results_3, arraysize(results_3));
493}
494
495TEST_F(HistoryURLProviderTest, Fixup) {
496  // Test for various past crashes we've had.
497  RunTest(ASCIIToUTF16("\\"), base::string16(), false, NULL, 0);
498  RunTest(ASCIIToUTF16("#"), base::string16(), false, NULL, 0);
499  RunTest(ASCIIToUTF16("%20"), base::string16(), false, NULL, 0);
500  const UrlAndLegalDefault fixup_crash[] = {
501    { "http://%EF%BD%A5@s/", true }
502  };
503  RunTest(base::WideToUTF16(L"\uff65@s"), base::string16(), false, fixup_crash,
504          arraysize(fixup_crash));
505  RunTest(base::WideToUTF16(L"\u2015\u2015@ \uff7c"), base::string16(), false,
506          NULL, 0);
507
508  // Fixing up "file:" should result in an inline autocomplete offset of just
509  // after "file:", not just after "file://".
510  const base::string16 input_1(ASCIIToUTF16("file:"));
511  const UrlAndLegalDefault fixup_1[] = {
512    { "file:///C:/foo.txt", true }
513  };
514  ASSERT_NO_FATAL_FAILURE(RunTest(input_1, base::string16(), false, fixup_1,
515                                  arraysize(fixup_1)));
516  EXPECT_EQ(ASCIIToUTF16("///C:/foo.txt"),
517            matches_.front().inline_autocompletion);
518
519  // Fixing up "http:/" should result in an inline autocomplete offset of just
520  // after "http:/", not just after "http:".
521  const base::string16 input_2(ASCIIToUTF16("http:/"));
522  const UrlAndLegalDefault fixup_2[] = {
523    { "http://bogussite.com/a", true },
524    { "http://bogussite.com/b", true },
525    { "http://bogussite.com/c", true }
526  };
527  ASSERT_NO_FATAL_FAILURE(RunTest(input_2, base::string16(), false, fixup_2,
528                                  arraysize(fixup_2)));
529  EXPECT_EQ(ASCIIToUTF16("/bogussite.com/a"),
530            matches_.front().inline_autocompletion);
531
532  // Adding a TLD to a small number like "56" should result in "www.56.com"
533  // rather than "0.0.0.56.com".
534  const UrlAndLegalDefault fixup_3[] = {
535    { "http://www.56.com/", true }
536  };
537  RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3,
538          arraysize(fixup_3));
539
540  // An input looks like a IP address like "127.0.0.1" should result in
541  // "http://127.0.0.1/".
542  const UrlAndLegalDefault fixup_4[] = {
543    { "http://127.0.0.1/", true }
544  };
545  RunTest(ASCIIToUTF16("127.0.0.1"), base::string16(), false, fixup_4,
546          arraysize(fixup_4));
547
548  // An number "17173" should result in "http://www.17173.com/" in db.
549  const UrlAndLegalDefault fixup_5[] = {
550    { "http://www.17173.com/", true }
551  };
552  RunTest(ASCIIToUTF16("17173"), base::string16(), false, fixup_5,
553          arraysize(fixup_5));
554}
555
556// Make sure the results for the input 'p' don't change between the first and
557// second passes.
558TEST_F(HistoryURLProviderTest, EmptyVisits) {
559  // Wait for history to create the in memory DB.
560  profile_->BlockUntilHistoryProcessesPendingRequests();
561
562  AutocompleteInput input(ASCIIToUTF16("p"), base::string16::npos,
563                          base::string16(), GURL(),
564                          metrics::OmniboxEventProto::INVALID_SPEC, false,
565                          false, true, true);
566  autocomplete_->Start(input, false);
567  // HistoryURLProvider shouldn't be done (waiting on async results).
568  EXPECT_FALSE(autocomplete_->done());
569
570  // We should get back an entry for pandora.
571  matches_ = autocomplete_->matches();
572  ASSERT_GT(matches_.size(), 0u);
573  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
574  int pandora_relevance = matches_[0].relevance;
575
576  // Run the message loop. When |autocomplete_| finishes the loop is quit.
577  base::MessageLoop::current()->Run();
578  EXPECT_TRUE(autocomplete_->done());
579  matches_ = autocomplete_->matches();
580  ASSERT_GT(matches_.size(), 0u);
581  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
582  EXPECT_EQ(pandora_relevance, matches_[0].relevance);
583}
584
585TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
586  // Ensure that we will still produce matches for navigation when there is no
587  // database.
588  UrlAndLegalDefault navigation_1[] = {
589    { "http://test.com/", true }
590  };
591  RunTest(ASCIIToUTF16("test.com"), base::string16(), false, navigation_1,
592          arraysize(navigation_1));
593
594  UrlAndLegalDefault navigation_2[] = {
595    { "http://slash/", true }
596  };
597  RunTest(ASCIIToUTF16("slash"), base::string16(), false, navigation_2,
598          arraysize(navigation_2));
599
600  RunTest(ASCIIToUTF16("this is a query"), base::string16(), false, NULL, 0);
601}
602
603TEST_F(HistoryURLProviderTest, DontAutocompleteOnTrailingWhitespace) {
604  AutocompleteInput input(ASCIIToUTF16("slash "), base::string16::npos,
605                          base::string16(), GURL(),
606                          metrics::OmniboxEventProto::INVALID_SPEC, false,
607                          false, true, true);
608  autocomplete_->Start(input, false);
609  if (!autocomplete_->done())
610    base::MessageLoop::current()->Run();
611
612  // None of the matches should attempt to autocomplete.
613  matches_ = autocomplete_->matches();
614  for (size_t i = 0; i < matches_.size(); ++i) {
615    EXPECT_TRUE(matches_[i].inline_autocompletion.empty());
616    EXPECT_FALSE(matches_[i].allowed_to_be_default_match);
617  }
618}
619
620TEST_F(HistoryURLProviderTest, TreatEmailsAsSearches) {
621  // Visiting foo.com should not make this string be treated as a navigation.
622  // That means the result should be scored around 1200 ("what you typed")
623  // and not 1400+.
624  const UrlAndLegalDefault expected[] = {
625    { "http://user@foo.com/", true }
626  };
627  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("user@foo.com"),
628                                  base::string16(), false, expected,
629                                  arraysize(expected)));
630  EXPECT_LE(1200, matches_[0].relevance);
631  EXPECT_LT(matches_[0].relevance, 1210);
632}
633
634TEST_F(HistoryURLProviderTest, IntranetURLsWithPaths) {
635  struct TestCase {
636    const char* input;
637    int relevance;
638  } test_cases[] = {
639    { "fooey", 0 },
640    { "fooey/", 1200 },     // 1200 for URL would still navigate by default.
641    { "fooey/a", 1200 },    // 1200 for UNKNOWN would not.
642    { "fooey/a b", 1200 },  // Also UNKNOWN.
643    { "gooey", 1410 },
644    { "gooey/", 1410 },
645    { "gooey/a", 1400 },
646    { "gooey/a b", 1400 },
647  };
648  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
649    SCOPED_TRACE(test_cases[i].input);
650    if (test_cases[i].relevance == 0) {
651      RunTest(ASCIIToUTF16(test_cases[i].input), base::string16(), false,
652              NULL, 0);
653    } else {
654      const UrlAndLegalDefault output[] = {
655          {url_fixer::FixupURL(test_cases[i].input, std::string()).spec(),
656           true}};
657      ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16(test_cases[i].input),
658                              base::string16(), false,
659                              output, arraysize(output)));
660      // Actual relevance should be at least what test_cases expects and
661      // and no more than 10 more.
662      EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
663      EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
664    }
665  }
666}
667
668TEST_F(HistoryURLProviderTest, IntranetURLsWithRefs) {
669  struct TestCase {
670    const char* input;
671    int relevance;
672    metrics::OmniboxInputType::Type type;
673  } test_cases[] = {
674    { "gooey", 1410, metrics::OmniboxInputType::UNKNOWN },
675    { "gooey/", 1410, metrics::OmniboxInputType::URL },
676    { "gooey#", 1200, metrics::OmniboxInputType::UNKNOWN },
677    { "gooey/#", 1200, metrics::OmniboxInputType::URL },
678    { "gooey#foo", 1200, metrics::OmniboxInputType::UNKNOWN },
679    { "gooey/#foo", 1200, metrics::OmniboxInputType::URL },
680    { "gooey# foo", 1200, metrics::OmniboxInputType::UNKNOWN },
681    { "gooey/# foo", 1200, metrics::OmniboxInputType::URL },
682  };
683  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
684    SCOPED_TRACE(test_cases[i].input);
685    const UrlAndLegalDefault output[] = {
686        {url_fixer::FixupURL(test_cases[i].input, std::string()).spec(), true}};
687    metrics::OmniboxInputType::Type type;
688    ASSERT_NO_FATAL_FAILURE(
689        RunTest(ASCIIToUTF16(test_cases[i].input),
690                base::string16(), false, output, arraysize(output), &type));
691    // Actual relevance should be at least what test_cases expects and
692    // and no more than 10 more.
693    EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
694    EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
695    // Input type should be what we expect.  This is important because
696    // this provider counts on SearchProvider to give queries a relevance
697    // score >1200 for UNKNOWN inputs and <1200 for URL inputs.  (That's
698    // already tested in search_provider_unittest.cc.)  For this test
699    // here to test that the user sees the correct behavior, it needs
700    // to check that the input type was identified correctly.
701    EXPECT_EQ(test_cases[i].type, type);
702  }
703}
704
705// Makes sure autocompletion happens for intranet sites that have been
706// previoulsy visited.
707TEST_F(HistoryURLProviderTest, IntranetURLCompletion) {
708  sort_matches_ = true;
709
710  const UrlAndLegalDefault expected1[] = {
711    { "http://intra/three", true },
712    { "http://intra/two", true }
713  };
714  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/t"), base::string16(),
715                                  false, expected1, arraysize(expected1)));
716  EXPECT_LE(1410, matches_[0].relevance);
717  EXPECT_LT(matches_[0].relevance, 1420);
718  EXPECT_EQ(matches_[0].relevance - 1, matches_[1].relevance);
719
720  const UrlAndLegalDefault expected2[] = {
721    { "http://moo/b", true },
722    { "http://moo/bar", true }
723  };
724  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("moo/b"), base::string16(),
725                                  false, expected2, arraysize(expected2)));
726  // The url what you typed match should be around 1400, otherwise the
727  // search what you typed match is going to be first.
728  EXPECT_LE(1400, matches_[0].relevance);
729  EXPECT_LT(matches_[0].relevance, 1410);
730
731  const UrlAndLegalDefault expected3[] = {
732    { "http://intra/one", true },
733    { "http://intra/three", true },
734    { "http://intra/two", true }
735  };
736  RunTest(ASCIIToUTF16("intra"), base::string16(), false, expected3,
737          arraysize(expected3));
738
739  const UrlAndLegalDefault expected4[] = {
740    { "http://intra/one", true },
741    { "http://intra/three", true },
742    { "http://intra/two", true }
743  };
744  RunTest(ASCIIToUTF16("intra/"), base::string16(), false, expected4,
745          arraysize(expected4));
746
747  const UrlAndLegalDefault expected5[] = {
748    { "http://intra/one", true }
749  };
750  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/o"), base::string16(),
751                                  false, expected5, arraysize(expected5)));
752  EXPECT_LE(1410, matches_[0].relevance);
753  EXPECT_LT(matches_[0].relevance, 1420);
754
755  const UrlAndLegalDefault expected6[] = {
756    { "http://intra/x", true }
757  };
758  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/x"), base::string16(),
759                                  false, expected6, arraysize(expected6)));
760  EXPECT_LE(1400, matches_[0].relevance);
761  EXPECT_LT(matches_[0].relevance, 1410);
762
763  const UrlAndLegalDefault expected7[] = {
764    { "http://typedhost/untypedpath", true }
765  };
766  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("typedhost/untypedpath"),
767      base::string16(), false, expected7, arraysize(expected7)));
768  EXPECT_LE(1400, matches_[0].relevance);
769  EXPECT_LT(matches_[0].relevance, 1410);
770}
771
772TEST_F(HistoryURLProviderTest, CrashDueToFixup) {
773  // This test passes if we don't crash.  The results don't matter.
774  const char* const test_cases[] = {
775    "//c",
776    "\\@st",
777    "view-source:x",
778  };
779  for (size_t i = 0; i < arraysize(test_cases); ++i) {
780    AutocompleteInput input(ASCIIToUTF16(test_cases[i]), base::string16::npos,
781                            base::string16(), GURL(),
782                            metrics::OmniboxEventProto::INVALID_SPEC,
783                            false, false, true, true);
784    autocomplete_->Start(input, false);
785    if (!autocomplete_->done())
786      base::MessageLoop::current()->Run();
787  }
788}
789
790TEST_F(HistoryURLProviderTest, CullSearchResults) {
791  // Set up a default search engine.
792  TemplateURLData data;
793  data.SetKeyword(ASCIIToUTF16("TestEngine"));
794  data.SetURL("http://testsearch.com/?q={searchTerms}");
795  TemplateURLService* template_url_service =
796      TemplateURLServiceFactory::GetForProfile(profile_.get());
797  TemplateURL* template_url = new TemplateURL(data);
798  template_url_service->Add(template_url);
799  template_url_service->SetUserSelectedDefaultSearchProvider(template_url);
800  template_url_service->Load();
801
802  // URLs we will be using, plus the visit counts they will initially get
803  // (the redirect set below will also increment the visit counts). We want
804  // the results to be in A,B,C order. Note also that our visit counts are
805  // all high enough so that domain synthesizing won't get triggered.
806  struct TestCase {
807    const char* url;
808    int count;
809  } test_cases[] = {
810    {"https://testsearch.com/", 30},
811    {"https://testsearch.com/?q=foobar", 20},
812    {"http://foobar.com/", 10}
813  };
814  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
815    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
816        base::UTF8ToUTF16("Title"), test_cases[i].count, test_cases[i].count,
817        Time::Now(), false, history::SOURCE_BROWSED);
818  }
819
820  // We should not see search URLs when typing a previously used query.
821  const UrlAndLegalDefault expected_when_searching_query[] = {
822    { test_cases[2].url, false }
823  };
824  RunTest(ASCIIToUTF16("foobar"), base::string16(), true,
825      expected_when_searching_query, arraysize(expected_when_searching_query));
826
827  // We should not see search URLs when typing the search engine name.
828  const UrlAndLegalDefault expected_when_searching_site[] = {
829    { test_cases[0].url, false }
830  };
831  RunTest(ASCIIToUTF16("testsearch"), base::string16(), true,
832      expected_when_searching_site, arraysize(expected_when_searching_site));
833}
834
835TEST_F(HistoryURLProviderTest, SuggestExactInput) {
836  const size_t npos = std::string::npos;
837  struct TestCase {
838    // Inputs:
839    const char* input;
840    bool trim_http;
841    // Expected Outputs:
842    const char* contents;
843    // Offsets of the ACMatchClassifications, terminated by npos.
844    size_t offsets[3];
845    // The index of the ACMatchClassification that should have the MATCH bit
846    // set, npos if no ACMatchClassification should have the MATCH bit set.
847    size_t match_classification_index;
848  } test_cases[] = {
849    { "http://www.somesite.com", false,
850      "http://www.somesite.com", {0, npos, npos}, 0 },
851    { "www.somesite.com", true,
852      "www.somesite.com", {0, npos, npos}, 0 },
853    { "www.somesite.com", false,
854      "http://www.somesite.com", {0, 7, npos}, 1 },
855    { "somesite.com", true,
856      "somesite.com", {0, npos, npos}, 0 },
857    { "somesite.com", false,
858      "http://somesite.com", {0, 7, npos}, 1 },
859    { "w", true,
860      "w", {0, npos, npos}, 0 },
861    { "w", false,
862      "http://w", {0, 7, npos}, 1 },
863    { "w.com", true,
864      "w.com", {0, npos, npos}, 0 },
865    { "w.com", false,
866      "http://w.com", {0, 7, npos}, 1 },
867    { "www.w.com", true,
868      "www.w.com", {0, npos, npos}, 0 },
869    { "www.w.com", false,
870      "http://www.w.com", {0, 7, npos}, 1 },
871    { "view-source:w", true,
872      "view-source:w", {0, npos, npos}, 0 },
873    { "view-source:www.w.com/", true,
874      "view-source:www.w.com", {0, npos, npos}, npos },
875    { "view-source:www.w.com/", false,
876      "view-source:http://www.w.com", {0, npos, npos}, npos },
877    { "view-source:http://www.w.com/", false,
878      "view-source:http://www.w.com", {0, npos, npos}, 0 },
879    { "   view-source:", true,
880      "view-source:", {0, npos, npos}, 0 },
881    { "http:////////w.com", false,
882      "http://w.com", {0, npos, npos}, npos },
883    { "    http:////////www.w.com", false,
884      "http://www.w.com", {0, npos, npos}, npos },
885    { "http:a///www.w.com", false,
886      "http://a///www.w.com", {0, npos, npos}, npos },
887    { "mailto://a@b.com", true,
888      "mailto://a@b.com", {0, npos, npos}, 0 },
889    { "mailto://a@b.com", false,
890      "mailto://a@b.com", {0, npos, npos}, 0 },
891  };
892  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
893    SCOPED_TRACE(testing::Message() << "Index " << i << " input: "
894                                    << test_cases[i].input << ", trim_http: "
895                                    << test_cases[i].trim_http);
896
897    AutocompleteInput input(ASCIIToUTF16(test_cases[i].input),
898                            base::string16::npos, base::string16(),
899                            GURL("about:blank"),
900                            metrics::OmniboxEventProto::INVALID_SPEC, false,
901                            false, true, true);
902    AutocompleteMatch match(autocomplete_->SuggestExactInput(
903        input.text(), input.canonicalized_url(), test_cases[i].trim_http));
904    EXPECT_EQ(ASCIIToUTF16(test_cases[i].contents), match.contents);
905    for (size_t match_index = 0; match_index < match.contents_class.size();
906         ++match_index) {
907      EXPECT_EQ(test_cases[i].offsets[match_index],
908                match.contents_class[match_index].offset);
909      EXPECT_EQ(ACMatchClassification::URL |
910                (match_index == test_cases[i].match_classification_index ?
911                 ACMatchClassification::MATCH : 0),
912                match.contents_class[match_index].style);
913    }
914    EXPECT_EQ(npos, test_cases[i].offsets[match.contents_class.size()]);
915  }
916}
917
918TEST_F(HistoryURLProviderTest, HUPScoringExperiment) {
919  HUPScoringParams max_2000_no_time_decay;
920  max_2000_no_time_decay.typed_count_buckets.buckets().push_back(
921      std::make_pair(0.0, 2000));
922  HUPScoringParams max_1250_no_time_decay;
923  max_1250_no_time_decay.typed_count_buckets.buckets().push_back(
924      std::make_pair(0.0, 1250));
925  HUPScoringParams max_1000_no_time_decay;
926  max_1000_no_time_decay.typed_count_buckets.buckets().push_back(
927      std::make_pair(0.0, 1000));
928
929  HUPScoringParams max_1100_with_time_decay_and_max_cap;
930  max_1100_with_time_decay_and_max_cap.typed_count_buckets.
931      set_relevance_cap(1400);
932  max_1100_with_time_decay_and_max_cap.typed_count_buckets.
933      set_half_life_days(16);
934  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
935      std::make_pair(0.5, 1100));
936  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
937      std::make_pair(0.24, 200));
938  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
939      std::make_pair(0.0, 100));
940
941  HUPScoringParams max_1100_visit_typed_decays;
942  max_1100_visit_typed_decays.typed_count_buckets.set_half_life_days(16);
943  max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back(
944      std::make_pair(0.5, 1100));
945  max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back(
946      std::make_pair(0.0, 100));
947  max_1100_visit_typed_decays.visited_count_buckets.set_half_life_days(16);
948  max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back(
949      std::make_pair(0.5, 550));
950  max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back(
951      std::make_pair(0.0, 50));
952
953  const int kMaxMatches = 3;
954  struct TestCase {
955    const char* input;
956    HUPScoringParams scoring_params;
957    struct ExpectedMatch {
958      const char* url;
959      int control_relevance;
960      int experiment_relevance;
961    };
962    ExpectedMatch matches[kMaxMatches];
963  } test_cases[] = {
964    // Max score 2000 -> no demotion.
965    { "7.com/1", max_2000_no_time_decay,
966      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
967
968    // Limit score to 1250/1000 and make sure that the top match is unchanged.
969    { "7.com/1", max_1250_no_time_decay,
970      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
971    { "7.com/2", max_1250_no_time_decay,
972      {{"7.com/2a", 1413, 1413}, {"7.com/2b", 1412, 1250}, {NULL, 0, 0}} },
973    { "7.com/4", max_1000_no_time_decay,
974      {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 1000},
975       {"7.com/4b", 1201, 999}} },
976
977    // Max relevance cap is 1400 and half-life is 16 days.
978    { "7.com/1", max_1100_with_time_decay_and_max_cap,
979      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
980    { "7.com/4", max_1100_with_time_decay_and_max_cap,
981      {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 200},
982       {"7.com/4b", 1201, 100}} },
983
984    // Max relevance cap is 1400 and half-life is 16 days for both visit/typed.
985    { "7.com/5", max_1100_visit_typed_decays,
986      {{"7.com/5", 1203, 1203}, {"7.com/5a", 1202, 50}, {NULL, 0, 0}} },
987  };
988  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
989    SCOPED_TRACE(test_cases[i].input);
990    UrlAndLegalDefault output[kMaxMatches];
991    int max_matches;
992    for (max_matches = 0; max_matches < kMaxMatches; ++max_matches) {
993      if (test_cases[i].matches[max_matches].url == NULL)
994        break;
995      output[max_matches].url =
996          url_fixer::FixupURL(test_cases[i].matches[max_matches].url,
997                              std::string()).spec();
998      output[max_matches].allowed_to_be_default_match = true;
999    }
1000    autocomplete_->scoring_params_ = test_cases[i].scoring_params;
1001
1002    // Test the control (scoring disabled).
1003    autocomplete_->scoring_params_.experimental_scoring_enabled = false;
1004    ASSERT_NO_FATAL_FAILURE(
1005        RunTest(ASCIIToUTF16(test_cases[i].input),
1006                base::string16(), false, output, max_matches));
1007    for (int j = 0; j < max_matches; ++j) {
1008      EXPECT_EQ(test_cases[i].matches[j].control_relevance,
1009                matches_[j].relevance);
1010    }
1011
1012    // Test the experiment (scoring enabled).
1013    autocomplete_->scoring_params_.experimental_scoring_enabled = true;
1014    ASSERT_NO_FATAL_FAILURE(
1015        RunTest(ASCIIToUTF16(test_cases[i].input),
1016                base::string16(), false, output, max_matches));
1017    for (int j = 0; j < max_matches; ++j) {
1018      EXPECT_EQ(test_cases[i].matches[j].experiment_relevance,
1019                matches_[j].relevance);
1020    }
1021  }
1022}
1023