history_url_provider_unittest.cc revision 0529e5d033099cbfc42635f6f6183833b09dff6e
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/history_url_provider.h"
6
7#include <algorithm>
8
9#include "base/message_loop/message_loop.h"
10#include "base/path_service.h"
11#include "base/prefs/pref_service.h"
12#include "base/strings/string_util.h"
13#include "base/strings/utf_string_conversions.h"
14#include "base/time/time.h"
15#include "chrome/browser/autocomplete/autocomplete_match.h"
16#include "chrome/browser/autocomplete/autocomplete_provider.h"
17#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
18#include "chrome/browser/autocomplete/autocomplete_result.h"
19#include "chrome/browser/autocomplete/history_quick_provider.h"
20#include "chrome/browser/history/history_service.h"
21#include "chrome/browser/history/history_service_factory.h"
22#include "chrome/browser/search_engines/template_url.h"
23#include "chrome/browser/search_engines/template_url_service.h"
24#include "chrome/browser/search_engines/template_url_service_factory.h"
25#include "chrome/common/net/url_fixer_upper.h"
26#include "chrome/common/pref_names.h"
27#include "chrome/test/base/testing_browser_process.h"
28#include "chrome/test/base/testing_profile.h"
29#include "content/public/test/test_browser_thread_bundle.h"
30#include "testing/gtest/include/gtest/gtest.h"
31
32using base::ASCIIToUTF16;
33using base::Time;
34using base::TimeDelta;
35
36using content::TestBrowserThreadBundle;
37
// One row of seed history data that the fixture loads before each test.
struct TestURLInfo {
  const char* url;    // Page URL.
  const char* title;  // Page title.
  int visit_count;    // Visit count recorded for the page.
  int typed_count;    // Typed count recorded for the page.
  int age_in_days;    // Subtracted (in days) from "now" for the visit time.
};

// The seed history shared by every test in this file.
TestURLInfo test_db[] = {
  {"http://www.google.com/", "Google", 3, 3, 80},

  // High-quality pages should get a host synthesized as a lower-quality match.
  {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100, 80},

  // Less popular pages should have hosts synthesized as higher-quality
  // matches.
  {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0, 80},

  // Unpopular pages should not appear in the results at all.
  {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 0, 80},

  // If a host has a match, we should pick it up during host synthesis.
  {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 80},
  {"http://news.google.com/", "Google News", 1, 1, 80},

  // Matches that are normally not inline-autocompletable should be
  // autocompleted if they are shorter substitutes for longer matches that would
  // have been inline autocompleted.
  {"http://synthesisatest.com/foo/", "Test A", 1, 1, 80},
  {"http://synthesisbtest.com/foo/", "Test B", 1, 1, 80},
  {"http://synthesisbtest.com/foo/bar.html", "Test B Bar", 2, 2, 80},

  // Suggested short URLs must be "good enough" and must match user input.
  {"http://foo.com/", "Dir", 5, 5, 80},
  {"http://foo.com/dir/", "Dir", 2, 2, 80},
  {"http://foo.com/dir/another/", "Dir", 5, 1, 80},
  {"http://foo.com/dir/another/again/", "Dir", 10, 0, 80},
  {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 80},

  // We throw in a lot of extra URLs here to make sure we're testing the
  // history database's query, not just the autocomplete provider.
  {"http://startest.com/y/a", "A", 2, 2, 80},
  {"http://startest.com/y/b", "B", 5, 2, 80},
  {"http://startest.com/x/c", "C", 5, 2, 80},
  {"http://startest.com/x/d", "D", 5, 5, 80},
  {"http://startest.com/y/e", "E", 4, 2, 80},
  {"http://startest.com/y/f", "F", 3, 2, 80},
  {"http://startest.com/y/g", "G", 3, 2, 80},
  {"http://startest.com/y/h", "H", 3, 2, 80},
  {"http://startest.com/y/i", "I", 3, 2, 80},
  {"http://startest.com/y/j", "J", 3, 2, 80},
  {"http://startest.com/y/k", "K", 3, 2, 80},
  {"http://startest.com/y/l", "L", 3, 2, 80},
  {"http://startest.com/y/m", "M", 3, 2, 80},

  // A file: URL is useful for testing that fixup does the right thing w.r.t.
  // the number of trailing slashes on the user's input.
  {"file:///C:/foo.txt", "", 2, 2, 80},

  // Results with absurdly high typed_counts so that very generic queries like
  // "http" will give consistent results even if more data is added above.
  {"http://bogussite.com/a", "Bogus A", 10002, 10000, 80},
  {"http://bogussite.com/b", "Bogus B", 10001, 10000, 80},
  {"http://bogussite.com/c", "Bogus C", 10000, 10000, 80},

  // Domain name with number.
  {"http://www.17173.com/", "Domain with number", 3, 3, 80},

  // URLs to test exact-matching behavior.
  {"http://go/", "Intranet URL", 1, 1, 80},
  {"http://gooey/", "Intranet URL 2", 5, 5, 80},

  // URLs for testing offset adjustment.
  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2, 80},
  {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 80},
  {"http://ms/c++%20style%20guide", "Style guide", 2, 2, 80},

  // URLs for testing ctrl-enter behavior.
  {"http://binky/", "Intranet binky", 2, 2, 80},
  {"http://winky/", "Intranet winky", 2, 2, 80},
  {"http://www.winky.com/", "Internet winky", 5, 0, 80},

  // URLs used by EmptyVisits.
  {"http://pandora.com/", "Pandora", 2, 2, 80},
  // This entry is explicitly added more recently than
  // history::kLowQualityMatchAgeLimitInDays.
  // {"http://p/", "p", 0, 0, 80},

  // For intranet based tests.
  {"http://intra/one", "Intranet", 2, 2, 80},
  {"http://intra/two", "Intranet two", 1, 1, 80},
  {"http://intra/three", "Intranet three", 2, 2, 80},
  {"http://moo/bar", "Intranet moo", 1, 1, 80},
  {"http://typedhost/typedpath", "Intranet typed", 1, 1, 80},
  {"http://typedhost/untypedpath", "Intranet untyped", 1, 0, 80},

  {"http://x.com/one", "Internet", 2, 2, 80},
  {"http://x.com/two", "Internet two", 1, 1, 80},
  {"http://x.com/three", "Internet three", 2, 2, 80},

  // For experimental HUP scoring test.
  {"http://7.com/1a", "One", 8, 4, 4},
  {"http://7.com/2a", "Two A", 4, 2, 8},
  {"http://7.com/2b", "Two B", 4, 1, 8},
  {"http://7.com/3a", "Three", 2, 1, 16},
  {"http://7.com/4a", "Four A", 1, 1, 32},
  {"http://7.com/4b", "Four B", 1, 1, 64},
  {"http://7.com/5a", "Five A", 8, 0, 64},  // never typed.
};
145
146class HistoryURLProviderTest : public testing::Test,
147                               public AutocompleteProviderListener {
148 public:
149  struct UrlAndLegalDefault {
150    std::string url;
151    bool allowed_to_be_default_match;
152  };
153
154  HistoryURLProviderTest()
155      : sort_matches_(false) {
156    HistoryQuickProvider::set_disabled(true);
157  }
158
159  virtual ~HistoryURLProviderTest() {
160    HistoryQuickProvider::set_disabled(false);
161  }
162
163  // AutocompleteProviderListener:
164  virtual void OnProviderUpdate(bool updated_matches) OVERRIDE;
165
166 protected:
167  static KeyedService* CreateTemplateURLService(
168      content::BrowserContext* profile) {
169    return new TemplateURLService(static_cast<Profile*>(profile));
170  }
171
172  // testing::Test
173  virtual void SetUp() {
174    ASSERT_TRUE(SetUpImpl(false));
175  }
176  virtual void TearDown();
177
178  // Does the real setup.
179  bool SetUpImpl(bool no_db) WARN_UNUSED_RESULT;
180
181  // Fills test data into the history system.
182  void FillData();
183
184  // Runs an autocomplete query on |text| and checks to see that the returned
185  // results' destination URLs match those provided.  Also allows checking
186  // that the input type was identified correctly.
187  void RunTest(const base::string16 text,
188               const base::string16& desired_tld,
189               bool prevent_inline_autocomplete,
190               const UrlAndLegalDefault* expected_urls,
191               size_t num_results,
192               AutocompleteInput::Type* identified_input_type);
193
194  // A version of the above without the final |type| output parameter.
195  void RunTest(const base::string16 text,
196               const base::string16& desired_tld,
197               bool prevent_inline_autocomplete,
198               const UrlAndLegalDefault* expected_urls,
199               size_t num_results) {
200    AutocompleteInput::Type type;
201    return RunTest(text, desired_tld, prevent_inline_autocomplete,
202                   expected_urls, num_results, &type);
203  }
204
205  content::TestBrowserThreadBundle thread_bundle_;
206  ACMatches matches_;
207  scoped_ptr<TestingProfile> profile_;
208  HistoryService* history_service_;
209  scoped_refptr<HistoryURLProvider> autocomplete_;
210  // Should the matches be sorted and duplicates removed?
211  bool sort_matches_;
212};
213
214class HistoryURLProviderTestNoDB : public HistoryURLProviderTest {
215 protected:
216  virtual void SetUp() {
217    ASSERT_TRUE(SetUpImpl(true));
218  }
219};
220
221void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) {
222  if (autocomplete_->done())
223    base::MessageLoop::current()->Quit();
224}
225
226bool HistoryURLProviderTest::SetUpImpl(bool no_db) {
227  profile_.reset(new TestingProfile());
228  if (!(profile_->CreateHistoryService(true, no_db)))
229    return false;
230  if (!no_db) {
231    profile_->BlockUntilHistoryProcessesPendingRequests();
232    profile_->BlockUntilHistoryIndexIsRefreshed();
233  }
234  profile_->GetPrefs()->SetString(prefs::kAcceptLanguages, "en-US,en,ko");
235  history_service_ = HistoryServiceFactory::GetForProfile(
236      profile_.get(), Profile::EXPLICIT_ACCESS);
237
238  autocomplete_ = new HistoryURLProvider(this, profile_.get());
239  TemplateURLServiceFactory::GetInstance()->SetTestingFactoryAndUse(
240      profile_.get(), &HistoryURLProviderTest::CreateTemplateURLService);
241  FillData();
242  return true;
243}
244
245void HistoryURLProviderTest::TearDown() {
246  autocomplete_ = NULL;
247}
248
249void HistoryURLProviderTest::FillData() {
250  // Most visits are a long time ago (some tests require this since we do some
251  // special logic for things visited very recently). Note that this time must
252  // be more recent than the "archived history" threshold for the data to go
253  // into the main database.
254  //
255  // TODO(brettw) It would be nice if we could test this behavior, in which
256  // case the time would be specifed in the test_db structure.
257  const Time now = Time::Now();
258
259  for (size_t i = 0; i < arraysize(test_db); ++i) {
260    const TestURLInfo& cur = test_db[i];
261    const GURL current_url(cur.url);
262    history_service_->AddPageWithDetails(
263        current_url, base::UTF8ToUTF16(cur.title), cur.visit_count,
264        cur.typed_count, now - TimeDelta::FromDays(cur.age_in_days), false,
265        history::SOURCE_BROWSED);
266  }
267
268  history_service_->AddPageWithDetails(
269      GURL("http://p/"), base::UTF8ToUTF16("p"), 0, 0,
270      Time::Now() -
271      TimeDelta::FromDays(history::kLowQualityMatchAgeLimitInDays - 1),
272      false, history::SOURCE_BROWSED);
273}
274
275void HistoryURLProviderTest::RunTest(
276    const base::string16 text,
277    const base::string16& desired_tld,
278    bool prevent_inline_autocomplete,
279    const UrlAndLegalDefault* expected_urls,
280    size_t num_results,
281    AutocompleteInput::Type* identified_input_type) {
282  AutocompleteInput input(text, base::string16::npos, desired_tld, GURL(),
283                          AutocompleteInput::INVALID_SPEC,
284                          prevent_inline_autocomplete, false, true, true);
285  *identified_input_type = input.type();
286  autocomplete_->Start(input, false);
287  if (!autocomplete_->done())
288    base::MessageLoop::current()->Run();
289
290  matches_ = autocomplete_->matches();
291  if (sort_matches_) {
292    for (ACMatches::iterator i = matches_.begin(); i != matches_.end(); ++i)
293      i->ComputeStrippedDestinationURL(profile_.get());
294    AutocompleteResult::DedupMatchesByDestination(
295        input.current_page_classification(), false, &matches_);
296    std::sort(matches_.begin(), matches_.end(),
297              &AutocompleteMatch::MoreRelevant);
298  }
299  ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text
300                                          << "\nTLD: \"" << desired_tld << "\"";
301  for (size_t i = 0; i < num_results; ++i) {
302    EXPECT_EQ(expected_urls[i].url, matches_[i].destination_url.spec());
303    EXPECT_EQ(expected_urls[i].allowed_to_be_default_match,
304              matches_[i].allowed_to_be_default_match);
305  }
306}
307
308TEST_F(HistoryURLProviderTest, PromoteShorterURLs) {
309  // Test that hosts get synthesized below popular pages.
310  const UrlAndLegalDefault expected_nonsynth[] = {
311    { "http://slashdot.org/favorite_page.html", false },
312    { "http://slashdot.org/", false }
313  };
314  RunTest(ASCIIToUTF16("slash"), base::string16(), true, expected_nonsynth,
315          arraysize(expected_nonsynth));
316
317  // Test that hosts get synthesized above less popular pages.
318  const UrlAndLegalDefault expected_synth[] = {
319    { "http://kerneltrap.org/", false },
320    { "http://kerneltrap.org/not_very_popular.html", false }
321  };
322  RunTest(ASCIIToUTF16("kernel"), base::string16(), true, expected_synth,
323          arraysize(expected_synth));
324
325  // Test that unpopular pages are ignored completely.
326  RunTest(ASCIIToUTF16("fresh"), base::string16(), true, NULL, 0);
327
328  // Test that if we create or promote shorter suggestions that would not
329  // normally be inline autocompletable, we make them inline autocompletable if
330  // the original suggestion (that we replaced as "top") was inline
331  // autocompletable.
332  const UrlAndLegalDefault expected_synthesisa[] = {
333    { "http://synthesisatest.com/", true },
334    { "http://synthesisatest.com/foo/", true }
335  };
336  RunTest(ASCIIToUTF16("synthesisa"), base::string16(), false,
337          expected_synthesisa, arraysize(expected_synthesisa));
338  EXPECT_LT(matches_.front().relevance, 1200);
339  const UrlAndLegalDefault expected_synthesisb[] = {
340    { "http://synthesisbtest.com/foo/", true },
341    { "http://synthesisbtest.com/foo/bar.html", true }
342  };
343  RunTest(ASCIIToUTF16("synthesisb"), base::string16(), false,
344          expected_synthesisb, arraysize(expected_synthesisb));
345  EXPECT_GE(matches_.front().relevance, 1410);
346
347  // Test that if we have a synthesized host that matches a suggestion, they
348  // get combined into one.
349  const UrlAndLegalDefault expected_combine[] = {
350    { "http://news.google.com/", false },
351    { "http://news.google.com/?ned=us&topic=n", false },
352  };
353  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), base::string16(), true,
354      expected_combine, arraysize(expected_combine)));
355  // The title should also have gotten set properly on the host for the
356  // synthesized one, since it was also in the results.
357  EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description);
358
359  // Test that short URL matching works correctly as the user types more
360  // (several tests):
361  // The entry for foo.com is the best of all five foo.com* entries.
362  const UrlAndLegalDefault short_1[] = {
363    { "http://foo.com/", false },
364    { "http://foo.com/dir/another/again/myfile.html", false },
365    { "http://foo.com/dir/", false }
366  };
367  RunTest(ASCIIToUTF16("foo"), base::string16(), true,
368          short_1, arraysize(short_1));
369
370  // When the user types the whole host, make sure we don't get two results for
371  // it.
372  const UrlAndLegalDefault short_2[] = {
373    { "http://foo.com/", true },
374    { "http://foo.com/dir/another/again/myfile.html", false },
375    { "http://foo.com/dir/", false },
376    { "http://foo.com/dir/another/", false }
377  };
378  RunTest(ASCIIToUTF16("foo.com"), base::string16(), true, short_2,
379          arraysize(short_2));
380  RunTest(ASCIIToUTF16("foo.com/"), base::string16(), true, short_2,
381          arraysize(short_2));
382
383  // The filename is the second best of the foo.com* entries, but there is a
384  // shorter URL that's "good enough".  The host doesn't match the user input
385  // and so should not appear.
386  const UrlAndLegalDefault short_3[] = {
387    { "http://foo.com/d", true },
388    { "http://foo.com/dir/another/", false },
389    { "http://foo.com/dir/another/again/myfile.html", false },
390    { "http://foo.com/dir/", false }
391  };
392  RunTest(ASCIIToUTF16("foo.com/d"), base::string16(), true, short_3,
393          arraysize(short_3));
394
395  // We shouldn't promote shorter URLs than the best if they're not good
396  // enough.
397  const UrlAndLegalDefault short_4[] = {
398    { "http://foo.com/dir/another/a", true },
399    { "http://foo.com/dir/another/again/myfile.html", false },
400    { "http://foo.com/dir/another/again/", false }
401  };
402  RunTest(ASCIIToUTF16("foo.com/dir/another/a"), base::string16(), true,
403          short_4, arraysize(short_4));
404
405  // Exact matches should always be best no matter how much more another match
406  // has been typed.
407  const UrlAndLegalDefault short_5a[] = {
408    { "http://gooey/", true },
409    { "http://www.google.com/", true },
410    { "http://go/", true }
411  };
412  const UrlAndLegalDefault short_5b[] = {
413    { "http://go/", true },
414    { "http://gooey/", true },
415    { "http://www.google.com/", true }
416  };
417  RunTest(ASCIIToUTF16("g"), base::string16(), false,
418          short_5a, arraysize(short_5a));
419  RunTest(ASCIIToUTF16("go"), base::string16(), false,
420          short_5b, arraysize(short_5b));
421}
422
423TEST_F(HistoryURLProviderTest, CullRedirects) {
424  // URLs we will be using, plus the visit counts they will initially get
425  // (the redirect set below will also increment the visit counts). We want
426  // the results to be in A,B,C order. Note also that our visit counts are
427  // all high enough so that domain synthesizing won't get triggered.
428  struct TestCase {
429    const char* url;
430    int count;
431  } test_cases[] = {
432    {"http://redirects/A", 30},
433    {"http://redirects/B", 20},
434    {"http://redirects/C", 10}
435  };
436  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
437    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
438        ASCIIToUTF16("Title"), test_cases[i].count, test_cases[i].count,
439        Time::Now(), false, history::SOURCE_BROWSED);
440  }
441
442  // Create a B->C->A redirect chain, but set the visit counts such that they
443  // will appear in A,B,C order in the results. The autocomplete query will
444  // search for the most recent visit when looking for redirects, so this will
445  // be found even though the previous visits had no redirects.
446  history::RedirectList redirects_to_a;
447  redirects_to_a.push_back(GURL(test_cases[1].url));
448  redirects_to_a.push_back(GURL(test_cases[2].url));
449  redirects_to_a.push_back(GURL(test_cases[0].url));
450  history_service_->AddPage(GURL(test_cases[0].url), base::Time::Now(),
451      NULL, 0, GURL(), redirects_to_a, content::PAGE_TRANSITION_TYPED,
452      history::SOURCE_BROWSED, true);
453
454  // Because all the results are part of a redirect chain with other results,
455  // all but the first one (A) should be culled. We should get the default
456  // "what you typed" result, plus this one.
457  const base::string16 typing(ASCIIToUTF16("http://redirects/"));
458  const UrlAndLegalDefault expected_results[] = {
459    { base::UTF16ToUTF8(typing), true },
460    { test_cases[0].url, false }
461  };
462  RunTest(typing, base::string16(), true, expected_results,
463          arraysize(expected_results));
464}
465
466TEST_F(HistoryURLProviderTest, WhatYouTyped) {
467  // Make sure we suggest a What You Typed match at the right times.
468  RunTest(ASCIIToUTF16("wytmatch"), base::string16(), false, NULL, 0);
469  RunTest(ASCIIToUTF16("wytmatch foo bar"), base::string16(), false, NULL, 0);
470  RunTest(ASCIIToUTF16("wytmatch+foo+bar"), base::string16(), false, NULL, 0);
471  RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), base::string16(), false,
472          NULL, 0);
473
474  const UrlAndLegalDefault results_1[] = {
475    { "http://www.wytmatch.com/", true }
476  };
477  RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1,
478          arraysize(results_1));
479
480  const UrlAndLegalDefault results_2[] = {
481    { "http://wytmatch%20foo%20bar/", true }
482  };
483  RunTest(ASCIIToUTF16("http://wytmatch foo bar"), base::string16(), false,
484          results_2, arraysize(results_2));
485
486  const UrlAndLegalDefault results_3[] = {
487    { "https://wytmatch%20foo%20bar/", true }
488  };
489  RunTest(ASCIIToUTF16("https://wytmatch foo bar"), base::string16(), false,
490          results_3, arraysize(results_3));
491}
492
493TEST_F(HistoryURLProviderTest, Fixup) {
494  // Test for various past crashes we've had.
495  RunTest(ASCIIToUTF16("\\"), base::string16(), false, NULL, 0);
496  RunTest(ASCIIToUTF16("#"), base::string16(), false, NULL, 0);
497  RunTest(ASCIIToUTF16("%20"), base::string16(), false, NULL, 0);
498  const UrlAndLegalDefault fixup_crash[] = {
499    { "http://%EF%BD%A5@s/", true }
500  };
501  RunTest(base::WideToUTF16(L"\uff65@s"), base::string16(), false, fixup_crash,
502          arraysize(fixup_crash));
503  RunTest(base::WideToUTF16(L"\u2015\u2015@ \uff7c"), base::string16(), false,
504          NULL, 0);
505
506  // Fixing up "file:" should result in an inline autocomplete offset of just
507  // after "file:", not just after "file://".
508  const base::string16 input_1(ASCIIToUTF16("file:"));
509  const UrlAndLegalDefault fixup_1[] = {
510    { "file:///C:/foo.txt", true }
511  };
512  ASSERT_NO_FATAL_FAILURE(RunTest(input_1, base::string16(), false, fixup_1,
513                                  arraysize(fixup_1)));
514  EXPECT_EQ(ASCIIToUTF16("///C:/foo.txt"),
515            matches_.front().inline_autocompletion);
516
517  // Fixing up "http:/" should result in an inline autocomplete offset of just
518  // after "http:/", not just after "http:".
519  const base::string16 input_2(ASCIIToUTF16("http:/"));
520  const UrlAndLegalDefault fixup_2[] = {
521    { "http://bogussite.com/a", true },
522    { "http://bogussite.com/b", true },
523    { "http://bogussite.com/c", true }
524  };
525  ASSERT_NO_FATAL_FAILURE(RunTest(input_2, base::string16(), false, fixup_2,
526                                  arraysize(fixup_2)));
527  EXPECT_EQ(ASCIIToUTF16("/bogussite.com/a"),
528            matches_.front().inline_autocompletion);
529
530  // Adding a TLD to a small number like "56" should result in "www.56.com"
531  // rather than "0.0.0.56.com".
532  const UrlAndLegalDefault fixup_3[] = {
533    { "http://www.56.com/", true }
534  };
535  RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3,
536          arraysize(fixup_3));
537
538  // An input looks like a IP address like "127.0.0.1" should result in
539  // "http://127.0.0.1/".
540  const UrlAndLegalDefault fixup_4[] = {
541    { "http://127.0.0.1/", true }
542  };
543  RunTest(ASCIIToUTF16("127.0.0.1"), base::string16(), false, fixup_4,
544          arraysize(fixup_4));
545
546  // An number "17173" should result in "http://www.17173.com/" in db.
547  const UrlAndLegalDefault fixup_5[] = {
548    { "http://www.17173.com/", true }
549  };
550  RunTest(ASCIIToUTF16("17173"), base::string16(), false, fixup_5,
551          arraysize(fixup_5));
552}
553
554// Make sure the results for the input 'p' don't change between the first and
555// second passes.
556TEST_F(HistoryURLProviderTest, EmptyVisits) {
557  // Wait for history to create the in memory DB.
558  profile_->BlockUntilHistoryProcessesPendingRequests();
559
560  AutocompleteInput input(ASCIIToUTF16("p"), base::string16::npos,
561                          base::string16(), GURL(),
562                          AutocompleteInput::INVALID_SPEC, false, false, true,
563                          true);
564  autocomplete_->Start(input, false);
565  // HistoryURLProvider shouldn't be done (waiting on async results).
566  EXPECT_FALSE(autocomplete_->done());
567
568  // We should get back an entry for pandora.
569  matches_ = autocomplete_->matches();
570  ASSERT_GT(matches_.size(), 0u);
571  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
572  int pandora_relevance = matches_[0].relevance;
573
574  // Run the message loop. When |autocomplete_| finishes the loop is quit.
575  base::MessageLoop::current()->Run();
576  EXPECT_TRUE(autocomplete_->done());
577  matches_ = autocomplete_->matches();
578  ASSERT_GT(matches_.size(), 0u);
579  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
580  EXPECT_EQ(pandora_relevance, matches_[0].relevance);
581}
582
583TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
584  // Ensure that we will still produce matches for navigation when there is no
585  // database.
586  UrlAndLegalDefault navigation_1[] = {
587    { "http://test.com/", true }
588  };
589  RunTest(ASCIIToUTF16("test.com"), base::string16(), false, navigation_1,
590          arraysize(navigation_1));
591
592  UrlAndLegalDefault navigation_2[] = {
593    { "http://slash/", true }
594  };
595  RunTest(ASCIIToUTF16("slash"), base::string16(), false, navigation_2,
596          arraysize(navigation_2));
597
598  RunTest(ASCIIToUTF16("this is a query"), base::string16(), false, NULL, 0);
599}
600
601TEST_F(HistoryURLProviderTest, DontAutocompleteOnTrailingWhitespace) {
602  AutocompleteInput input(ASCIIToUTF16("slash "), base::string16::npos,
603                          base::string16(), GURL(),
604                          AutocompleteInput::INVALID_SPEC, false, false,
605                          true, true);
606  autocomplete_->Start(input, false);
607  if (!autocomplete_->done())
608    base::MessageLoop::current()->Run();
609
610  // None of the matches should attempt to autocomplete.
611  matches_ = autocomplete_->matches();
612  for (size_t i = 0; i < matches_.size(); ++i) {
613    EXPECT_TRUE(matches_[i].inline_autocompletion.empty());
614    EXPECT_FALSE(matches_[i].allowed_to_be_default_match);
615  }
616}
617
618TEST_F(HistoryURLProviderTest, TreatEmailsAsSearches) {
619  // Visiting foo.com should not make this string be treated as a navigation.
620  // That means the result should be scored around 1200 ("what you typed")
621  // and not 1400+.
622  const UrlAndLegalDefault expected[] = {
623    { "http://user@foo.com/", true }
624  };
625  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("user@foo.com"),
626                                  base::string16(), false, expected,
627                                  arraysize(expected)));
628  EXPECT_LE(1200, matches_[0].relevance);
629  EXPECT_LT(matches_[0].relevance, 1210);
630}
631
632TEST_F(HistoryURLProviderTest, IntranetURLsWithPaths) {
633  struct TestCase {
634    const char* input;
635    int relevance;
636  } test_cases[] = {
637    { "fooey", 0 },
638    { "fooey/", 1200 },     // 1200 for URL would still navigate by default.
639    { "fooey/a", 1200 },    // 1200 for UNKNOWN would not.
640    { "fooey/a b", 1200 },  // Also UNKNOWN.
641    { "gooey", 1410 },
642    { "gooey/", 1410 },
643    { "gooey/a", 1400 },
644    { "gooey/a b", 1400 },
645  };
646  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
647    SCOPED_TRACE(test_cases[i].input);
648    if (test_cases[i].relevance == 0) {
649      RunTest(ASCIIToUTF16(test_cases[i].input), base::string16(), false,
650              NULL, 0);
651    } else {
652      const UrlAndLegalDefault output[] = {
653        { URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec(),
654          true }
655      };
656      ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16(test_cases[i].input),
657                              base::string16(), false,
658                              output, arraysize(output)));
659      // Actual relevance should be at least what test_cases expects and
660      // and no more than 10 more.
661      EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
662      EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
663    }
664  }
665}
666
667TEST_F(HistoryURLProviderTest, IntranetURLsWithRefs) {
668  struct TestCase {
669    const char* input;
670    int relevance;
671    AutocompleteInput::Type type;
672  } test_cases[] = {
673    { "gooey", 1410, AutocompleteInput::UNKNOWN },
674    { "gooey/", 1410, AutocompleteInput::URL },
675    { "gooey#", 1200, AutocompleteInput::UNKNOWN },
676    { "gooey/#", 1200, AutocompleteInput::URL },
677    { "gooey#foo", 1200, AutocompleteInput::UNKNOWN },
678    { "gooey/#foo", 1200, AutocompleteInput::URL },
679    { "gooey# foo", 1200, AutocompleteInput::UNKNOWN },
680    { "gooey/# foo", 1200, AutocompleteInput::URL },
681  };
682  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
683    SCOPED_TRACE(test_cases[i].input);
684    const UrlAndLegalDefault output[] = {
685      { URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec(),
686        true }
687    };
688    AutocompleteInput::Type type;
689    ASSERT_NO_FATAL_FAILURE(
690        RunTest(ASCIIToUTF16(test_cases[i].input),
691                base::string16(), false, output, arraysize(output), &type));
692    // Actual relevance should be at least what test_cases expects and
693    // and no more than 10 more.
694    EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
695    EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
696    // Input type should be what we expect.  This is important because
697    // this provider counts on SearchProvider to give queries a relevance
698    // score >1200 for UNKNOWN inputs and <1200 for URL inputs.  (That's
699    // already tested in search_provider_unittest.cc.)  For this test
700    // here to test that the user sees the correct behavior, it needs
701    // to check that the input type was identified correctly.
702    EXPECT_EQ(test_cases[i].type, type);
703  }
704}
705
// Makes sure autocompletion happens for intranet sites that have been
// previously visited.
708TEST_F(HistoryURLProviderTest, IntranetURLCompletion) {
709  sort_matches_ = true;
710
711  const UrlAndLegalDefault expected1[] = {
712    { "http://intra/three", true },
713    { "http://intra/two", true }
714  };
715  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/t"), base::string16(),
716                                  false, expected1, arraysize(expected1)));
717  EXPECT_LE(1410, matches_[0].relevance);
718  EXPECT_LT(matches_[0].relevance, 1420);
719  EXPECT_EQ(matches_[0].relevance - 1, matches_[1].relevance);
720
721  const UrlAndLegalDefault expected2[] = {
722    { "http://moo/b", true },
723    { "http://moo/bar", true }
724  };
725  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("moo/b"), base::string16(),
726                                  false, expected2, arraysize(expected2)));
727  // The url what you typed match should be around 1400, otherwise the
728  // search what you typed match is going to be first.
729  EXPECT_LE(1400, matches_[0].relevance);
730  EXPECT_LT(matches_[0].relevance, 1410);
731
732  const UrlAndLegalDefault expected3[] = {
733    { "http://intra/one", true },
734    { "http://intra/three", true },
735    { "http://intra/two", true }
736  };
737  RunTest(ASCIIToUTF16("intra"), base::string16(), false, expected3,
738          arraysize(expected3));
739
740  const UrlAndLegalDefault expected4[] = {
741    { "http://intra/one", true },
742    { "http://intra/three", true },
743    { "http://intra/two", true }
744  };
745  RunTest(ASCIIToUTF16("intra/"), base::string16(), false, expected4,
746          arraysize(expected4));
747
748  const UrlAndLegalDefault expected5[] = {
749    { "http://intra/one", true }
750  };
751  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/o"), base::string16(),
752                                  false, expected5, arraysize(expected5)));
753  EXPECT_LE(1410, matches_[0].relevance);
754  EXPECT_LT(matches_[0].relevance, 1420);
755
756  const UrlAndLegalDefault expected6[] = {
757    { "http://intra/x", true }
758  };
759  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/x"), base::string16(),
760                                  false, expected6, arraysize(expected6)));
761  EXPECT_LE(1400, matches_[0].relevance);
762  EXPECT_LT(matches_[0].relevance, 1410);
763
764  const UrlAndLegalDefault expected7[] = {
765    { "http://typedhost/untypedpath", true }
766  };
767  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("typedhost/untypedpath"),
768      base::string16(), false, expected7, arraysize(expected7)));
769  EXPECT_LE(1400, matches_[0].relevance);
770  EXPECT_LT(matches_[0].relevance, 1410);
771}
772
773TEST_F(HistoryURLProviderTest, CrashDueToFixup) {
774  // This test passes if we don't crash.  The results don't matter.
775  const char* const test_cases[] = {
776    "//c",
777    "\\@st",
778    "view-source:x",
779  };
780  for (size_t i = 0; i < arraysize(test_cases); ++i) {
781    AutocompleteInput input(ASCIIToUTF16(test_cases[i]), base::string16::npos,
782                            base::string16(), GURL(),
783                            AutocompleteInput::INVALID_SPEC,
784                            false, false, true, true);
785    autocomplete_->Start(input, false);
786    if (!autocomplete_->done())
787      base::MessageLoop::current()->Run();
788  }
789}
790
791TEST_F(HistoryURLProviderTest, CullSearchResults) {
792  // Set up a default search engine.
793  TemplateURLData data;
794  data.SetKeyword(ASCIIToUTF16("TestEngine"));
795  data.SetURL("http://testsearch.com/?q={searchTerms}");
796  TemplateURLService* template_url_service =
797      TemplateURLServiceFactory::GetForProfile(profile_.get());
798  TemplateURL* template_url = new TemplateURL(profile_.get(), data);
799  template_url_service->Add(template_url);
800  template_url_service->SetDefaultSearchProvider(template_url);
801  template_url_service->Load();
802
803  // URLs we will be using, plus the visit counts they will initially get
804  // (the redirect set below will also increment the visit counts). We want
805  // the results to be in A,B,C order. Note also that our visit counts are
806  // all high enough so that domain synthesizing won't get triggered.
807  struct TestCase {
808    const char* url;
809    int count;
810  } test_cases[] = {
811    {"https://testsearch.com/", 30},
812    {"https://testsearch.com/?q=foobar", 20},
813    {"http://foobar.com/", 10}
814  };
815  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
816    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
817        base::UTF8ToUTF16("Title"), test_cases[i].count, test_cases[i].count,
818        Time::Now(), false, history::SOURCE_BROWSED);
819  }
820
821  // We should not see search URLs when typing a previously used query.
822  const UrlAndLegalDefault expected_when_searching_query[] = {
823    { test_cases[2].url, false }
824  };
825  RunTest(ASCIIToUTF16("foobar"), base::string16(), true,
826      expected_when_searching_query, arraysize(expected_when_searching_query));
827
828  // We should not see search URLs when typing the search engine name.
829  const UrlAndLegalDefault expected_when_searching_site[] = {
830    { test_cases[0].url, false }
831  };
832  RunTest(ASCIIToUTF16("testsearch"), base::string16(), true,
833      expected_when_searching_site, arraysize(expected_when_searching_site));
834}
835
836TEST_F(HistoryURLProviderTest, SuggestExactInput) {
837  const size_t npos = std::string::npos;
838  struct TestCase {
839    // Inputs:
840    const char* input;
841    bool trim_http;
842    // Expected Outputs:
843    const char* contents;
844    // Offsets of the ACMatchClassifications, terminated by npos.
845    size_t offsets[3];
846    // The index of the ACMatchClassification that should have the MATCH bit
847    // set, npos if no ACMatchClassification should have the MATCH bit set.
848    size_t match_classification_index;
849  } test_cases[] = {
850    { "http://www.somesite.com", false,
851      "http://www.somesite.com", {0, npos, npos}, 0 },
852    { "www.somesite.com", true,
853      "www.somesite.com", {0, npos, npos}, 0 },
854    { "www.somesite.com", false,
855      "http://www.somesite.com", {0, 7, npos}, 1 },
856    { "somesite.com", true,
857      "somesite.com", {0, npos, npos}, 0 },
858    { "somesite.com", false,
859      "http://somesite.com", {0, 7, npos}, 1 },
860    { "w", true,
861      "w", {0, npos, npos}, 0 },
862    { "w", false,
863      "http://w", {0, 7, npos}, 1 },
864    { "w.com", true,
865      "w.com", {0, npos, npos}, 0 },
866    { "w.com", false,
867      "http://w.com", {0, 7, npos}, 1 },
868    { "www.w.com", true,
869      "www.w.com", {0, npos, npos}, 0 },
870    { "www.w.com", false,
871      "http://www.w.com", {0, 7, npos}, 1 },
872    { "view-source:w", true,
873      "view-source:w", {0, npos, npos}, 0 },
874    { "view-source:www.w.com/", true,
875      "view-source:www.w.com", {0, npos, npos}, npos },
876    { "view-source:www.w.com/", false,
877      "view-source:http://www.w.com", {0, npos, npos}, npos },
878    { "view-source:http://www.w.com/", false,
879      "view-source:http://www.w.com", {0, npos, npos}, 0 },
880    { "   view-source:", true,
881      "view-source:", {0, npos, npos}, 0 },
882    { "http:////////w.com", false,
883      "http://w.com", {0, npos, npos}, npos },
884    { "    http:////////www.w.com", false,
885      "http://www.w.com", {0, npos, npos}, npos },
886    { "http:a///www.w.com", false,
887      "http://a///www.w.com", {0, npos, npos}, npos },
888    { "mailto://a@b.com", true,
889      "mailto://a@b.com", {0, npos, npos}, 0 },
890    { "mailto://a@b.com", false,
891      "mailto://a@b.com", {0, npos, npos}, 0 },
892  };
893  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
894    SCOPED_TRACE(testing::Message() << "Index " << i << " input: "
895                                    << test_cases[i].input << ", trim_http: "
896                                    << test_cases[i].trim_http);
897
898    AutocompleteInput input(ASCIIToUTF16(test_cases[i].input),
899                            base::string16::npos, base::string16(),
900                            GURL("about:blank"),
901                            AutocompleteInput::INVALID_SPEC, false, false, true,
902                            true);
903    AutocompleteMatch match(autocomplete_->SuggestExactInput(
904        input.text(), input.canonicalized_url(), test_cases[i].trim_http));
905    EXPECT_EQ(ASCIIToUTF16(test_cases[i].contents), match.contents);
906    for (size_t match_index = 0; match_index < match.contents_class.size();
907         ++match_index) {
908      EXPECT_EQ(test_cases[i].offsets[match_index],
909                match.contents_class[match_index].offset);
910      EXPECT_EQ(ACMatchClassification::URL |
911                (match_index == test_cases[i].match_classification_index ?
912                 ACMatchClassification::MATCH : 0),
913                match.contents_class[match_index].style);
914    }
915    EXPECT_EQ(npos, test_cases[i].offsets[match.contents_class.size()]);
916  }
917}
918
919TEST_F(HistoryURLProviderTest, HUPScoringExperiment) {
920  HUPScoringParams max_2000_no_time_decay;
921  max_2000_no_time_decay.typed_count_buckets.buckets().push_back(
922      std::make_pair(0.0, 2000));
923  HUPScoringParams max_1250_no_time_decay;
924  max_1250_no_time_decay.typed_count_buckets.buckets().push_back(
925      std::make_pair(0.0, 1250));
926  HUPScoringParams max_1000_no_time_decay;
927  max_1000_no_time_decay.typed_count_buckets.buckets().push_back(
928      std::make_pair(0.0, 1000));
929
930  HUPScoringParams max_1100_with_time_decay_and_max_cap;
931  max_1100_with_time_decay_and_max_cap.typed_count_buckets.
932      set_relevance_cap(1400);
933  max_1100_with_time_decay_and_max_cap.typed_count_buckets.
934      set_half_life_days(16);
935  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
936      std::make_pair(0.5, 1100));
937  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
938      std::make_pair(0.24, 200));
939  max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back(
940      std::make_pair(0.0, 100));
941
942  HUPScoringParams max_1100_visit_typed_decays;
943  max_1100_visit_typed_decays.typed_count_buckets.set_half_life_days(16);
944  max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back(
945      std::make_pair(0.5, 1100));
946  max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back(
947      std::make_pair(0.0, 100));
948  max_1100_visit_typed_decays.visited_count_buckets.set_half_life_days(16);
949  max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back(
950      std::make_pair(0.5, 550));
951  max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back(
952      std::make_pair(0.0, 50));
953
954  const int kMaxMatches = 3;
955  struct TestCase {
956    const char* input;
957    HUPScoringParams scoring_params;
958    struct ExpectedMatch {
959      const char* url;
960      int control_relevance;
961      int experiment_relevance;
962    };
963    ExpectedMatch matches[kMaxMatches];
964  } test_cases[] = {
965    // Max score 2000 -> no demotion.
966    { "7.com/1", max_2000_no_time_decay,
967      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
968
969    // Limit score to 1250/1000 and make sure that the top match is unchanged.
970    { "7.com/1", max_1250_no_time_decay,
971      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
972    { "7.com/2", max_1250_no_time_decay,
973      {{"7.com/2a", 1413, 1413}, {"7.com/2b", 1412, 1250}, {NULL, 0, 0}} },
974    { "7.com/4", max_1000_no_time_decay,
975      {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 1000},
976       {"7.com/4b", 1201, 999}} },
977
978    // Max relevance cap is 1400 and half-life is 16 days.
979    { "7.com/1", max_1100_with_time_decay_and_max_cap,
980      {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} },
981    { "7.com/4", max_1100_with_time_decay_and_max_cap,
982      {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 200},
983       {"7.com/4b", 1201, 100}} },
984
985    // Max relevance cap is 1400 and half-life is 16 days for both visit/typed.
986    { "7.com/5", max_1100_visit_typed_decays,
987      {{"7.com/5", 1203, 1203}, {"7.com/5a", 1202, 50}, {NULL, 0, 0}} },
988  };
989  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
990    SCOPED_TRACE(test_cases[i].input);
991    UrlAndLegalDefault output[kMaxMatches];
992    int max_matches;
993    for (max_matches = 0; max_matches < kMaxMatches; ++max_matches) {
994      if (test_cases[i].matches[max_matches].url == NULL)
995        break;
996      output[max_matches].url = URLFixerUpper::FixupURL(
997          test_cases[i].matches[max_matches].url, std::string()).spec();
998      output[max_matches].allowed_to_be_default_match = true;
999    }
1000    autocomplete_->scoring_params_ = test_cases[i].scoring_params;
1001
1002    // Test the control (scoring disabled).
1003    autocomplete_->scoring_params_.experimental_scoring_enabled = false;
1004    ASSERT_NO_FATAL_FAILURE(
1005        RunTest(ASCIIToUTF16(test_cases[i].input),
1006                base::string16(), false, output, max_matches));
1007    for (int j = 0; j < max_matches; ++j) {
1008      EXPECT_EQ(test_cases[i].matches[j].control_relevance,
1009                matches_[j].relevance);
1010    }
1011
1012    // Test the experiment (scoring enabled).
1013    autocomplete_->scoring_params_.experimental_scoring_enabled = true;
1014    ASSERT_NO_FATAL_FAILURE(
1015        RunTest(ASCIIToUTF16(test_cases[i].input),
1016                base::string16(), false, output, max_matches));
1017    for (int j = 0; j < max_matches; ++j) {
1018      EXPECT_EQ(test_cases[i].matches[j].experiment_relevance,
1019                matches_[j].relevance);
1020    }
1021  }
1022}
1023