history_url_provider_unittest.cc revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/autocomplete/history_url_provider.h"

#include <algorithm>

#include "base/message_loop/message_loop.h"
#include "base/path_service.h"
#include "base/prefs/pref_service.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "base/time/time.h"
#include "chrome/browser/autocomplete/autocomplete_match.h"
#include "chrome/browser/autocomplete/autocomplete_provider.h"
#include "chrome/browser/autocomplete/autocomplete_provider_listener.h"
#include "chrome/browser/autocomplete/history_quick_provider.h"
#include "chrome/browser/history/history_service.h"
#include "chrome/browser/history/history_service_factory.h"
#include "chrome/browser/search_engines/template_url.h"
#include "chrome/browser/search_engines/template_url_service.h"
#include "chrome/browser/search_engines/template_url_service_factory.h"
#include "chrome/common/net/url_fixer_upper.h"
#include "chrome/common/pref_names.h"
#include "chrome/test/base/testing_browser_process.h"
#include "chrome/test/base/testing_profile.h"
#include "content/public/test/test_browser_thread_bundle.h"
#include "testing/gtest/include/gtest/gtest.h"

using base::ASCIIToUTF16;
using base::Time;
using base::TimeDelta;

using content::TestBrowserThreadBundle;

// One row of seed history. HistoryURLProviderTest::FillData() passes every row
// of |test_db| to HistoryService::AddPageWithDetails() before each test runs.
struct TestURLInfo {
  // Spec of the page; wrapped in a GURL when the row is added.
  const char* url;
  // UTF-8 page title; converted to UTF-16 when the row is added.
  const char* title;
  // Visit count handed to AddPageWithDetails().
  int visit_count;
  // Typed count handed to AddPageWithDetails().
  int typed_count;
  // The row's timestamp is this many days before the moment FillData() runs.
  int age_in_days;
} test_db[] = {
  {"http://www.google.com/", "Google", 3, 3, 80},

  // High-quality pages should get a host synthesized as a lower-quality match.
  {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100, 80},

  // Less popular pages should have hosts synthesized as higher-quality
  // matches.
  {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0, 80},

  // Unpopular pages should not appear in the results at all.
  {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 0, 80},

  // If a host has a match, we should pick it up during host synthesis.
  {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2, 80},
  {"http://news.google.com/", "Google News", 1, 1, 80},

  // Matches that are normally not inline-autocompletable should be
  // autocompleted if they are shorter substitutes for longer matches that would
  // have been inline autocompleted.
  {"http://synthesisatest.com/foo/", "Test A", 1, 1, 80},
  {"http://synthesisbtest.com/foo/", "Test B", 1, 1, 80},
  {"http://synthesisbtest.com/foo/bar.html", "Test B Bar", 2, 2, 80},

  // Suggested short URLs must be "good enough" and must match user input.
  {"http://foo.com/", "Dir", 5, 5, 80},
  {"http://foo.com/dir/", "Dir", 2, 2, 80},
  {"http://foo.com/dir/another/", "Dir", 5, 1, 80},
  {"http://foo.com/dir/another/again/", "Dir", 10, 0, 80},
  {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2, 80},

  // We throw in a lot of extra URLs here to make sure we're testing the
  // history database's query, not just the autocomplete provider.
  {"http://startest.com/y/a", "A", 2, 2, 80},
  {"http://startest.com/y/b", "B", 5, 2, 80},
  {"http://startest.com/x/c", "C", 5, 2, 80},
  {"http://startest.com/x/d", "D", 5, 5, 80},
  {"http://startest.com/y/e", "E", 4, 2, 80},
  {"http://startest.com/y/f", "F", 3, 2, 80},
  {"http://startest.com/y/g", "G", 3, 2, 80},
  {"http://startest.com/y/h", "H", 3, 2, 80},
  {"http://startest.com/y/i", "I", 3, 2, 80},
  {"http://startest.com/y/j", "J", 3, 2, 80},
  {"http://startest.com/y/k", "K", 3, 2, 80},
  {"http://startest.com/y/l", "L", 3, 2, 80},
  {"http://startest.com/y/m", "M", 3, 2, 80},

  // A file: URL is useful for testing that fixup does the right thing w.r.t.
  // the number of trailing slashes on the user's input.
  {"file:///C:/foo.txt", "", 2, 2, 80},

  // Results with absurdly high typed_counts so that very generic queries like
  // "http" will give consistent results even if more data is added above.
  {"http://bogussite.com/a", "Bogus A", 10002, 10000, 80},
  {"http://bogussite.com/b", "Bogus B", 10001, 10000, 80},
  {"http://bogussite.com/c", "Bogus C", 10000, 10000, 80},

  // Domain name with number.
  {"http://www.17173.com/", "Domain with number", 3, 3, 80},

  // URLs to test exact-matching behavior.
  {"http://go/", "Intranet URL", 1, 1, 80},
  {"http://gooey/", "Intranet URL 2", 5, 5, 80},

  // URLs for testing offset adjustment.
  {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2, 80},
  {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2, 80},
  {"http://ms/c++%20style%20guide", "Style guide", 2, 2, 80},

  // URLs for testing ctrl-enter behavior.
  {"http://binky/", "Intranet binky", 2, 2, 80},
  {"http://winky/", "Intranet winky", 2, 2, 80},
  {"http://www.winky.com/", "Internet winky", 5, 0, 80},

  // URLs used by EmptyVisits.
  {"http://pandora.com/", "Pandora", 2, 2, 80},
  // This entry is explicitly added more recently than
  // history::kLowQualityMatchAgeLimitInDays.
  // {"http://p/", "p", 0, 0, 80},

  // For intranet based tests.
  {"http://intra/one", "Intranet", 2, 2, 80},
  {"http://intra/two", "Intranet two", 1, 1, 80},
  {"http://intra/three", "Intranet three", 2, 2, 80},
  {"http://moo/bar", "Intranet moo", 1, 1, 80},
  {"http://typedhost/typedpath", "Intranet typed", 1, 1, 80},
  {"http://typedhost/untypedpath", "Intranet untyped", 1, 0, 80},

  {"http://x.com/one", "Internet", 2, 2, 80},
  {"http://x.com/two", "Internet two", 1, 1, 80},
  {"http://x.com/three", "Internet three", 2, 2, 80},

  // For experimental HUP scoring test.
136 {"http://7.com/1a", "One", 8, 4, 4}, 137 {"http://7.com/2a", "Two A", 4, 2, 8}, 138 {"http://7.com/2b", "Two B", 4, 1, 8}, 139 {"http://7.com/3a", "Three", 2, 1, 16}, 140 {"http://7.com/4a", "Four A", 1, 1, 32}, 141 {"http://7.com/4b", "Four B", 1, 1, 64}, 142 {"http://7.com/5a", "Five A", 8, 0, 64}, // never typed. 143}; 144 145class HistoryURLProviderTest : public testing::Test, 146 public AutocompleteProviderListener { 147 public: 148 struct UrlAndLegalDefault { 149 std::string url; 150 bool allowed_to_be_default_match; 151 }; 152 153 HistoryURLProviderTest() 154 : sort_matches_(false) { 155 HistoryQuickProvider::set_disabled(true); 156 } 157 158 virtual ~HistoryURLProviderTest() { 159 HistoryQuickProvider::set_disabled(false); 160 } 161 162 // AutocompleteProviderListener: 163 virtual void OnProviderUpdate(bool updated_matches) OVERRIDE; 164 165 protected: 166 static BrowserContextKeyedService* CreateTemplateURLService( 167 content::BrowserContext* profile) { 168 return new TemplateURLService(static_cast<Profile*>(profile)); 169 } 170 171 // testing::Test 172 virtual void SetUp() { 173 ASSERT_TRUE(SetUpImpl(false)); 174 } 175 virtual void TearDown(); 176 177 // Does the real setup. 178 bool SetUpImpl(bool no_db) WARN_UNUSED_RESULT; 179 180 // Fills test data into the history system. 181 void FillData(); 182 183 // Runs an autocomplete query on |text| and checks to see that the returned 184 // results' destination URLs match those provided. Also allows checking 185 // that the input type was identified correctly. 186 void RunTest(const base::string16 text, 187 const base::string16& desired_tld, 188 bool prevent_inline_autocomplete, 189 const UrlAndLegalDefault* expected_urls, 190 size_t num_results, 191 AutocompleteInput::Type* identified_input_type); 192 193 // A version of the above without the final |type| output parameter. 
194 void RunTest(const base::string16 text, 195 const base::string16& desired_tld, 196 bool prevent_inline_autocomplete, 197 const UrlAndLegalDefault* expected_urls, 198 size_t num_results) { 199 AutocompleteInput::Type type; 200 return RunTest(text, desired_tld, prevent_inline_autocomplete, 201 expected_urls, num_results, &type); 202 } 203 204 content::TestBrowserThreadBundle thread_bundle_; 205 ACMatches matches_; 206 scoped_ptr<TestingProfile> profile_; 207 HistoryService* history_service_; 208 scoped_refptr<HistoryURLProvider> autocomplete_; 209 // Should the matches be sorted and duplicates removed? 210 bool sort_matches_; 211}; 212 213class HistoryURLProviderTestNoDB : public HistoryURLProviderTest { 214 protected: 215 virtual void SetUp() { 216 ASSERT_TRUE(SetUpImpl(true)); 217 } 218}; 219 220void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) { 221 if (autocomplete_->done()) 222 base::MessageLoop::current()->Quit(); 223} 224 225bool HistoryURLProviderTest::SetUpImpl(bool no_db) { 226 profile_.reset(new TestingProfile()); 227 if (!(profile_->CreateHistoryService(true, no_db))) 228 return false; 229 if (!no_db) { 230 profile_->BlockUntilHistoryProcessesPendingRequests(); 231 profile_->BlockUntilHistoryIndexIsRefreshed(); 232 } 233 profile_->GetPrefs()->SetString(prefs::kAcceptLanguages, "en-US,en,ko"); 234 history_service_ = HistoryServiceFactory::GetForProfile( 235 profile_.get(), Profile::EXPLICIT_ACCESS); 236 237 autocomplete_ = new HistoryURLProvider(this, profile_.get()); 238 TemplateURLServiceFactory::GetInstance()->SetTestingFactoryAndUse( 239 profile_.get(), &HistoryURLProviderTest::CreateTemplateURLService); 240 FillData(); 241 return true; 242} 243 244void HistoryURLProviderTest::TearDown() { 245 autocomplete_ = NULL; 246} 247 248void HistoryURLProviderTest::FillData() { 249 // Most visits are a long time ago (some tests require this since we do some 250 // special logic for things visited very recently). 
Note that this time must 251 // be more recent than the "archived history" threshold for the data to go 252 // into the main database. 253 // 254 // TODO(brettw) It would be nice if we could test this behavior, in which 255 // case the time would be specifed in the test_db structure. 256 const Time now = Time::Now(); 257 258 for (size_t i = 0; i < arraysize(test_db); ++i) { 259 const TestURLInfo& cur = test_db[i]; 260 const GURL current_url(cur.url); 261 history_service_->AddPageWithDetails( 262 current_url, base::UTF8ToUTF16(cur.title), cur.visit_count, 263 cur.typed_count, now - TimeDelta::FromDays(cur.age_in_days), false, 264 history::SOURCE_BROWSED); 265 } 266 267 history_service_->AddPageWithDetails( 268 GURL("http://p/"), base::UTF8ToUTF16("p"), 0, 0, 269 Time::Now() - 270 TimeDelta::FromDays(history::kLowQualityMatchAgeLimitInDays - 1), 271 false, history::SOURCE_BROWSED); 272} 273 274void HistoryURLProviderTest::RunTest( 275 const base::string16 text, 276 const base::string16& desired_tld, 277 bool prevent_inline_autocomplete, 278 const UrlAndLegalDefault* expected_urls, 279 size_t num_results, 280 AutocompleteInput::Type* identified_input_type) { 281 AutocompleteInput input(text, base::string16::npos, desired_tld, GURL(), 282 AutocompleteInput::INVALID_SPEC, 283 prevent_inline_autocomplete, false, true, 284 AutocompleteInput::ALL_MATCHES); 285 *identified_input_type = input.type(); 286 autocomplete_->Start(input, false); 287 if (!autocomplete_->done()) 288 base::MessageLoop::current()->Run(); 289 290 matches_ = autocomplete_->matches(); 291 if (sort_matches_) { 292 for (ACMatches::iterator i = matches_.begin(); i != matches_.end(); ++i) 293 i->ComputeStrippedDestinationURL(profile_.get()); 294 std::sort(matches_.begin(), matches_.end(), 295 &AutocompleteMatch::DestinationSortFunc); 296 matches_.erase(std::unique(matches_.begin(), matches_.end(), 297 &AutocompleteMatch::DestinationsEqual), 298 matches_.end()); 299 std::sort(matches_.begin(), matches_.end(), 
300 &AutocompleteMatch::MoreRelevant); 301 } 302 ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text 303 << "\nTLD: \"" << desired_tld << "\""; 304 for (size_t i = 0; i < num_results; ++i) { 305 EXPECT_EQ(expected_urls[i].url, matches_[i].destination_url.spec()); 306 EXPECT_EQ(expected_urls[i].allowed_to_be_default_match, 307 matches_[i].allowed_to_be_default_match); 308 } 309} 310 311TEST_F(HistoryURLProviderTest, PromoteShorterURLs) { 312 // Test that hosts get synthesized below popular pages. 313 const UrlAndLegalDefault expected_nonsynth[] = { 314 { "http://slashdot.org/favorite_page.html", false }, 315 { "http://slashdot.org/", false } 316 }; 317 RunTest(ASCIIToUTF16("slash"), base::string16(), true, expected_nonsynth, 318 arraysize(expected_nonsynth)); 319 320 // Test that hosts get synthesized above less popular pages. 321 const UrlAndLegalDefault expected_synth[] = { 322 { "http://kerneltrap.org/", false }, 323 { "http://kerneltrap.org/not_very_popular.html", false } 324 }; 325 RunTest(ASCIIToUTF16("kernel"), base::string16(), true, expected_synth, 326 arraysize(expected_synth)); 327 328 // Test that unpopular pages are ignored completely. 329 RunTest(ASCIIToUTF16("fresh"), base::string16(), true, NULL, 0); 330 331 // Test that if we create or promote shorter suggestions that would not 332 // normally be inline autocompletable, we make them inline autocompletable if 333 // the original suggestion (that we replaced as "top") was inline 334 // autocompletable. 
335 const UrlAndLegalDefault expected_synthesisa[] = { 336 { "http://synthesisatest.com/", true }, 337 { "http://synthesisatest.com/foo/", true } 338 }; 339 RunTest(ASCIIToUTF16("synthesisa"), base::string16(), false, 340 expected_synthesisa, arraysize(expected_synthesisa)); 341 EXPECT_LT(matches_.front().relevance, 1200); 342 const UrlAndLegalDefault expected_synthesisb[] = { 343 { "http://synthesisbtest.com/foo/", true }, 344 { "http://synthesisbtest.com/foo/bar.html", true } 345 }; 346 RunTest(ASCIIToUTF16("synthesisb"), base::string16(), false, 347 expected_synthesisb, arraysize(expected_synthesisb)); 348 EXPECT_GE(matches_.front().relevance, 1410); 349 350 // Test that if we have a synthesized host that matches a suggestion, they 351 // get combined into one. 352 const UrlAndLegalDefault expected_combine[] = { 353 { "http://news.google.com/", false }, 354 { "http://news.google.com/?ned=us&topic=n", false }, 355 }; 356 ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), base::string16(), true, 357 expected_combine, arraysize(expected_combine))); 358 // The title should also have gotten set properly on the host for the 359 // synthesized one, since it was also in the results. 360 EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description); 361 362 // Test that short URL matching works correctly as the user types more 363 // (several tests): 364 // The entry for foo.com is the best of all five foo.com* entries. 365 const UrlAndLegalDefault short_1[] = { 366 { "http://foo.com/", false }, 367 { "http://foo.com/dir/another/again/myfile.html", false }, 368 { "http://foo.com/dir/", false } 369 }; 370 RunTest(ASCIIToUTF16("foo"), base::string16(), true, 371 short_1, arraysize(short_1)); 372 373 // When the user types the whole host, make sure we don't get two results for 374 // it. 
375 const UrlAndLegalDefault short_2[] = { 376 { "http://foo.com/", true }, 377 { "http://foo.com/dir/another/again/myfile.html", false }, 378 { "http://foo.com/dir/", false }, 379 { "http://foo.com/dir/another/", false } 380 }; 381 RunTest(ASCIIToUTF16("foo.com"), base::string16(), true, short_2, 382 arraysize(short_2)); 383 RunTest(ASCIIToUTF16("foo.com/"), base::string16(), true, short_2, 384 arraysize(short_2)); 385 386 // The filename is the second best of the foo.com* entries, but there is a 387 // shorter URL that's "good enough". The host doesn't match the user input 388 // and so should not appear. 389 const UrlAndLegalDefault short_3[] = { 390 { "http://foo.com/d", true }, 391 { "http://foo.com/dir/another/", false }, 392 { "http://foo.com/dir/another/again/myfile.html", false }, 393 { "http://foo.com/dir/", false } 394 }; 395 RunTest(ASCIIToUTF16("foo.com/d"), base::string16(), true, short_3, 396 arraysize(short_3)); 397 398 // We shouldn't promote shorter URLs than the best if they're not good 399 // enough. 400 const UrlAndLegalDefault short_4[] = { 401 { "http://foo.com/dir/another/a", true }, 402 { "http://foo.com/dir/another/again/myfile.html", false }, 403 { "http://foo.com/dir/another/again/", false } 404 }; 405 RunTest(ASCIIToUTF16("foo.com/dir/another/a"), base::string16(), true, 406 short_4, arraysize(short_4)); 407 408 // Exact matches should always be best no matter how much more another match 409 // has been typed. 
410 const UrlAndLegalDefault short_5a[] = { 411 { "http://gooey/", true }, 412 { "http://www.google.com/", true }, 413 { "http://go/", true } 414 }; 415 const UrlAndLegalDefault short_5b[] = { 416 { "http://go/", true }, 417 { "http://gooey/", true }, 418 { "http://www.google.com/", true } 419 }; 420 RunTest(ASCIIToUTF16("g"), base::string16(), false, 421 short_5a, arraysize(short_5a)); 422 RunTest(ASCIIToUTF16("go"), base::string16(), false, 423 short_5b, arraysize(short_5b)); 424} 425 426TEST_F(HistoryURLProviderTest, CullRedirects) { 427 // URLs we will be using, plus the visit counts they will initially get 428 // (the redirect set below will also increment the visit counts). We want 429 // the results to be in A,B,C order. Note also that our visit counts are 430 // all high enough so that domain synthesizing won't get triggered. 431 struct TestCase { 432 const char* url; 433 int count; 434 } test_cases[] = { 435 {"http://redirects/A", 30}, 436 {"http://redirects/B", 20}, 437 {"http://redirects/C", 10} 438 }; 439 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 440 history_service_->AddPageWithDetails(GURL(test_cases[i].url), 441 ASCIIToUTF16("Title"), test_cases[i].count, test_cases[i].count, 442 Time::Now(), false, history::SOURCE_BROWSED); 443 } 444 445 // Create a B->C->A redirect chain, but set the visit counts such that they 446 // will appear in A,B,C order in the results. The autocomplete query will 447 // search for the most recent visit when looking for redirects, so this will 448 // be found even though the previous visits had no redirects. 
history::RedirectList redirects_to_a;
  // The chain is listed in visit order: B, then C, then the final page A.
  redirects_to_a.push_back(GURL(test_cases[1].url));
  redirects_to_a.push_back(GURL(test_cases[2].url));
  redirects_to_a.push_back(GURL(test_cases[0].url));
  history_service_->AddPage(GURL(test_cases[0].url), base::Time::Now(),
      NULL, 0, GURL(), redirects_to_a, content::PAGE_TRANSITION_TYPED,
      history::SOURCE_BROWSED, true);

  // Because all the results are part of a redirect chain with other results,
  // all but the first one (A) should be culled. We should get the default
  // "what you typed" result, plus this one.
  const base::string16 typing(ASCIIToUTF16("http://redirects/"));
  const UrlAndLegalDefault expected_results[] = {
    { base::UTF16ToUTF8(typing), true },
    { test_cases[0].url, false }
  };
  RunTest(typing, base::string16(), true, expected_results,
          arraysize(expected_results));
}

// Checks which inputs produce a "what you typed" match. None of the
// "wytmatch" inputs appear in |test_db|, so any match returned here was built
// from the input itself.
TEST_F(HistoryURLProviderTest, WhatYouTyped) {
  // Make sure we suggest a What You Typed match at the right times.
  RunTest(ASCIIToUTF16("wytmatch"), base::string16(), false, NULL, 0);
  RunTest(ASCIIToUTF16("wytmatch foo bar"), base::string16(), false, NULL, 0);
  RunTest(ASCIIToUTF16("wytmatch+foo+bar"), base::string16(), false, NULL, 0);
  RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), base::string16(), false,
          NULL, 0);

  // With a desired TLD of "com" the same bare word yields a match.
  const UrlAndLegalDefault results_1[] = {
    { "http://www.wytmatch.com/", true }
  };
  RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1,
          arraysize(results_1));

  // An explicit scheme yields a match, with the spaces escaped.
  const UrlAndLegalDefault results_2[] = {
    { "http://wytmatch%20foo%20bar/", true }
  };
  RunTest(ASCIIToUTF16("http://wytmatch foo bar"), base::string16(), false,
          results_2, arraysize(results_2));

  const UrlAndLegalDefault results_3[] = {
    { "https://wytmatch%20foo%20bar/", true }
  };
  RunTest(ASCIIToUTF16("https://wytmatch foo bar"), base::string16(), false,
          results_3, arraysize(results_3));
}

// Exercises URL fixup of unusual inputs, including inputs that crashed in the
// past.
TEST_F(HistoryURLProviderTest, Fixup) {
  // Test for various past crashes we've had.
  RunTest(ASCIIToUTF16("\\"), base::string16(), false, NULL, 0);
  RunTest(ASCIIToUTF16("#"), base::string16(), false, NULL, 0);
  RunTest(ASCIIToUTF16("%20"), base::string16(), false, NULL, 0);
  const UrlAndLegalDefault fixup_crash[] = {
    { "http://%EF%BD%A5@s/", true }
  };
  RunTest(base::WideToUTF16(L"\uff65@s"), base::string16(), false, fixup_crash,
          arraysize(fixup_crash));
  RunTest(base::WideToUTF16(L"\u2015\u2015@ \uff7c"), base::string16(), false,
          NULL, 0);

  // Fixing up "file:" should result in an inline autocomplete offset of just
  // after "file:", not just after "file://".
const base::string16 input_1(ASCIIToUTF16("file:"));
  const UrlAndLegalDefault fixup_1[] = {
    { "file:///C:/foo.txt", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(input_1, base::string16(), false, fixup_1,
                                  arraysize(fixup_1)));
  EXPECT_EQ(ASCIIToUTF16("///C:/foo.txt"),
            matches_.front().inline_autocompletion);

  // Fixing up "http:/" should result in an inline autocomplete offset of just
  // after "http:/", not just after "http:".
  const base::string16 input_2(ASCIIToUTF16("http:/"));
  const UrlAndLegalDefault fixup_2[] = {
    { "http://bogussite.com/a", true },
    { "http://bogussite.com/b", true },
    { "http://bogussite.com/c", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(input_2, base::string16(), false, fixup_2,
                                  arraysize(fixup_2)));
  EXPECT_EQ(ASCIIToUTF16("/bogussite.com/a"),
            matches_.front().inline_autocompletion);

  // Adding a TLD to a small number like "56" should result in "www.56.com"
  // rather than "0.0.0.56.com".
  const UrlAndLegalDefault fixup_3[] = {
    { "http://www.56.com/", true }
  };
  RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3,
          arraysize(fixup_3));

  // Input that looks like an IP address, such as "127.0.0.1", should result in
  // "http://127.0.0.1/".
  const UrlAndLegalDefault fixup_4[] = {
    { "http://127.0.0.1/", true }
  };
  RunTest(ASCIIToUTF16("127.0.0.1"), base::string16(), false, fixup_4,
          arraysize(fixup_4));

  // A number such as "17173" should result in "http://www.17173.com/", which
  // is in the db.
  const UrlAndLegalDefault fixup_5[] = {
    { "http://www.17173.com/", true }
  };
  RunTest(ASCIIToUTF16("17173"), base::string16(), false, fixup_5,
          arraysize(fixup_5));
}

// Make sure the results for the input 'p' don't change between the first and
// second passes.
TEST_F(HistoryURLProviderTest, EmptyVisits) {
  // Wait for history to create the in memory DB.
  profile_->BlockUntilHistoryProcessesPendingRequests();

  AutocompleteInput input(ASCIIToUTF16("p"), base::string16::npos,
                          base::string16(), GURL(),
                          AutocompleteInput::INVALID_SPEC, false, false, true,
                          AutocompleteInput::ALL_MATCHES);
  autocomplete_->Start(input, false);
  // HistoryURLProvider shouldn't be done (waiting on async results).
  EXPECT_FALSE(autocomplete_->done());

  // We should get back an entry for pandora.
  matches_ = autocomplete_->matches();
  ASSERT_GT(matches_.size(), 0u);
  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
  // Remember the first-pass score so it can be compared after the second pass.
  int pandora_relevance = matches_[0].relevance;

  // Run the message loop. When |autocomplete_| finishes the loop is quit.
  base::MessageLoop::current()->Run();
  EXPECT_TRUE(autocomplete_->done());
  matches_ = autocomplete_->matches();
  ASSERT_GT(matches_.size(), 0u);
  EXPECT_EQ(GURL("http://pandora.com/"), matches_[0].destination_url);
  EXPECT_EQ(pandora_relevance, matches_[0].relevance);
}

TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) {
  // Ensure that we will still produce matches for navigation when there is no
  // database.
  UrlAndLegalDefault navigation_1[] = {
    { "http://test.com/", true }
  };
  RunTest(ASCIIToUTF16("test.com"), base::string16(), false, navigation_1,
          arraysize(navigation_1));

  UrlAndLegalDefault navigation_2[] = {
    { "http://slash/", true }
  };
  RunTest(ASCIIToUTF16("slash"), base::string16(), false, navigation_2,
          arraysize(navigation_2));

  RunTest(ASCIIToUTF16("this is a query"), base::string16(), false, NULL, 0);
}

// Input with trailing whitespace must not be inline-autocompleted, and none of
// its matches may be the default match.
TEST_F(HistoryURLProviderTest, DontAutocompleteOnTrailingWhitespace) {
  AutocompleteInput input(ASCIIToUTF16("slash "), base::string16::npos,
                          base::string16(), GURL(),
                          AutocompleteInput::INVALID_SPEC, false, false,
                          true, AutocompleteInput::ALL_MATCHES);
  autocomplete_->Start(input, false);
  if (!autocomplete_->done())
    base::MessageLoop::current()->Run();

  // None of the matches should attempt to autocomplete.
  matches_ = autocomplete_->matches();
  for (size_t i = 0; i < matches_.size(); ++i) {
    EXPECT_TRUE(matches_[i].inline_autocompletion.empty());
    EXPECT_FALSE(matches_[i].allowed_to_be_default_match);
  }
}

TEST_F(HistoryURLProviderTest, TreatEmailsAsSearches) {
  // Visiting foo.com should not make this string be treated as a navigation.
  // That means the result should be scored around 1200 ("what you typed")
  // and not 1400+.
  const UrlAndLegalDefault expected[] = {
    { "http://user@foo.com/", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("user@foo.com"),
                                  base::string16(), false, expected,
                                  arraysize(expected)));
  EXPECT_LE(1200, matches_[0].relevance);
  EXPECT_LT(matches_[0].relevance, 1210);
}

// Checks the relevance given to single-word hosts with and without paths.
// "gooey" is in |test_db|; "fooey" is not.
TEST_F(HistoryURLProviderTest, IntranetURLsWithPaths) {
  // A |relevance| of 0 means no match at all is expected for |input|.
  struct TestCase {
    const char* input;
    int relevance;
  } test_cases[] = {
    { "fooey", 0 },
    { "fooey/", 1200 },     // 1200 for URL would still navigate by default.
    { "fooey/a", 1200 },    // 1200 for UNKNOWN would not.
{ "fooey/a b", 1200 },  // Also UNKNOWN.
    { "gooey", 1410 },
    { "gooey/", 1410 },
    { "gooey/a", 1400 },
    { "gooey/a b", 1400 },
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    SCOPED_TRACE(test_cases[i].input);
    if (test_cases[i].relevance == 0) {
      RunTest(ASCIIToUTF16(test_cases[i].input), base::string16(), false,
              NULL, 0);
    } else {
      // The single expected match is the fixed-up form of the input itself.
      const UrlAndLegalDefault output[] = {
        { URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec(),
          true }
      };
      ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16(test_cases[i].input),
                                      base::string16(), false,
                                      output, arraysize(output)));
      // Actual relevance should be at least what test_cases expects and
      // no more than 10 more.
      EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
      EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
    }
  }
}

// Checks relevance and identified input type for a known intranet host
// ("gooey") with and without a '#' ref in the input.
TEST_F(HistoryURLProviderTest, IntranetURLsWithRefs) {
  struct TestCase {
    const char* input;
    int relevance;
    AutocompleteInput::Type type;
  } test_cases[] = {
    { "gooey", 1410, AutocompleteInput::UNKNOWN },
    { "gooey/", 1410, AutocompleteInput::URL },
    { "gooey#", 1200, AutocompleteInput::UNKNOWN },
    { "gooey/#", 1200, AutocompleteInput::URL },
    { "gooey#foo", 1200, AutocompleteInput::UNKNOWN },
    { "gooey/#foo", 1200, AutocompleteInput::URL },
    { "gooey# foo", 1200, AutocompleteInput::UNKNOWN },
    { "gooey/# foo", 1200, AutocompleteInput::URL },
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    SCOPED_TRACE(test_cases[i].input);
    // The single expected match is the fixed-up form of the input itself.
    const UrlAndLegalDefault output[] = {
      { URLFixerUpper::FixupURL(test_cases[i].input, std::string()).spec(),
        true }
    };
    AutocompleteInput::Type type;
    ASSERT_NO_FATAL_FAILURE(
        RunTest(ASCIIToUTF16(test_cases[i].input),
                base::string16(), false, output, arraysize(output), &type));
    // Actual relevance should be at least what test_cases expects and
    // no more than 10 more.
    EXPECT_LE(test_cases[i].relevance, matches_[0].relevance);
    EXPECT_LT(matches_[0].relevance, test_cases[i].relevance + 10);
    // Input type should be what we expect.  This is important because
    // this provider counts on SearchProvider to give queries a relevance
    // score >1200 for UNKNOWN inputs and <1200 for URL inputs.  (That's
    // already tested in search_provider_unittest.cc.)  For this test
    // here to test that the user sees the correct behavior, it needs
    // to check that the input type was identified correctly.
    EXPECT_EQ(test_cases[i].type, type);
  }
}

// Makes sure autocompletion happens for intranet sites that have been
// previously visited.
TEST_F(HistoryURLProviderTest, IntranetURLCompletion) {
  // Have RunTest() sort the matches and remove duplicate destinations before
  // comparing them.
  sort_matches_ = true;

  const UrlAndLegalDefault expected1[] = {
    { "http://intra/three", true },
    { "http://intra/two", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/t"), base::string16(),
                                  false, expected1, arraysize(expected1)));
  EXPECT_LE(1410, matches_[0].relevance);
  EXPECT_LT(matches_[0].relevance, 1420);
  EXPECT_EQ(matches_[0].relevance - 1, matches_[1].relevance);

  const UrlAndLegalDefault expected2[] = {
    { "http://moo/b", true },
    { "http://moo/bar", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("moo/b"), base::string16(),
                                  false, expected2, arraysize(expected2)));
  // The url what you typed match should be around 1400, otherwise the
  // search what you typed match is going to be first.
  EXPECT_LE(1400, matches_[0].relevance);
  EXPECT_LT(matches_[0].relevance, 1410);

  const UrlAndLegalDefault expected3[] = {
    { "http://intra/one", true },
    { "http://intra/three", true },
    { "http://intra/two", true }
  };
  RunTest(ASCIIToUTF16("intra"), base::string16(), false, expected3,
          arraysize(expected3));

  const UrlAndLegalDefault expected4[] = {
    { "http://intra/one", true },
    { "http://intra/three", true },
    { "http://intra/two", true }
  };
  RunTest(ASCIIToUTF16("intra/"), base::string16(), false, expected4,
          arraysize(expected4));

  const UrlAndLegalDefault expected5[] = {
    { "http://intra/one", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/o"), base::string16(),
                                  false, expected5, arraysize(expected5)));
  EXPECT_LE(1410, matches_[0].relevance);
  EXPECT_LT(matches_[0].relevance, 1420);

  const UrlAndLegalDefault expected6[] = {
    { "http://intra/x", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("intra/x"), base::string16(),
                                  false, expected6, arraysize(expected6)));
  EXPECT_LE(1400, matches_[0].relevance);
  EXPECT_LT(matches_[0].relevance, 1410);

  const UrlAndLegalDefault expected7[] = {
    { "http://typedhost/untypedpath", true }
  };
  ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("typedhost/untypedpath"),
      base::string16(), false, expected7, arraysize(expected7)));
  EXPECT_LE(1400, matches_[0].relevance);
  EXPECT_LT(matches_[0].relevance, 1410);
}

TEST_F(HistoryURLProviderTest, CrashDueToFixup) {
  // This test passes if we don't crash.  The results don't matter.
const char* const test_cases[] = {
    "//c",
    "\\@st",
    "view-source:x",
  };
  for (size_t i = 0; i < arraysize(test_cases); ++i) {
    AutocompleteInput input(ASCIIToUTF16(test_cases[i]), base::string16::npos,
                            base::string16(), GURL(),
                            AutocompleteInput::INVALID_SPEC,
                            false, false, true, AutocompleteInput::ALL_MATCHES);
    autocomplete_->Start(input, false);
    // Let the query run to completion; the matches themselves are ignored.
    if (!autocomplete_->done())
      base::MessageLoop::current()->Run();
  }
}

// Checks which history URLs are returned when the input is a previously used
// search query or the name of the default search engine's site.
TEST_F(HistoryURLProviderTest, CullSearchResults) {
  // Set up a default search engine.
  TemplateURLData data;
  data.SetKeyword(ASCIIToUTF16("TestEngine"));
  data.SetURL("http://testsearch.com/?q={searchTerms}");
  TemplateURLService* template_url_service =
      TemplateURLServiceFactory::GetForProfile(profile_.get());
  TemplateURL* template_url = new TemplateURL(profile_.get(), data);
  template_url_service->Add(template_url);
  template_url_service->SetDefaultSearchProvider(template_url);
  template_url_service->Load();

  // URLs we will be using. Each one is added with |count| as both its visit
  // count and its typed count. Note also that our visit counts are all high
  // enough so that domain synthesizing won't get triggered.
  struct TestCase {
    const char* url;
    int count;
  } test_cases[] = {
    {"https://testsearch.com/", 30},
    {"https://testsearch.com/?q=foobar", 20},
    {"http://foobar.com/", 10}
  };
  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) {
    history_service_->AddPageWithDetails(GURL(test_cases[i].url),
        base::UTF8ToUTF16("Title"), test_cases[i].count, test_cases[i].count,
        Time::Now(), false, history::SOURCE_BROWSED);
  }

  // We should not see search URLs when typing a previously used query.
  const UrlAndLegalDefault expected_when_searching_query[] = {
    { test_cases[2].url, false }
  };
  RunTest(ASCIIToUTF16("foobar"), base::string16(), true,
      expected_when_searching_query, arraysize(expected_when_searching_query));

  // We should not see search URLs when typing the search engine name.
  const UrlAndLegalDefault expected_when_searching_site[] = {
    { test_cases[0].url, false }
  };
  RunTest(ASCIIToUTF16("testsearch"), base::string16(), true,
      expected_when_searching_site, arraysize(expected_when_searching_site));
}

TEST_F(HistoryURLProviderTest, SuggestExactInput) {
  const size_t npos = std::string::npos;
  struct TestCase {
    // Inputs:
    const char* input;
    bool trim_http;
    // Expected Outputs:
    const char* contents;
    // Offsets of the ACMatchClassifications, terminated by npos.
    size_t offsets[3];
    // The index of the ACMatchClassification that should have the MATCH bit
    // set, npos if no ACMatchClassification should have the MATCH bit set.
851 size_t match_classification_index; 852 } test_cases[] = { 853 { "http://www.somesite.com", false, 854 "http://www.somesite.com", {0, npos, npos}, 0 }, 855 { "www.somesite.com", true, 856 "www.somesite.com", {0, npos, npos}, 0 }, 857 { "www.somesite.com", false, 858 "http://www.somesite.com", {0, 7, npos}, 1 }, 859 { "somesite.com", true, 860 "somesite.com", {0, npos, npos}, 0 }, 861 { "somesite.com", false, 862 "http://somesite.com", {0, 7, npos}, 1 }, 863 { "w", true, 864 "w", {0, npos, npos}, 0 }, 865 { "w", false, 866 "http://w", {0, 7, npos}, 1 }, 867 { "w.com", true, 868 "w.com", {0, npos, npos}, 0 }, 869 { "w.com", false, 870 "http://w.com", {0, 7, npos}, 1 }, 871 { "www.w.com", true, 872 "www.w.com", {0, npos, npos}, 0 }, 873 { "www.w.com", false, 874 "http://www.w.com", {0, 7, npos}, 1 }, 875 { "view-source:w", true, 876 "view-source:w", {0, npos, npos}, 0 }, 877 { "view-source:www.w.com/", true, 878 "view-source:www.w.com", {0, npos, npos}, npos }, 879 { "view-source:www.w.com/", false, 880 "view-source:http://www.w.com", {0, npos, npos}, npos }, 881 { "view-source:http://www.w.com/", false, 882 "view-source:http://www.w.com", {0, npos, npos}, 0 }, 883 { " view-source:", true, 884 "view-source:", {0, npos, npos}, 0 }, 885 { "http:////////w.com", false, 886 "http://w.com", {0, npos, npos}, npos }, 887 { " http:////////www.w.com", false, 888 "http://www.w.com", {0, npos, npos}, npos }, 889 { "http:a///www.w.com", false, 890 "http://a///www.w.com", {0, npos, npos}, npos }, 891 { "mailto://a@b.com", true, 892 "mailto://a@b.com", {0, npos, npos}, 0 }, 893 { "mailto://a@b.com", false, 894 "mailto://a@b.com", {0, npos, npos}, 0 }, 895 }; 896 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 897 SCOPED_TRACE(testing::Message() << "Index " << i << " input: " 898 << test_cases[i].input << ", trim_http: " 899 << test_cases[i].trim_http); 900 901 AutocompleteInput input(ASCIIToUTF16(test_cases[i].input), 902 base::string16::npos, base::string16(), 903 
GURL("about:blank"), 904 AutocompleteInput::INVALID_SPEC, false, false, true, 905 AutocompleteInput::ALL_MATCHES); 906 AutocompleteMatch match(autocomplete_->SuggestExactInput( 907 input.text(), input.canonicalized_url(), test_cases[i].trim_http)); 908 EXPECT_EQ(ASCIIToUTF16(test_cases[i].contents), match.contents); 909 for (size_t match_index = 0; match_index < match.contents_class.size(); 910 ++match_index) { 911 EXPECT_EQ(test_cases[i].offsets[match_index], 912 match.contents_class[match_index].offset); 913 EXPECT_EQ(ACMatchClassification::URL | 914 (match_index == test_cases[i].match_classification_index ? 915 ACMatchClassification::MATCH : 0), 916 match.contents_class[match_index].style); 917 } 918 EXPECT_EQ(npos, test_cases[i].offsets[match.contents_class.size()]); 919 } 920} 921 922TEST_F(HistoryURLProviderTest, HUPScoringExperiment) { 923 HUPScoringParams max_2000_no_time_decay; 924 max_2000_no_time_decay.typed_count_buckets.buckets().push_back( 925 std::make_pair(0.0, 2000)); 926 HUPScoringParams max_1250_no_time_decay; 927 max_1250_no_time_decay.typed_count_buckets.buckets().push_back( 928 std::make_pair(0.0, 1250)); 929 HUPScoringParams max_1000_no_time_decay; 930 max_1000_no_time_decay.typed_count_buckets.buckets().push_back( 931 std::make_pair(0.0, 1000)); 932 933 HUPScoringParams max_1100_with_time_decay_and_max_cap; 934 max_1100_with_time_decay_and_max_cap.typed_count_buckets. 935 set_relevance_cap(1400); 936 max_1100_with_time_decay_and_max_cap.typed_count_buckets. 
937 set_half_life_days(16); 938 max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back( 939 std::make_pair(0.5, 1100)); 940 max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back( 941 std::make_pair(0.24, 200)); 942 max_1100_with_time_decay_and_max_cap.typed_count_buckets.buckets().push_back( 943 std::make_pair(0.0, 100)); 944 945 HUPScoringParams max_1100_visit_typed_decays; 946 max_1100_visit_typed_decays.typed_count_buckets.set_half_life_days(16); 947 max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back( 948 std::make_pair(0.5, 1100)); 949 max_1100_visit_typed_decays.typed_count_buckets.buckets().push_back( 950 std::make_pair(0.0, 100)); 951 max_1100_visit_typed_decays.visited_count_buckets.set_half_life_days(16); 952 max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back( 953 std::make_pair(0.5, 550)); 954 max_1100_visit_typed_decays.visited_count_buckets.buckets().push_back( 955 std::make_pair(0.0, 50)); 956 957 const int kMaxMatches = 3; 958 struct TestCase { 959 const char* input; 960 HUPScoringParams scoring_params; 961 struct ExpectedMatch { 962 const char* url; 963 int control_relevance; 964 int experiment_relevance; 965 }; 966 ExpectedMatch matches[kMaxMatches]; 967 } test_cases[] = { 968 // Max score 2000 -> no demotion. 969 { "7.com/1", max_2000_no_time_decay, 970 {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} }, 971 972 // Limit score to 1250/1000 and make sure that the top match is unchanged. 973 { "7.com/1", max_1250_no_time_decay, 974 {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} }, 975 { "7.com/2", max_1250_no_time_decay, 976 {{"7.com/2a", 1413, 1413}, {"7.com/2b", 1412, 1250}, {NULL, 0, 0}} }, 977 { "7.com/4", max_1000_no_time_decay, 978 {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 1000}, 979 {"7.com/4b", 1201, 999}} }, 980 981 // Max relevance cap is 1400 and half-life is 16 days. 
982 { "7.com/1", max_1100_with_time_decay_and_max_cap, 983 {{"7.com/1a", 1413, 1413}, {NULL, 0, 0}, {NULL, 0, 0}} }, 984 { "7.com/4", max_1100_with_time_decay_and_max_cap, 985 {{"7.com/4", 1203, 1203}, {"7.com/4a", 1202, 200}, 986 {"7.com/4b", 1201, 100}} }, 987 988 // Max relevance cap is 1400 and half-life is 16 days for both visit/typed. 989 { "7.com/5", max_1100_visit_typed_decays, 990 {{"7.com/5", 1203, 1203}, {"7.com/5a", 1202, 50}, {NULL, 0, 0}} }, 991 }; 992 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(test_cases); ++i) { 993 SCOPED_TRACE(test_cases[i].input); 994 UrlAndLegalDefault output[kMaxMatches]; 995 int max_matches; 996 for (max_matches = 0; max_matches < kMaxMatches; ++max_matches) { 997 if (test_cases[i].matches[max_matches].url == NULL) 998 break; 999 output[max_matches].url = URLFixerUpper::FixupURL( 1000 test_cases[i].matches[max_matches].url, std::string()).spec(); 1001 output[max_matches].allowed_to_be_default_match = true; 1002 } 1003 autocomplete_->scoring_params_ = test_cases[i].scoring_params; 1004 1005 // Test the control (scoring disabled). 1006 autocomplete_->scoring_params_.experimental_scoring_enabled = false; 1007 ASSERT_NO_FATAL_FAILURE( 1008 RunTest(ASCIIToUTF16(test_cases[i].input), 1009 base::string16(), false, output, max_matches)); 1010 for (int j = 0; j < max_matches; ++j) { 1011 EXPECT_EQ(test_cases[i].matches[j].control_relevance, 1012 matches_[j].relevance); 1013 } 1014 1015 // Test the experiment (scoring enabled). 1016 autocomplete_->scoring_params_.experimental_scoring_enabled = true; 1017 ASSERT_NO_FATAL_FAILURE( 1018 RunTest(ASCIIToUTF16(test_cases[i].input), 1019 base::string16(), false, output, max_matches)); 1020 for (int j = 0; j < max_matches; ++j) { 1021 EXPECT_EQ(test_cases[i].matches[j].experiment_relevance, 1022 matches_[j].relevance); 1023 } 1024 } 1025} 1026