history_url_provider_unittest.cc revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/file_util.h" 6#include "base/message_loop.h" 7#include "base/path_service.h" 8#include "base/string_util.h" 9#include "base/utf_string_conversions.h" 10#include "chrome/browser/autocomplete/autocomplete_match.h" 11#include "chrome/browser/autocomplete/history_url_provider.h" 12#include "chrome/browser/browser_thread.h" 13#include "chrome/browser/history/history.h" 14#include "chrome/test/testing_profile.h" 15#include "testing/gtest/include/gtest/gtest.h" 16 17using base::Time; 18using base::TimeDelta; 19 20struct TestURLInfo { 21 std::string url; 22 std::string title; 23 int visit_count; 24 int typed_count; 25} test_db[] = { 26 {"http://www.google.com/", "Google", 3, 3}, 27 28 // High-quality pages should get a host synthesized as a lower-quality match. 29 {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100}, 30 31 // Less popular pages should have hosts synthesized as higher-quality 32 // matches. 33 {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0}, 34 35 // Unpopular pages should not appear in the results at all. 36 {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 1}, 37 38 // If a host has a match, we should pick it up during host synthesis. 39 {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2}, 40 {"http://news.google.com/", "Google News", 1, 1}, 41 42 // Suggested short URLs must be "good enough" and must match user input. 43 {"http://foo.com/", "Dir", 5, 5}, 44 {"http://foo.com/dir/", "Dir", 2, 2}, 45 {"http://foo.com/dir/another/", "Dir", 5, 1}, 46 {"http://foo.com/dir/another/again/", "Dir", 10, 0}, 47 {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2}, 48 49 // We throw in a lot of extra URLs here to make sure we're testing the 50 // history database's query, not just the autocomplete provider. 51 {"http://startest.com/y/a", "A", 2, 2}, 52 {"http://startest.com/y/b", "B", 5, 2}, 53 {"http://startest.com/x/c", "C", 5, 2}, 54 {"http://startest.com/x/d", "D", 5, 5}, 55 {"http://startest.com/y/e", "E", 4, 2}, 56 {"http://startest.com/y/f", "F", 3, 2}, 57 {"http://startest.com/y/g", "G", 3, 2}, 58 {"http://startest.com/y/h", "H", 3, 2}, 59 {"http://startest.com/y/i", "I", 3, 2}, 60 {"http://startest.com/y/j", "J", 3, 2}, 61 {"http://startest.com/y/k", "K", 3, 2}, 62 {"http://startest.com/y/l", "L", 3, 2}, 63 {"http://startest.com/y/m", "M", 3, 2}, 64 65 // A file: URL is useful for testing that fixup does the right thing w.r.t. 66 // the number of trailing slashes on the user's input. 67 {"file:///C:/foo.txt", "", 2, 2}, 68 69 // Results with absurdly high typed_counts so that very generic queries like 70 // "http" will give consistent results even if more data is added above. 71 {"http://bogussite.com/a", "Bogus A", 10002, 10000}, 72 {"http://bogussite.com/b", "Bogus B", 10001, 10000}, 73 {"http://bogussite.com/c", "Bogus C", 10000, 10000}, 74 75 // Domain name with number. 76 {"http://www.17173.com/", "Domain with number", 3, 3}, 77 78 // URLs to test exact-matching behavior. 79 {"http://go/", "Intranet URL", 1, 1}, 80 {"http://gooey/", "Intranet URL 2", 5, 5}, 81 82 // URLs for testing offset adjustment. 83 {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2}, 84 {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2}, 85 {"http://ms/c++%20style%20guide", "Style guide", 2, 2}, 86 87 // URLs for testing ctrl-enter behavior. 88 {"http://binky/", "Intranet binky", 2, 2}, 89 {"http://winky/", "Intranet winky", 2, 2}, 90 {"http://www.winky.com/", "Internet winky", 5, 0}, 91}; 92 93class HistoryURLProviderTest : public testing::Test, 94 public ACProviderListener { 95 public: 96 HistoryURLProviderTest() 97 : ui_thread_(BrowserThread::UI, &message_loop_), 98 file_thread_(BrowserThread::FILE, &message_loop_) {} 99 100 // ACProviderListener 101 virtual void OnProviderUpdate(bool updated_matches); 102 103 protected: 104 // testing::Test 105 virtual void SetUp() { 106 SetUpImpl(false); 107 } 108 virtual void TearDown(); 109 110 // Does the real setup. 111 void SetUpImpl(bool no_db); 112 113 // Fills test data into the history system. 114 void FillData(); 115 116 // Runs an autocomplete query on |text| and checks to see that the returned 117 // results' destination URLs match those provided. 118 void RunTest(const string16 text, 119 const string16& desired_tld, 120 bool prevent_inline_autocomplete, 121 const std::string* expected_urls, 122 size_t num_results); 123 124 void RunAdjustOffsetTest(const string16 text, size_t expected_offset); 125 126 MessageLoopForUI message_loop_; 127 BrowserThread ui_thread_; 128 BrowserThread file_thread_; 129 ACMatches matches_; 130 scoped_ptr<TestingProfile> profile_; 131 HistoryService* history_service_; 132 133 private: 134 scoped_refptr<HistoryURLProvider> autocomplete_; 135}; 136 137class HistoryURLProviderTestNoDB : public HistoryURLProviderTest { 138 protected: 139 virtual void SetUp() { 140 SetUpImpl(true); 141 } 142}; 143 144void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) { 145 if (autocomplete_->done()) 146 MessageLoop::current()->Quit(); 147} 148 149void HistoryURLProviderTest::SetUpImpl(bool no_db) { 150 profile_.reset(new TestingProfile()); 151 profile_->CreateHistoryService(true, no_db); 152 history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); 153 154 autocomplete_ = new HistoryURLProvider(this, profile_.get(), "en-US,en,ko"); 155 156 FillData(); 157} 158 159void HistoryURLProviderTest::TearDown() { 160 autocomplete_ = NULL; 161} 162 163void HistoryURLProviderTest::FillData() { 164 // All visits are a long time ago (some tests require this since we do some 165 // special logic for things visited very recently). Note that this time must 166 // be more recent than the "archived history" threshold for the data to go 167 // into the main database. 168 // 169 // TODO(brettw) It would be nice if we could test this behavior, in which 170 // case the time would be specifed in the test_db structure. 171 Time visit_time = Time::Now() - TimeDelta::FromDays(80); 172 173 for (size_t i = 0; i < arraysize(test_db); ++i) { 174 const TestURLInfo& cur = test_db[i]; 175 const GURL current_url(cur.url); 176 history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title), 177 cur.visit_count, cur.typed_count, 178 visit_time, false, 179 history::SOURCE_BROWSED); 180 } 181} 182 183void HistoryURLProviderTest::RunTest(const string16 text, 184 const string16& desired_tld, 185 bool prevent_inline_autocomplete, 186 const std::string* expected_urls, 187 size_t num_results) { 188 AutocompleteInput input(text, desired_tld, prevent_inline_autocomplete, 189 false, true, false); 190 autocomplete_->Start(input, false); 191 if (!autocomplete_->done()) 192 MessageLoop::current()->Run(); 193 194 matches_ = autocomplete_->matches(); 195 ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text 196 << "\nTLD: \"" << desired_tld << "\""; 197 for (size_t i = 0; i < num_results; ++i) 198 EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec()); 199} 200 201void HistoryURLProviderTest::RunAdjustOffsetTest(const string16 text, 202 size_t expected_offset) { 203 AutocompleteInput input(text, string16(), false, false, true, false); 204 autocomplete_->Start(input, false); 205 if (!autocomplete_->done()) 206 MessageLoop::current()->Run(); 207 208 matches_ = autocomplete_->matches(); 209 ASSERT_GE(matches_.size(), 1U) << "Input text: " << text; 210 EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset); 211} 212 213TEST_F(HistoryURLProviderTest, PromoteShorterURLs) { 214 // Test that hosts get synthesized below popular pages. 215 const std::string expected_nonsynth[] = { 216 "http://slashdot.org/favorite_page.html", 217 "http://slashdot.org/", 218 }; 219 RunTest(ASCIIToUTF16("slash"), string16(), true, expected_nonsynth, 220 arraysize(expected_nonsynth)); 221 222 // Test that hosts get synthesized above less popular pages. 223 const std::string expected_synth[] = { 224 "http://kerneltrap.org/", 225 "http://kerneltrap.org/not_very_popular.html", 226 }; 227 RunTest(ASCIIToUTF16("kernel"), string16(), true, expected_synth, 228 arraysize(expected_synth)); 229 230 // Test that unpopular pages are ignored completely. 231 RunTest(ASCIIToUTF16("fresh"), string16(), true, NULL, 0); 232 233 // Test that if we have a synthesized host that matches a suggestion, they 234 // get combined into one. 235 const std::string expected_combine[] = { 236 "http://news.google.com/", 237 "http://news.google.com/?ned=us&topic=n", 238 }; 239 ASSERT_NO_FATAL_FAILURE(RunTest(ASCIIToUTF16("news"), string16(), true, 240 expected_combine, arraysize(expected_combine))); 241 // The title should also have gotten set properly on the host for the 242 // synthesized one, since it was also in the results. 243 EXPECT_EQ(ASCIIToUTF16("Google News"), matches_.front().description); 244 245 // Test that short URL matching works correctly as the user types more 246 // (several tests): 247 // The entry for foo.com is the best of all five foo.com* entries. 248 const std::string short_1[] = { 249 "http://foo.com/", 250 "http://foo.com/dir/another/again/myfile.html", 251 "http://foo.com/dir/", 252 }; 253 RunTest(ASCIIToUTF16("foo"), string16(), true, short_1, arraysize(short_1)); 254 255 // When the user types the whole host, make sure we don't get two results for 256 // it. 257 const std::string short_2[] = { 258 "http://foo.com/", 259 "http://foo.com/dir/another/again/myfile.html", 260 "http://foo.com/dir/", 261 "http://foo.com/dir/another/", 262 }; 263 RunTest(ASCIIToUTF16("foo.com"), string16(), true, short_2, 264 arraysize(short_2)); 265 RunTest(ASCIIToUTF16("foo.com/"), string16(), true, short_2, 266 arraysize(short_2)); 267 268 // The filename is the second best of the foo.com* entries, but there is a 269 // shorter URL that's "good enough". The host doesn't match the user input 270 // and so should not appear. 271 const std::string short_3[] = { 272 "http://foo.com/d", 273 "http://foo.com/dir/another/", 274 "http://foo.com/dir/another/again/myfile.html", 275 "http://foo.com/dir/", 276 }; 277 RunTest(ASCIIToUTF16("foo.com/d"), string16(), true, short_3, 278 arraysize(short_3)); 279 280 // We shouldn't promote shorter URLs than the best if they're not good 281 // enough. 282 const std::string short_4[] = { 283 "http://foo.com/dir/another/a", 284 "http://foo.com/dir/another/again/myfile.html", 285 "http://foo.com/dir/another/again/", 286 }; 287 RunTest(ASCIIToUTF16("foo.com/dir/another/a"), string16(), true, short_4, 288 arraysize(short_4)); 289 290 // Exact matches should always be best no matter how much more another match 291 // has been typed. 292 const std::string short_5a[] = { 293 "http://gooey/", 294 "http://www.google.com/", 295 }; 296 const std::string short_5b[] = { 297 "http://go/", 298 "http://gooey/", 299 "http://www.google.com/", 300 }; 301 RunTest(ASCIIToUTF16("g"), string16(), false, short_5a, arraysize(short_5a)); 302 RunTest(ASCIIToUTF16("go"), string16(), false, short_5b, arraysize(short_5b)); 303} 304 305TEST_F(HistoryURLProviderTest, CullRedirects) { 306 // URLs we will be using, plus the visit counts they will initially get 307 // (the redirect set below will also increment the visit counts). We want 308 // the results to be in A,B,C order. Note also that our visit counts are 309 // all high enough so that domain synthesizing won't get triggered. 310 struct RedirectCase { 311 const char* url; 312 int count; 313 }; 314 static const RedirectCase redirect[] = { 315 {"http://redirects/A", 30}, 316 {"http://redirects/B", 20}, 317 {"http://redirects/C", 10} 318 }; 319 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(redirect); i++) { 320 history_service_->AddPageWithDetails(GURL(redirect[i].url), 321 UTF8ToUTF16("Title"), 322 redirect[i].count, redirect[i].count, 323 Time::Now(), false, 324 history::SOURCE_BROWSED); 325 } 326 327 // Create a B->C->A redirect chain, but set the visit counts such that they 328 // will appear in A,B,C order in the results. The autocomplete query will 329 // search for the most recent visit when looking for redirects, so this will 330 // be found even though the previous visits had no redirects. 331 history::RedirectList redirects_to_a; 332 redirects_to_a.push_back(GURL(redirect[1].url)); 333 redirects_to_a.push_back(GURL(redirect[2].url)); 334 redirects_to_a.push_back(GURL(redirect[0].url)); 335 history_service_->AddPage(GURL(redirect[0].url), NULL, 0, GURL(), 336 PageTransition::TYPED, redirects_to_a, 337 history::SOURCE_BROWSED, true); 338 339 // Because all the results are part of a redirect chain with other results, 340 // all but the first one (A) should be culled. We should get the default 341 // "what you typed" result, plus this one. 342 const string16 typing(ASCIIToUTF16("http://redirects/")); 343 const std::string expected_results[] = { 344 UTF16ToUTF8(typing), 345 redirect[0].url}; 346 RunTest(typing, string16(), true, expected_results, 347 arraysize(expected_results)); 348} 349 350TEST_F(HistoryURLProviderTest, WhatYouTyped) { 351 // Make sure we suggest a What You Typed match at the right times. 352 RunTest(ASCIIToUTF16("wytmatch"), string16(), false, NULL, 0); 353 RunTest(ASCIIToUTF16("wytmatch foo bar"), string16(), false, NULL, 0); 354 RunTest(ASCIIToUTF16("wytmatch+foo+bar"), string16(), false, NULL, 0); 355 RunTest(ASCIIToUTF16("wytmatch+foo+bar.com"), string16(), false, NULL, 0); 356 357 const std::string results_1[] = {"http://www.wytmatch.com/"}; 358 RunTest(ASCIIToUTF16("wytmatch"), ASCIIToUTF16("com"), false, results_1, 359 arraysize(results_1)); 360 361 const std::string results_2[] = {"http://wytmatch%20foo%20bar/"}; 362 RunTest(ASCIIToUTF16("http://wytmatch foo bar"), string16(), false, results_2, 363 arraysize(results_2)); 364 365 const std::string results_3[] = {"https://wytmatch%20foo%20bar/"}; 366 RunTest(ASCIIToUTF16("https://wytmatch foo bar"), string16(), false, 367 results_3, arraysize(results_3)); 368 369 // Test the corner case where a user has fully typed a previously visited 370 // intranet address and is now hitting ctrl-enter, which completes to a 371 // previously unvisted internet domain. 372 const std::string binky_results[] = {"http://binky/"}; 373 const std::string binky_com_results[] = { 374 "http://www.binky.com/", 375 "http://binky/", 376 }; 377 RunTest(ASCIIToUTF16("binky"), string16(), false, binky_results, 378 arraysize(binky_results)); 379 RunTest(ASCIIToUTF16("binky"), ASCIIToUTF16("com"), false, binky_com_results, 380 arraysize(binky_com_results)); 381 382 // Test the related case where a user has fully typed a previously visited 383 // intranet address and is now hitting ctrl-enter, which completes to a 384 // previously visted internet domain. 385 const std::string winky_results[] = { 386 "http://winky/", 387 "http://www.winky.com/", 388 }; 389 const std::string winky_com_results[] = { 390 "http://www.winky.com/", 391 "http://winky/", 392 }; 393 RunTest(ASCIIToUTF16("winky"), string16(), false, winky_results, 394 arraysize(winky_results)); 395 RunTest(ASCIIToUTF16("winky"), ASCIIToUTF16("com"), false, winky_com_results, 396 arraysize(winky_com_results)); 397} 398 399TEST_F(HistoryURLProviderTest, Fixup) { 400 // Test for various past crashes we've had. 401 RunTest(ASCIIToUTF16("\\"), string16(), false, NULL, 0); 402 RunTest(ASCIIToUTF16("#"), string16(), false, NULL, 0); 403 RunTest(ASCIIToUTF16("%20"), string16(), false, NULL, 0); 404 RunTest(WideToUTF16(L"\uff65@s"), string16(), false, NULL, 0); 405 RunTest(WideToUTF16(L"\u2015\u2015@ \uff7c"), string16(), false, NULL, 0); 406 407 // Fixing up "file:" should result in an inline autocomplete offset of just 408 // after "file:", not just after "file://". 409 const string16 input_1(ASCIIToUTF16("file:")); 410 const std::string fixup_1[] = {"file:///C:/foo.txt"}; 411 ASSERT_NO_FATAL_FAILURE(RunTest(input_1, string16(), false, fixup_1, 412 arraysize(fixup_1))); 413 EXPECT_EQ(input_1.length(), matches_.front().inline_autocomplete_offset); 414 415 // Fixing up "http:/" should result in an inline autocomplete offset of just 416 // after "http:/", not just after "http:". 417 const string16 input_2(ASCIIToUTF16("http:/")); 418 const std::string fixup_2[] = { 419 "http://bogussite.com/a", 420 "http://bogussite.com/b", 421 "http://bogussite.com/c", 422 }; 423 ASSERT_NO_FATAL_FAILURE(RunTest(input_2, string16(), false, fixup_2, 424 arraysize(fixup_2))); 425 EXPECT_EQ(input_2.length(), matches_.front().inline_autocomplete_offset); 426 427 // Adding a TLD to a small number like "56" should result in "www.56.com" 428 // rather than "0.0.0.56.com". 429 const std::string fixup_3[] = {"http://www.56.com/"}; 430 RunTest(ASCIIToUTF16("56"), ASCIIToUTF16("com"), true, fixup_3, 431 arraysize(fixup_3)); 432 433 // An input looks like a IP address like "127.0.0.1" should result in 434 // "http://127.0.0.1/". 435 const std::string fixup_4[] = {"http://127.0.0.1/"}; 436 RunTest(ASCIIToUTF16("127.0.0.1"), string16(), false, fixup_4, 437 arraysize(fixup_4)); 438 439 // An number "17173" should result in "http://www.17173.com/" in db. 440 const std::string fixup_5[] = {"http://www.17173.com/"}; 441 RunTest(ASCIIToUTF16("17173"), string16(), false, fixup_5, 442 arraysize(fixup_5)); 443} 444 445TEST_F(HistoryURLProviderTest, AdjustOffset) { 446 RunAdjustOffsetTest(WideToUTF16(L"http://www.\uAD50\uC721"), 13); 447 RunAdjustOffsetTest(ASCIIToUTF16("http://spaces.com/path%20with%20spa"), 31); 448 RunAdjustOffsetTest(ASCIIToUTF16("http://ms/c++ s"), 15); 449} 450 451TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) { 452 // Ensure that we will still produce matches for navigation when there is no 453 // database. 454 std::string navigation_1[] = {"http://test.com/"}; 455 RunTest(ASCIIToUTF16("test.com"), string16(), false, navigation_1, 456 arraysize(navigation_1)); 457 458 std::string navigation_2[] = {"http://slash/"}; 459 RunTest(ASCIIToUTF16("slash"), string16(), false, navigation_2, 460 arraysize(navigation_2)); 461 462 RunTest(ASCIIToUTF16("this is a query"), string16(), false, NULL, 0); 463} 464