history_url_provider_unittest.cc revision 3345a6884c488ff3a535c2c9acdd33d74b37e311
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "base/file_util.h" 6#include "base/message_loop.h" 7#include "base/path_service.h" 8#include "base/string_util.h" 9#include "base/utf_string_conversions.h" 10#include "chrome/browser/autocomplete/history_url_provider.h" 11#include "chrome/browser/chrome_thread.h" 12#include "chrome/browser/history/history.h" 13#include "chrome/test/testing_profile.h" 14#include "testing/gtest/include/gtest/gtest.h" 15#if defined(OS_MACOSX) 16#include "base/mac_util.h" 17#endif 18 19using base::Time; 20using base::TimeDelta; 21 22struct TestURLInfo { 23 std::string url; 24 std::string title; 25 int visit_count; 26 int typed_count; 27} test_db[] = { 28 {"http://www.google.com/", "Google", 3, 3}, 29 30 // High-quality pages should get a host synthesized as a lower-quality match. 31 {"http://slashdot.org/favorite_page.html", "Favorite page", 200, 100}, 32 33 // Less popular pages should have hosts synthesized as higher-quality 34 // matches. 35 {"http://kerneltrap.org/not_very_popular.html", "Less popular", 4, 0}, 36 37 // Unpopular pages should not appear in the results at all. 38 {"http://freshmeat.net/unpopular.html", "Unpopular", 1, 1}, 39 40 // If a host has a match, we should pick it up during host synthesis. 41 {"http://news.google.com/?ned=us&topic=n", "Google News - U.S.", 2, 2}, 42 {"http://news.google.com/", "Google News", 1, 1}, 43 44 // Suggested short URLs must be "good enough" and must match user input. 45 {"http://foo.com/", "Dir", 5, 5}, 46 {"http://foo.com/dir/", "Dir", 2, 2}, 47 {"http://foo.com/dir/another/", "Dir", 5, 1}, 48 {"http://foo.com/dir/another/again/", "Dir", 10, 0}, 49 {"http://foo.com/dir/another/again/myfile.html", "File", 10, 2}, 50 51 // We throw in a lot of extra URLs here to make sure we're testing the 52 // history database's query, not just the autocomplete provider. 53 {"http://startest.com/y/a", "A", 2, 2}, 54 {"http://startest.com/y/b", "B", 5, 2}, 55 {"http://startest.com/x/c", "C", 5, 2}, 56 {"http://startest.com/x/d", "D", 5, 5}, 57 {"http://startest.com/y/e", "E", 4, 2}, 58 {"http://startest.com/y/f", "F", 3, 2}, 59 {"http://startest.com/y/g", "G", 3, 2}, 60 {"http://startest.com/y/h", "H", 3, 2}, 61 {"http://startest.com/y/i", "I", 3, 2}, 62 {"http://startest.com/y/j", "J", 3, 2}, 63 {"http://startest.com/y/k", "K", 3, 2}, 64 {"http://startest.com/y/l", "L", 3, 2}, 65 {"http://startest.com/y/m", "M", 3, 2}, 66 67 // A file: URL is useful for testing that fixup does the right thing w.r.t. 68 // the number of trailing slashes on the user's input. 69 {"file:///C:/foo.txt", "", 2, 2}, 70 71 // Results with absurdly high typed_counts so that very generic queries like 72 // "http" will give consistent results even if more data is added above. 73 {"http://bogussite.com/a", "Bogus A", 10002, 10000}, 74 {"http://bogussite.com/b", "Bogus B", 10001, 10000}, 75 {"http://bogussite.com/c", "Bogus C", 10000, 10000}, 76 77 // Domain name with number. 78 {"http://www.17173.com/", "Domain with number", 3, 3}, 79 80 // URLs to test exact-matching behavior. 81 {"http://go/", "Intranet URL", 1, 1}, 82 {"http://gooey/", "Intranet URL 2", 5, 5}, 83 84 // URLs for testing offset adjustment. 85 {"http://www.\xEA\xB5\x90\xEC\x9C\xA1.kr/", "Korean", 2, 2}, 86 {"http://spaces.com/path%20with%20spaces/foo.html", "Spaces", 2, 2}, 87 {"http://ms/c++%20style%20guide", "Style guide", 2, 2}, 88 89 // URLs for testing ctrl-enter behavior. 90 {"http://binky/", "Intranet binky", 2, 2}, 91 {"http://winky/", "Intranet winky", 2, 2}, 92 {"http://www.winky.com/", "Internet winky", 5, 0}, 93}; 94 95class HistoryURLProviderTest : public testing::Test, 96 public ACProviderListener { 97 public: 98 HistoryURLProviderTest() 99 : ui_thread_(ChromeThread::UI, &message_loop_), 100 file_thread_(ChromeThread::FILE, &message_loop_) {} 101 102 // ACProviderListener 103 virtual void OnProviderUpdate(bool updated_matches); 104 105 protected: 106 // testing::Test 107 virtual void SetUp() { 108 SetUpImpl(false); 109 } 110 virtual void TearDown(); 111 112 // Does the real setup. 113 void SetUpImpl(bool no_db); 114 115 // Fills test data into the history system. 116 void FillData(); 117 118 // Runs an autocomplete query on |text| and checks to see that the returned 119 // results' destination URLs match those provided. 120 void RunTest(const std::wstring text, 121 const std::wstring& desired_tld, 122 bool prevent_inline_autocomplete, 123 const std::string* expected_urls, 124 size_t num_results); 125 126 void RunAdjustOffsetTest(const std::wstring text, size_t expected_offset); 127 128 MessageLoopForUI message_loop_; 129 ChromeThread ui_thread_; 130 ChromeThread file_thread_; 131 ACMatches matches_; 132 scoped_ptr<TestingProfile> profile_; 133 HistoryService* history_service_; 134 135 private: 136 scoped_refptr<HistoryURLProvider> autocomplete_; 137}; 138 139class HistoryURLProviderTestNoDB : public HistoryURLProviderTest { 140 protected: 141 virtual void SetUp() { 142 SetUpImpl(true); 143 } 144}; 145 146void HistoryURLProviderTest::OnProviderUpdate(bool updated_matches) { 147 if (autocomplete_->done()) 148 MessageLoop::current()->Quit(); 149} 150 151void HistoryURLProviderTest::SetUpImpl(bool no_db) { 152 profile_.reset(new TestingProfile()); 153 profile_->CreateHistoryService(true, no_db); 154 history_service_ = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); 155 156 autocomplete_ = new HistoryURLProvider(this, profile_.get(), L"en-US,en,ko"); 157 158 FillData(); 159} 160 161void HistoryURLProviderTest::TearDown() { 162 autocomplete_ = NULL; 163} 164 165void HistoryURLProviderTest::FillData() { 166 // All visits are a long time ago (some tests require this since we do some 167 // special logic for things visited very recently). Note that this time must 168 // be more recent than the "archived history" threshold for the data to go 169 // into the main database. 170 // 171 // TODO(brettw) It would be nice if we could test this behavior, in which 172 // case the time would be specifed in the test_db structure. 173 Time visit_time = Time::Now() - TimeDelta::FromDays(80); 174 175 for (size_t i = 0; i < arraysize(test_db); ++i) { 176 const TestURLInfo& cur = test_db[i]; 177 const GURL current_url(cur.url); 178 history_service_->AddPageWithDetails(current_url, UTF8ToUTF16(cur.title), 179 cur.visit_count, cur.typed_count, 180 visit_time, false, 181 history::SOURCE_BROWSED); 182 } 183} 184 185void HistoryURLProviderTest::RunTest(const std::wstring text, 186 const std::wstring& desired_tld, 187 bool prevent_inline_autocomplete, 188 const std::string* expected_urls, 189 size_t num_results) { 190 AutocompleteInput input(text, desired_tld, prevent_inline_autocomplete, 191 false, false); 192 autocomplete_->Start(input, false); 193 if (!autocomplete_->done()) 194 MessageLoop::current()->Run(); 195 196 matches_ = autocomplete_->matches(); 197 ASSERT_EQ(num_results, matches_.size()) << "Input text: " << text 198 << "\nTLD: \"" << desired_tld << "\""; 199 for (size_t i = 0; i < num_results; ++i) 200 EXPECT_EQ(expected_urls[i], matches_[i].destination_url.spec()); 201} 202 203void HistoryURLProviderTest::RunAdjustOffsetTest(const std::wstring text, 204 size_t expected_offset) { 205 AutocompleteInput input(text, std::wstring(), false, false, false); 206 autocomplete_->Start(input, false); 207 if (!autocomplete_->done()) 208 MessageLoop::current()->Run(); 209 210 matches_ = autocomplete_->matches(); 211 ASSERT_GE(matches_.size(), 1U) << "Input text: " << text; 212 EXPECT_EQ(expected_offset, matches_[0].inline_autocomplete_offset); 213} 214 215TEST_F(HistoryURLProviderTest, PromoteShorterURLs) { 216 // Test that hosts get synthesized below popular pages. 217 const std::string expected_nonsynth[] = { 218 "http://slashdot.org/favorite_page.html", 219 "http://slashdot.org/", 220 }; 221 RunTest(L"slash", std::wstring(), true, expected_nonsynth, 222 arraysize(expected_nonsynth)); 223 224 // Test that hosts get synthesized above less popular pages. 225 const std::string expected_synth[] = { 226 "http://kerneltrap.org/", 227 "http://kerneltrap.org/not_very_popular.html", 228 }; 229 RunTest(L"kernel", std::wstring(), true, expected_synth, 230 arraysize(expected_synth)); 231 232 // Test that unpopular pages are ignored completely. 233 RunTest(L"fresh", std::wstring(), true, NULL, 0); 234 235 // Test that if we have a synthesized host that matches a suggestion, they 236 // get combined into one. 237 const std::string expected_combine[] = { 238 "http://news.google.com/", 239 "http://news.google.com/?ned=us&topic=n", 240 }; 241 ASSERT_NO_FATAL_FAILURE(RunTest(L"news", std::wstring(), true, 242 expected_combine, arraysize(expected_combine))); 243 // The title should also have gotten set properly on the host for the 244 // synthesized one, since it was also in the results. 245 EXPECT_EQ(std::wstring(L"Google News"), matches_.front().description); 246 247 // Test that short URL matching works correctly as the user types more 248 // (several tests): 249 // The entry for foo.com is the best of all five foo.com* entries. 250 const std::string short_1[] = { 251 "http://foo.com/", 252 "http://foo.com/dir/another/again/myfile.html", 253 "http://foo.com/dir/", 254 }; 255 RunTest(L"foo", std::wstring(), true, short_1, arraysize(short_1)); 256 257 // When the user types the whole host, make sure we don't get two results for 258 // it. 259 const std::string short_2[] = { 260 "http://foo.com/", 261 "http://foo.com/dir/another/again/myfile.html", 262 "http://foo.com/dir/", 263 "http://foo.com/dir/another/", 264 }; 265 RunTest(L"foo.com", std::wstring(), true, short_2, arraysize(short_2)); 266 RunTest(L"foo.com/", std::wstring(), true, short_2, arraysize(short_2)); 267 268 // The filename is the second best of the foo.com* entries, but there is a 269 // shorter URL that's "good enough". The host doesn't match the user input 270 // and so should not appear. 271 const std::string short_3[] = { 272 "http://foo.com/d", 273 "http://foo.com/dir/another/", 274 "http://foo.com/dir/another/again/myfile.html", 275 "http://foo.com/dir/", 276 }; 277 RunTest(L"foo.com/d", std::wstring(), true, short_3, arraysize(short_3)); 278 279 // We shouldn't promote shorter URLs than the best if they're not good 280 // enough. 281 const std::string short_4[] = { 282 "http://foo.com/dir/another/a", 283 "http://foo.com/dir/another/again/myfile.html", 284 "http://foo.com/dir/another/again/", 285 }; 286 RunTest(L"foo.com/dir/another/a", std::wstring(), true, short_4, 287 arraysize(short_4)); 288 289 // Exact matches should always be best no matter how much more another match 290 // has been typed. 291 const std::string short_5a[] = { 292 "http://gooey/", 293 "http://www.google.com/", 294 }; 295 const std::string short_5b[] = { 296 "http://go/", 297 "http://gooey/", 298 "http://www.google.com/", 299 }; 300 RunTest(L"g", std::wstring(), false, short_5a, arraysize(short_5a)); 301 RunTest(L"go", std::wstring(), false, short_5b, arraysize(short_5b)); 302} 303 304TEST_F(HistoryURLProviderTest, CullRedirects) { 305 // URLs we will be using, plus the visit counts they will initially get 306 // (the redirect set below will also increment the visit counts). We want 307 // the results to be in A,B,C order. Note also that our visit counts are 308 // all high enough so that domain synthesizing won't get triggered. 309 struct RedirectCase { 310 const char* url; 311 int count; 312 }; 313 static const RedirectCase redirect[] = { 314 {"http://redirects/A", 30}, 315 {"http://redirects/B", 20}, 316 {"http://redirects/C", 10} 317 }; 318 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(redirect); i++) { 319 history_service_->AddPageWithDetails(GURL(redirect[i].url), 320 UTF8ToUTF16("Title"), 321 redirect[i].count, redirect[i].count, 322 Time::Now(), false, 323 history::SOURCE_BROWSED); 324 } 325 326 // Create a B->C->A redirect chain, but set the visit counts such that they 327 // will appear in A,B,C order in the results. The autocomplete query will 328 // search for the most recent visit when looking for redirects, so this will 329 // be found even though the previous visits had no redirects. 330 history::RedirectList redirects_to_a; 331 redirects_to_a.push_back(GURL(redirect[1].url)); 332 redirects_to_a.push_back(GURL(redirect[2].url)); 333 redirects_to_a.push_back(GURL(redirect[0].url)); 334 history_service_->AddPage(GURL(redirect[0].url), NULL, 0, GURL(), 335 PageTransition::TYPED, redirects_to_a, 336 history::SOURCE_BROWSED, true); 337 338 // Because all the results are part of a redirect chain with other results, 339 // all but the first one (A) should be culled. We should get the default 340 // "what you typed" result, plus this one. 341 const std::wstring typing(L"http://redirects/"); 342 const std::string expected_results[] = { 343 WideToUTF8(typing), 344 redirect[0].url}; 345 RunTest(typing, std::wstring(), true, expected_results, 346 arraysize(expected_results)); 347} 348 349TEST_F(HistoryURLProviderTest, WhatYouTyped) { 350 // Make sure we suggest a What You Typed match at the right times. 351 RunTest(L"wytmatch", std::wstring(), false, NULL, 0); 352 RunTest(L"wytmatch foo bar", std::wstring(), false, NULL, 0); 353 RunTest(L"wytmatch+foo+bar", std::wstring(), false, NULL, 0); 354 RunTest(L"wytmatch+foo+bar.com", std::wstring(), false, NULL, 0); 355 356 const std::string results_1[] = {"http://www.wytmatch.com/"}; 357 RunTest(L"wytmatch", L"com", false, results_1, arraysize(results_1)); 358 359 const std::string results_2[] = {"http://wytmatch%20foo%20bar/"}; 360 RunTest(L"http://wytmatch foo bar", std::wstring(), false, results_2, 361 arraysize(results_2)); 362 363 const std::string results_3[] = {"https://wytmatch%20foo%20bar/"}; 364 RunTest(L"https://wytmatch foo bar", std::wstring(), false, results_3, 365 arraysize(results_3)); 366 367 // Test the corner case where a user has fully typed a previously visited 368 // intranet address and is now hitting ctrl-enter, which completes to a 369 // previously unvisted internet domain. 370 const std::string binky_results[] = {"http://binky/"}; 371 const std::string binky_com_results[] = { 372 "http://www.binky.com/", 373 "http://binky/", 374 }; 375 RunTest(L"binky", std::wstring(), false, binky_results, 376 arraysize(binky_results)); 377 RunTest(L"binky", L"com", false, binky_com_results, 378 arraysize(binky_com_results)); 379 380 // Test the related case where a user has fully typed a previously visited 381 // intranet address and is now hitting ctrl-enter, which completes to a 382 // previously visted internet domain. 383 const std::string winky_results[] = { 384 "http://winky/", 385 "http://www.winky.com/", 386 }; 387 const std::string winky_com_results[] = { 388 "http://www.winky.com/", 389 "http://winky/", 390 }; 391 RunTest(L"winky", std::wstring(), false, winky_results, 392 arraysize(winky_results)); 393 RunTest(L"winky", L"com", false, winky_com_results, 394 arraysize(winky_com_results)); 395} 396 397TEST_F(HistoryURLProviderTest, Fixup) { 398 // Test for various past crashes we've had. 399 RunTest(L"\\", std::wstring(), false, NULL, 0); 400 RunTest(L"#", std::wstring(), false, NULL, 0); 401 RunTest(L"%20", std::wstring(), false, NULL, 0); 402 RunTest(L"\uff65@s", std::wstring(), false, NULL, 0); 403 RunTest(L"\u2015\u2015@ \uff7c", std::wstring(), false, NULL, 0); 404 405 // Fixing up "file:" should result in an inline autocomplete offset of just 406 // after "file:", not just after "file://". 407 const std::wstring input_1(L"file:"); 408 const std::string fixup_1[] = {"file:///C:/foo.txt"}; 409 ASSERT_NO_FATAL_FAILURE(RunTest(input_1, std::wstring(), false, fixup_1, 410 arraysize(fixup_1))); 411 EXPECT_EQ(input_1.length(), matches_.front().inline_autocomplete_offset); 412 413 // Fixing up "http:/" should result in an inline autocomplete offset of just 414 // after "http:/", not just after "http:". 415 const std::wstring input_2(L"http:/"); 416 const std::string fixup_2[] = { 417 "http://bogussite.com/a", 418 "http://bogussite.com/b", 419 "http://bogussite.com/c", 420 }; 421 ASSERT_NO_FATAL_FAILURE(RunTest(input_2, std::wstring(), false, fixup_2, 422 arraysize(fixup_2))); 423 EXPECT_EQ(input_2.length(), matches_.front().inline_autocomplete_offset); 424 425 // Adding a TLD to a small number like "56" should result in "www.56.com" 426 // rather than "0.0.0.56.com". 427 const std::string fixup_3[] = {"http://www.56.com/"}; 428 RunTest(L"56", L"com", true, fixup_3, arraysize(fixup_3)); 429 430 // An input looks like a IP address like "127.0.0.1" should result in 431 // "http://127.0.0.1/". 432 const std::string fixup_4[] = {"http://127.0.0.1/"}; 433 RunTest(L"127.0.0.1", std::wstring(), false, fixup_4, arraysize(fixup_4)); 434 435 // An number "17173" should result in "http://www.17173.com/" in db. 436 const std::string fixup_5[] = {"http://www.17173.com/"}; 437 RunTest(L"17173", std::wstring(), false, fixup_5, arraysize(fixup_5)); 438} 439 440TEST_F(HistoryURLProviderTest, AdjustOffset) { 441 RunAdjustOffsetTest(L"http://www.\uAD50\uC721", 13); 442 RunAdjustOffsetTest(L"http://spaces.com/path%20with%20spa", 31); 443 RunAdjustOffsetTest(L"http://ms/c++ s", 15); 444} 445 446TEST_F(HistoryURLProviderTestNoDB, NavigateWithoutDB) { 447 // Ensure that we will still produce matches for navigation when there is no 448 // database. 449 std::string navigation_1[] = {"http://test.com/"}; 450 RunTest(L"test.com", std::wstring(), false, navigation_1, 451 arraysize(navigation_1)); 452 453 std::string navigation_2[] = {"http://slash/"}; 454 RunTest(L"slash", std::wstring(), false, navigation_2, 455 arraysize(navigation_2)); 456 457 RunTest(L"this is a query", std::wstring(), false, NULL, 0); 458} 459