history_url_provider.cc revision 9ab5563a3196760eb381d102cbb2bc0f7abc6a50
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/history_url_provider.h" 6 7#include <algorithm> 8 9#include "base/basictypes.h" 10#include "base/bind.h" 11#include "base/command_line.h" 12#include "base/message_loop/message_loop.h" 13#include "base/metrics/histogram.h" 14#include "base/prefs/pref_service.h" 15#include "base/strings/string_util.h" 16#include "base/strings/utf_string_conversions.h" 17#include "chrome/browser/autocomplete/autocomplete_match.h" 18#include "chrome/browser/autocomplete/autocomplete_provider_listener.h" 19#include "chrome/browser/history/history_backend.h" 20#include "chrome/browser/history/history_database.h" 21#include "chrome/browser/history/history_service.h" 22#include "chrome/browser/history/history_service_factory.h" 23#include "chrome/browser/history/history_types.h" 24#include "chrome/browser/net/url_fixer_upper.h" 25#include "chrome/browser/omnibox/omnibox_field_trial.h" 26#include "chrome/browser/profiles/profile.h" 27#include "chrome/browser/search_engines/template_url_service.h" 28#include "chrome/browser/search_engines/template_url_service_factory.h" 29#include "chrome/common/chrome_switches.h" 30#include "chrome/common/pref_names.h" 31#include "chrome/common/url_constants.h" 32#include "net/base/net_util.h" 33#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 34#include "url/gurl.h" 35#include "url/url_parse.h" 36#include "url/url_util.h" 37 38namespace { 39 40// If |create_if_necessary| is true, ensures that |matches| contains an 41// entry for |info|, creating a new such entry if necessary (using 42// |input_location| and |match_in_scheme|). 43// 44// If |promote| is true, this also ensures the entry is the first element in 45// |matches|, moving or adding it to the front as appropriate. When |promote| 46// is false, existing matches are left in place, and newly added matches are 47// placed at the back. 48// 49// It's OK to call this function with both |create_if_necessary| and 50// |promote| false, in which case we'll do nothing. 51void CreateOrPromoteMatch(const history::URLRow& info, 52 size_t input_location, 53 bool match_in_scheme, 54 history::HistoryMatches* matches, 55 bool create_if_necessary, 56 bool promote) { 57 // |matches| may already have an entry for this. 58 for (history::HistoryMatches::iterator i(matches->begin()); 59 i != matches->end(); ++i) { 60 if (i->url_info.url() == info.url()) { 61 // Rotate it to the front if the caller wishes. 62 if (promote) 63 std::rotate(matches->begin(), i, i + 1); 64 return; 65 } 66 } 67 68 if (!create_if_necessary) 69 return; 70 71 // No entry, so create one. 72 history::HistoryMatch match(info, input_location, match_in_scheme, true); 73 if (promote) 74 matches->push_front(match); 75 else 76 matches->push_back(match); 77} 78 79// Given the user's |input| and a |match| created from it, reduce the match's 80// URL to just a host. If this host still matches the user input, return it. 81// Returns the empty string on failure. 82GURL ConvertToHostOnly(const history::HistoryMatch& match, 83 const string16& input) { 84 // See if we should try to do host-only suggestions for this URL. Nonstandard 85 // schemes means there's no authority section, so suggesting the host name 86 // is useless. File URLs are standard, but host suggestion is not useful for 87 // them either. 88 const GURL& url = match.url_info.url(); 89 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) 90 return GURL(); 91 92 // Transform to a host-only match. Bail if the host no longer matches the 93 // user input (e.g. because the user typed more than just a host). 94 GURL host = url.GetWithEmptyPath(); 95 if ((host.spec().length() < (match.input_location + input.length()))) 96 return GURL(); // User typing is longer than this host suggestion. 97 98 const string16 spec = UTF8ToUTF16(host.spec()); 99 if (spec.compare(match.input_location, input.length(), input)) 100 return GURL(); // User typing is no longer a prefix. 101 102 return host; 103} 104 105// Acts like the > operator for URLInfo classes. 106bool CompareHistoryMatch(const history::HistoryMatch& a, 107 const history::HistoryMatch& b) { 108 // A URL that has been typed at all is better than one that has never been 109 // typed. (Note "!"s on each side) 110 if (!a.url_info.typed_count() != !b.url_info.typed_count()) 111 return a.url_info.typed_count() > b.url_info.typed_count(); 112 113 // Innermost matches (matches after any scheme or "www.") are better than 114 // non-innermost matches. 115 if (a.innermost_match != b.innermost_match) 116 return a.innermost_match; 117 118 // URLs that have been typed more often are better. 119 if (a.url_info.typed_count() != b.url_info.typed_count()) 120 return a.url_info.typed_count() > b.url_info.typed_count(); 121 122 // For URLs that have each been typed once, a host (alone) is better than a 123 // page inside. 124 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) 125 return a.IsHostOnly(); 126 127 // URLs that have been visited more often are better. 128 if (a.url_info.visit_count() != b.url_info.visit_count()) 129 return a.url_info.visit_count() > b.url_info.visit_count(); 130 131 // URLs that have been visited more recently are better. 132 return a.url_info.last_visit() > b.url_info.last_visit(); 133} 134 135// Extracts typed_count, visit_count, and last_visited time from the 136// URLRow and puts them in the additional info field of the |match| 137// for display in about:omnibox. 138void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, 139 AutocompleteMatch* match) { 140 match->RecordAdditionalInfo("typed count", info.typed_count()); 141 match->RecordAdditionalInfo("visit count", info.visit_count()); 142 match->RecordAdditionalInfo("last visit", info.last_visit()); 143} 144 145} // namespace 146 147// ----------------------------------------------------------------- 148// SearchTermsDataSnapshot 149 150// Implementation of SearchTermsData that takes a snapshot of another 151// SearchTermsData by copying all the responses to the different getters into 152// member strings, then returning those strings when its own getters are called. 153// This will typically be constructed on the UI thread from 154// UIThreadSearchTermsData but is subsequently safe to use on any thread. 155class SearchTermsDataSnapshot : public SearchTermsData { 156 public: 157 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); 158 virtual ~SearchTermsDataSnapshot(); 159 160 virtual std::string GoogleBaseURLValue() const OVERRIDE; 161 virtual std::string GetApplicationLocale() const OVERRIDE; 162 virtual string16 GetRlzParameterValue() const OVERRIDE; 163 virtual std::string GetSearchClient() const OVERRIDE; 164 virtual std::string InstantEnabledParam() const OVERRIDE; 165 virtual std::string InstantExtendedEnabledParam() const OVERRIDE; 166 virtual std::string NTPIsThemedParam() const OVERRIDE; 167 168 private: 169 std::string google_base_url_value_; 170 std::string application_locale_; 171 string16 rlz_parameter_value_; 172 std::string search_client_; 173 std::string instant_enabled_param_; 174 std::string instant_extended_enabled_param_; 175 std::string ntp_is_themed_param_; 176 177 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); 178}; 179 180SearchTermsDataSnapshot::SearchTermsDataSnapshot( 181 const SearchTermsData& search_terms_data) 182 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), 183 application_locale_(search_terms_data.GetApplicationLocale()), 184 rlz_parameter_value_(search_terms_data.GetRlzParameterValue()), 185 search_client_(search_terms_data.GetSearchClient()), 186 instant_enabled_param_(search_terms_data.InstantEnabledParam()), 187 instant_extended_enabled_param_( 188 search_terms_data.InstantExtendedEnabledParam()), 189 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()) {} 190 191SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { 192} 193 194std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const { 195 return google_base_url_value_; 196} 197 198std::string SearchTermsDataSnapshot::GetApplicationLocale() const { 199 return application_locale_; 200} 201 202string16 SearchTermsDataSnapshot::GetRlzParameterValue() const { 203 return rlz_parameter_value_; 204} 205 206std::string SearchTermsDataSnapshot::GetSearchClient() const { 207 return search_client_; 208} 209 210std::string SearchTermsDataSnapshot::InstantEnabledParam() const { 211 return instant_enabled_param_; 212} 213 214std::string SearchTermsDataSnapshot::InstantExtendedEnabledParam() const { 215 return instant_extended_enabled_param_; 216} 217 218std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { 219 return ntp_is_themed_param_; 220} 221 222// ----------------------------------------------------------------- 223// HistoryURLProvider 224 225// These ugly magic numbers will go away once we switch all scoring 226// behavior (including URL-what-you-typed) to HistoryQuick provider. 227const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; 228const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; 229const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; 230const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; 231 232// VisitClassifier is used to classify the type of visit to a particular url. 233class HistoryURLProvider::VisitClassifier { 234 public: 235 enum Type { 236 INVALID, // Navigations to the URL are not allowed. 237 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but 238 // which is known to refer to a visited intranet host. 239 VISITED, // The site has been previously visited. 240 }; 241 242 VisitClassifier(HistoryURLProvider* provider, 243 const AutocompleteInput& input, 244 history::URLDatabase* db); 245 246 // Returns the type of visit for the specified input. 247 Type type() const { return type_; } 248 249 // Returns the URLRow for the visit. 250 const history::URLRow& url_row() const { return url_row_; } 251 252 private: 253 HistoryURLProvider* provider_; 254 history::URLDatabase* db_; 255 Type type_; 256 history::URLRow url_row_; 257 258 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); 259}; 260 261HistoryURLProvider::VisitClassifier::VisitClassifier( 262 HistoryURLProvider* provider, 263 const AutocompleteInput& input, 264 history::URLDatabase* db) 265 : provider_(provider), 266 db_(db), 267 type_(INVALID) { 268 const GURL& url = input.canonicalized_url(); 269 // Detect email addresses. These cases will look like "http://user@site/", 270 // and because the history backend strips auth creds, we'll get a bogus exact 271 // match below if the user has visited "site". 272 if (!url.is_valid() || 273 ((input.type() == AutocompleteInput::UNKNOWN) && 274 input.parts().username.is_nonempty() && 275 !input.parts().password.is_nonempty() && 276 !input.parts().path.is_nonempty())) 277 return; 278 279 if (db_->GetRowForURL(url, &url_row_)) { 280 type_ = VISITED; 281 return; 282 } 283 284 if (provider_->CanFindIntranetURL(db_, input)) { 285 // The user typed an intranet hostname that they've visited (albeit with a 286 // different port and/or path) before. 287 url_row_ = history::URLRow(url); 288 type_ = UNVISITED_INTRANET; 289 } 290} 291 292HistoryURLProviderParams::HistoryURLProviderParams( 293 const AutocompleteInput& input, 294 bool trim_http, 295 const std::string& languages, 296 TemplateURL* default_search_provider, 297 const SearchTermsData& search_terms_data) 298 : message_loop(base::MessageLoop::current()), 299 input(input), 300 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), 301 trim_http(trim_http), 302 failed(false), 303 languages(languages), 304 dont_suggest_exact_input(false), 305 default_search_provider(default_search_provider ? 306 new TemplateURL(default_search_provider->profile(), 307 default_search_provider->data()) : NULL), 308 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { 309} 310 311HistoryURLProviderParams::~HistoryURLProviderParams() { 312} 313 314HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, 315 Profile* profile) 316 : HistoryProvider(listener, profile, 317 AutocompleteProvider::TYPE_HISTORY_URL), 318 params_(NULL), 319 cull_redirects_( 320 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() || 321 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrialExperimentGroup()), 322 create_shorter_match_( 323 !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || 324 !OmniboxFieldTrial:: 325 InHUPCreateShorterMatchFieldTrialExperimentGroup()), 326 search_url_database_(true) { 327} 328 329// static 330AutocompleteMatch HistoryURLProvider::SuggestExactInput( 331 AutocompleteProvider* provider, 332 const AutocompleteInput& input, 333 bool trim_http) { 334 AutocompleteMatch match(provider, 0, false, 335 AutocompleteMatchType::URL_WHAT_YOU_TYPED); 336 337 const GURL& url = input.canonicalized_url(); 338 if (url.is_valid()) { 339 match.destination_url = url; 340 341 // Trim off "http://" if the user didn't type it. 342 // NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to 343 // strip the scheme as we need to know the offset so we can adjust the 344 // |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have 345 // slightly different behavior as well (the latter will strip even without 346 // two slashes after the scheme). 347 string16 display_string(provider->StringForURLDisplay(url, false, false)); 348 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; 349 match.fill_into_edit = 350 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, 351 display_string); 352 // NOTE: Don't set match.inline_autocomplete_offset (to allow inline 353 // autocompletion) here, it's surprising and annoying. 354 355 // Try to highlight "innermost" match location. If we fix up "w" into 356 // "www.w.com", we want to highlight the fifth character, not the first. 357 // This relies on match.destination_url being the non-prefix-trimmed version 358 // of match.contents. 359 match.contents = display_string; 360 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( 361 UTF8ToUTF16(match.destination_url.spec()), input.text()); 362 363 // We only want to trim the HTTP scheme off our match if the user didn't 364 // explicitly type "http:". 365 DCHECK(!trim_http || !HasHTTPScheme(input.text())); 366 367 // Because of the vagaries of GURL, it's possible for match.destination_url 368 // to not contain the user's input at all (so |best_prefix| is NULL). 369 // In this case don't mark anything as a match. 370 const size_t match_location = (best_prefix == NULL) ? 371 string16::npos : best_prefix->prefix.length() - offset; 372 AutocompleteMatch::ClassifyLocationInString(match_location, 373 input.text().length(), 374 match.contents.length(), 375 ACMatchClassification::URL, 376 &match.contents_class); 377 378 match.is_history_what_you_typed_match = true; 379 } 380 381 return match; 382} 383 384void HistoryURLProvider::Start(const AutocompleteInput& input, 385 bool minimal_changes) { 386 // NOTE: We could try hard to do less work in the |minimal_changes| case 387 // here; some clever caching would let us reuse the raw matches from the 388 // history DB without re-querying. However, we'd still have to go back to 389 // the history thread to mark these up properly, and if pass 2 is currently 390 // running, we'd need to wait for it to return to the main thread before 391 // doing this (we can't just write new data for it to read due to thread 392 // safety issues). At that point it's just as fast, and easier, to simply 393 // re-run the query from scratch and ignore |minimal_changes|. 394 395 // Cancel any in-progress query. 396 Stop(false); 397 398 RunAutocompletePasses(input, true); 399} 400 401void HistoryURLProvider::Stop(bool clear_cached_results) { 402 done_ = true; 403 404 if (params_) 405 params_->cancel_flag.Set(); 406} 407 408// Called on the history thread. 409void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, 410 history::URLDatabase* db, 411 HistoryURLProviderParams* params) { 412 // We may get called with a NULL database if it couldn't be properly 413 // initialized. 414 if (!db) { 415 params->failed = true; 416 } else if (!params->cancel_flag.IsSet()) { 417 base::TimeTicks beginning_time = base::TimeTicks::Now(); 418 419 DoAutocomplete(backend, db, params); 420 421 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", 422 base::TimeTicks::Now() - beginning_time); 423 } 424 425 // Return the results (if any) to the main thread. 426 params->message_loop->PostTask(FROM_HERE, base::Bind( 427 &HistoryURLProvider::QueryComplete, this, params)); 428} 429 430// Used by both autocomplete passes, and therefore called on multiple different 431// threads (though not simultaneously). 432void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, 433 history::URLDatabase* db, 434 HistoryURLProviderParams* params) { 435 VisitClassifier classifier(this, params->input, db); 436 // Create a What You Typed match, which we'll need below. 437 // 438 // We display this to the user when there's a reasonable chance they actually 439 // care: 440 // * Their input can be opened as a URL, and 441 // * We parsed the input as a URL, or it starts with an explicit "http:" or 442 // "https:". 443 // that is when their input can be opened as a URL. 444 // Otherwise, this is just low-quality noise. In the cases where we've parsed 445 // as UNKNOWN, we'll still show an accidental search infobar if need be. 446 bool have_what_you_typed_match = 447 params->input.canonicalized_url().is_valid() && 448 (params->input.type() != AutocompleteInput::QUERY) && 449 ((params->input.type() != AutocompleteInput::UNKNOWN) || 450 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || 451 !params->trim_http || 452 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); 453 AutocompleteMatch what_you_typed_match( 454 SuggestExactInput(this, params->input, params->trim_http)); 455 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 456 457 // Get the matching URLs from the DB 458 history::URLRows url_matches; 459 history::HistoryMatches history_matches; 460 461 if (search_url_database_) { 462 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); 463 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); 464 ++i) { 465 if (params->cancel_flag.IsSet()) 466 return; // Canceled in the middle of a query, give up. 467 // We only need kMaxMatches results in the end, but before we 468 // get there we need to promote lower-quality matches that are 469 // prefixes of higher-quality matches, and remove lower-quality 470 // redirects. So we ask for more results than we need, of every 471 // prefix type, in hopes this will give us far more than enough 472 // to work with. CullRedirects() will then reduce the list to 473 // the best kMaxMatches results. 474 db->AutocompleteForPrefix( 475 UTF16ToUTF8(i->prefix + params->input.text()), 476 kMaxMatches * 2, 477 (backend == NULL), 478 &url_matches); 479 for (history::URLRows::const_iterator j(url_matches.begin()); 480 j != url_matches.end(); ++j) { 481 const URLPrefix* best_prefix = 482 URLPrefix::BestURLPrefix(UTF8ToUTF16(j->url().spec()), string16()); 483 DCHECK(best_prefix != NULL); 484 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), 485 i->num_components == 0, 486 i->num_components >= best_prefix->num_components)); 487 } 488 } 489 } 490 491 // Create sorted list of suggestions. 492 CullPoorMatches(&history_matches, params); 493 SortMatches(&history_matches); 494 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, 495 what_you_typed_match, &history_matches); 496 497 // Try to promote a match as an exact/inline autocomplete match. This also 498 // moves it to the front of |history_matches|, so skip over it when 499 // converting the rest of the matches. 500 size_t first_match = 1; 501 size_t exact_suggestion = 0; 502 // Checking |is_history_what_you_typed_match| tells us whether 503 // SuggestExactInput() succeeded in constructing a valid match. 504 if (what_you_typed_match.is_history_what_you_typed_match && 505 (!backend || !params->dont_suggest_exact_input) && 506 FixupExactSuggestion(db, params->input, classifier, &what_you_typed_match, 507 &history_matches)) { 508 // Got an exact match for the user's input. Treat it as the best match 509 // regardless of the input type. 510 exact_suggestion = 1; 511 params->matches.push_back(what_you_typed_match); 512 } else if (params->prevent_inline_autocomplete || 513 history_matches.empty() || 514 !PromoteMatchForInlineAutocomplete(params, history_matches.front())) { 515 // Failed to promote any URLs for inline autocompletion. Use the What You 516 // Typed match, if we have it. 517 first_match = 0; 518 if (have_what_you_typed_match) 519 params->matches.push_back(what_you_typed_match); 520 } 521 522 // This is the end of the synchronous pass. 523 if (!backend) 524 return; 525 // If search_url_database_ is false, we shouldn't have scheduled a second 526 // pass. 527 DCHECK(search_url_database_); 528 529 // Determine relevancy of highest scoring match, if any. 530 int relevance = -1; 531 for (ACMatches::const_iterator it = params->matches.begin(); 532 it != params->matches.end(); ++it) { 533 relevance = std::max(relevance, it->relevance); 534 } 535 536 if (cull_redirects_) { 537 // Remove redirects and trim list to size. We want to provide up to 538 // kMaxMatches results plus the What You Typed result, if it was added to 539 // |history_matches| above. 540 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); 541 } else { 542 // Simply trim the list to size. 543 if (history_matches.size() > kMaxMatches + exact_suggestion) 544 history_matches.resize(kMaxMatches + exact_suggestion); 545 } 546 547 // Convert the history matches to autocomplete matches. 548 for (size_t i = first_match; i < history_matches.size(); ++i) { 549 const history::HistoryMatch& match = history_matches[i]; 550 DCHECK(!have_what_you_typed_match || 551 (match.url_info.url() != 552 GURL(params->matches.front().destination_url))); 553 // If we've assigned a score already, all later matches score one 554 // less than the previous match. 555 relevance = (relevance > 0) ? (relevance - 1) : 556 CalculateRelevance(NORMAL, history_matches.size() - 1 - i); 557 AutocompleteMatch ac_match = HistoryMatchToACMatch(params, match, 558 NORMAL, relevance); 559 params->matches.push_back(ac_match); 560 } 561} 562 563// Called on the main thread when the query is complete. 564void HistoryURLProvider::QueryComplete( 565 HistoryURLProviderParams* params_gets_deleted) { 566 // Ensure |params_gets_deleted| gets deleted on exit. 567 scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted); 568 569 // If the user hasn't already started another query, clear our member pointer 570 // so we can't write into deleted memory. 571 if (params_ == params_gets_deleted) 572 params_ = NULL; 573 574 // Don't send responses for queries that have been canceled. 575 if (params->cancel_flag.IsSet()) 576 return; // Already set done_ when we canceled, no need to set it again. 577 578 // Don't modify |matches_| if the query failed, since it might have a default 579 // match in it, whereas |params->matches| will be empty. 580 if (!params->failed) { 581 matches_.swap(params->matches); 582 UpdateStarredStateOfMatches(); 583 } 584 585 done_ = true; 586 listener_->OnProviderUpdate(true); 587} 588 589HistoryURLProvider::~HistoryURLProvider() { 590 // Note: This object can get leaked on shutdown if there are pending 591 // requests on the database (which hold a reference to us). Normally, these 592 // messages get flushed for each thread. We do a round trip from main, to 593 // history, back to main while holding a reference. If the main thread 594 // completes before the history thread, the message to delegate back to the 595 // main thread will not run and the reference will leak. Therefore, don't do 596 // anything on destruction. 597} 598 599int HistoryURLProvider::CalculateRelevance(MatchType match_type, 600 size_t match_number) const { 601 switch (match_type) { 602 case INLINE_AUTOCOMPLETE: 603 return kScoreForBestInlineableResult; 604 605 case UNVISITED_INTRANET: 606 return kScoreForUnvisitedIntranetResult; 607 608 case WHAT_YOU_TYPED: 609 return kScoreForWhatYouTypedResult; 610 611 default: // NORMAL 612 return kBaseScoreForNonInlineableResult + 613 static_cast<int>(match_number); 614 } 615} 616 617void HistoryURLProvider::RunAutocompletePasses( 618 const AutocompleteInput& input, 619 bool fixup_input_and_run_pass_1) { 620 matches_.clear(); 621 622 if ((input.type() == AutocompleteInput::INVALID) || 623 (input.type() == AutocompleteInput::FORCED_QUERY)) 624 return; 625 626 // Create a match for exactly what the user typed. This will only be used as 627 // a fallback in case we can't get the history service or URL DB; otherwise, 628 // we'll run this again in DoAutocomplete() and use that result instead. 629 const bool trim_http = !HasHTTPScheme(input.text()); 630 // Don't do this for queries -- while we can sometimes mark up a match for 631 // this, it's not what the user wants, and just adds noise. 632 if ((input.type() != AutocompleteInput::QUERY) && 633 input.canonicalized_url().is_valid()) { 634 AutocompleteMatch what_you_typed(SuggestExactInput(this, input, trim_http)); 635 what_you_typed.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 636 matches_.push_back(what_you_typed); 637 } 638 639 // We'll need the history service to run both passes, so try to obtain it. 640 if (!profile_) 641 return; 642 HistoryService* const history_service = 643 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 644 if (!history_service) 645 return; 646 647 // Get the default search provider and search terms data now since we have to 648 // retrieve these on the UI thread, and the second pass runs on the history 649 // thread. |template_url_service| can be NULL when testing. 650 TemplateURLService* template_url_service = 651 TemplateURLServiceFactory::GetForProfile(profile_); 652 TemplateURL* default_search_provider = template_url_service ? 653 template_url_service->GetDefaultSearchProvider() : NULL; 654 UIThreadSearchTermsData data(profile_); 655 656 // Create the data structure for the autocomplete passes. We'll save this off 657 // onto the |params_| member for later deletion below if we need to run pass 658 // 2. 659 std::string languages(languages_); 660 if (languages.empty()) { 661 languages = 662 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 663 } 664 scoped_ptr<HistoryURLProviderParams> params( 665 new HistoryURLProviderParams(input, trim_http, languages, 666 default_search_provider, data)); 667 668 params->prevent_inline_autocomplete = 669 PreventInlineAutocomplete(input); 670 671 if (fixup_input_and_run_pass_1) { 672 // Do some fixup on the user input before matching against it, so we provide 673 // good results for local file paths, input with spaces, etc. 674 if (!FixupUserInput(¶ms->input)) 675 return; 676 677 // Pass 1: Get the in-memory URL database, and use it to find and promote 678 // the inline autocomplete match, if any. 679 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 680 // url_db can be NULL if it hasn't finished initializing (or failed to 681 // initialize). In this case all we can do is fall back on the second 682 // pass. 683 // 684 // TODO(pkasting): We should just block here until this loads. Any time 685 // someone unloads the history backend, we'll get inconsistent inline 686 // autocomplete behavior here. 687 if (url_db) { 688 DoAutocomplete(NULL, url_db, params.get()); 689 // params->matches now has the matches we should expose to the provider. 690 // Pass 2 expects a "clean slate" set of matches. 691 matches_.clear(); 692 matches_.swap(params->matches); 693 UpdateStarredStateOfMatches(); 694 } 695 } 696 697 // Pass 2: Ask the history service to call us back on the history thread, 698 // where we can read the full on-disk DB. 699 if (search_url_database_ && 700 (input.matches_requested() == AutocompleteInput::ALL_MATCHES)) { 701 done_ = false; 702 params_ = params.release(); // This object will be destroyed in 703 // QueryComplete() once we're done with it. 704 history_service->ScheduleAutocomplete(this, params_); 705 } 706} 707 708bool HistoryURLProvider::FixupExactSuggestion( 709 history::URLDatabase* db, 710 const AutocompleteInput& input, 711 const VisitClassifier& classifier, 712 AutocompleteMatch* match, 713 history::HistoryMatches* matches) const { 714 DCHECK(match != NULL); 715 DCHECK(matches != NULL); 716 717 MatchType type = INLINE_AUTOCOMPLETE; 718 switch (classifier.type()) { 719 case VisitClassifier::INVALID: 720 return false; 721 case VisitClassifier::UNVISITED_INTRANET: 722 type = UNVISITED_INTRANET; 723 break; 724 default: 725 DCHECK_EQ(VisitClassifier::VISITED, classifier.type()); 726 // We have data for this match, use it. 727 match->deletable = true; 728 match->description = classifier.url_row().title(); 729 RecordAdditionalInfoFromUrlRow(classifier.url_row(), match); 730 AutocompleteMatch::ClassifyMatchInString( 731 input.text(), 732 classifier.url_row().title(), 733 ACMatchClassification::NONE, &match->description_class); 734 if (!classifier.url_row().typed_count()) { 735 // If we reach here, we must be in the second pass, and we must not have 736 // this row's data available during the first pass. That means we 737 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to 738 // maintain the ordering between passes consistent, we need to score it 739 // the same way here. 740 type = CanFindIntranetURL(db, input) ? 741 UNVISITED_INTRANET : WHAT_YOU_TYPED; 742 } 743 break; 744 } 745 746 const GURL& url = match->destination_url; 747 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 748 // If the what-you-typed result looks like a single word (which can be 749 // interpreted as an intranet address) followed by a pound sign ("#"), 750 // leave the score for the url-what-you-typed result as is. It will be 751 // outscored by a search query from the SearchProvider. This test fixes 752 // cases such as "c#" and "c# foo" where the user has visited an intranet 753 // site "c". We want the search-what-you-typed score to beat the 754 // URL-what-you-typed score in this case. Most of the below test tries to 755 // make sure that this code does not trigger if the user did anything to 756 // indicate the desired match is a URL. For instance, "c/# foo" will not 757 // pass the test because that will be classified as input type URL. The 758 // parsed.CountCharactersBefore() in the test looks for the presence of a 759 // reference fragment in the URL by checking whether the position differs 760 // included the delimiter (pound sign) versus not including the delimiter. 761 // (One cannot simply check url.ref() because it will not distinguish 762 // between the input "c" and the input "c#", both of which will have empty 763 // reference fragments.) 764 if ((type == UNVISITED_INTRANET) && 765 (input.type() != AutocompleteInput::URL) && 766 url.username().empty() && url.password().empty() && url.port().empty() && 767 (url.path() == "/") && url.query().empty() && 768 (parsed.CountCharactersBefore(url_parse::Parsed::REF, true) != 769 parsed.CountCharactersBefore(url_parse::Parsed::REF, false))) { 770 return false; 771 } 772 773 match->relevance = CalculateRelevance(type, 0); 774 775 // If there are any other matches, then don't promote this match here, in 776 // hopes the caller will be able to inline autocomplete a better suggestion. 777 // DoAutocomplete() will fall back on this match if inline autocompletion 778 // fails. This matches how we react to never-visited URL inputs in the non- 779 // intranet case. 780 if (type == UNVISITED_INTRANET && !matches->empty()) 781 return false; 782 783 // Put it on the front of the HistoryMatches for redirect culling. 784 CreateOrPromoteMatch(classifier.url_row(), string16::npos, false, matches, 785 true, true); 786 return true; 787} 788 789bool HistoryURLProvider::CanFindIntranetURL( 790 history::URLDatabase* db, 791 const AutocompleteInput& input) const { 792 // Normally passing the first two conditions below ought to guarantee the 793 // third condition, but because FixupUserInput() can run and modify the 794 // input's text and parts between Parse() and here, it seems better to be 795 // paranoid and check. 796 if ((input.type() != AutocompleteInput::UNKNOWN) || 797 !LowerCaseEqualsASCII(input.scheme(), chrome::kHttpScheme) || 798 !input.parts().host.is_nonempty()) 799 return false; 800 const std::string host(UTF16ToUTF8( 801 input.text().substr(input.parts().host.begin, input.parts().host.len))); 802 const size_t registry_length = 803 net::registry_controlled_domains::GetRegistryLength( 804 host, 805 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 806 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 807 return registry_length == 0 && db->IsTypedHost(host); 808} 809 810bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( 811 HistoryURLProviderParams* params, 812 const history::HistoryMatch& match) { 813 // Promote the first match if it's been typed at least n times, where n == 1 814 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar 815 // for these long URLs because it's less likely that users will want to visit 816 // them again. Even though we don't increment the typed_count for pasted-in 817 // URLs, if the user manually edits the URL or types some long thing in by 818 // hand, we wouldn't want to immediately start autocompleting it. 819 if (!match.url_info.typed_count() || 820 ((match.url_info.typed_count() == 1) && 821 !match.IsHostOnly())) 822 return false; 823 824 // In the case where the user has typed "foo.com" and visited (but not typed) 825 // "foo/", and the input is "foo", we can reach here for "foo.com" during the 826 // first pass but have the second pass suggest the exact input as a better 827 // URL. Since we need both passes to agree, and since during the first pass 828 // there's no way to know about "foo/", make reaching this point prevent any 829 // future pass from suggesting the exact input as a better match. 830 if (params) { 831 params->dont_suggest_exact_input = true; 832 params->matches.push_back(HistoryMatchToACMatch(params, match, 833 INLINE_AUTOCOMPLETE, CalculateRelevance(INLINE_AUTOCOMPLETE, 0))); 834 } 835 return true; 836} 837 838// See if a shorter version of the best match should be created, and if so place 839// it at the front of |matches|. This can suggest history URLs that are 840// prefixes of the best match (if they've been visited enough, compared to the 841// best match), or create host-only suggestions even when they haven't been 842// visited before: if the user visited http://example.com/asdf once, we'll 843// suggest http://example.com/ even if they've never been to it. 844void HistoryURLProvider::PromoteOrCreateShorterSuggestion( 845 history::URLDatabase* db, 846 const HistoryURLProviderParams& params, 847 bool have_what_you_typed_match, 848 const AutocompleteMatch& what_you_typed_match, 849 history::HistoryMatches* matches) { 850 if (matches->empty()) 851 return; // No matches, nothing to do. 852 853 // Determine the base URL from which to search, and whether that URL could 854 // itself be added as a match. We can add the base iff it's not "effectively 855 // the same" as any "what you typed" match. 856 const history::HistoryMatch& match = matches->front(); 857 GURL search_base = ConvertToHostOnly(match, params.input.text()); 858 bool can_add_search_base_to_matches = !have_what_you_typed_match; 859 if (search_base.is_empty()) { 860 // Search from what the user typed when we couldn't reduce the best match 861 // to a host. Careful: use a substring of |match| here, rather than the 862 // first match in |params|, because they might have different prefixes. If 863 // the user typed "google.com", |what_you_typed_match| will hold 864 // "http://google.com/", but |match| might begin with 865 // "http://www.google.com/". 866 // TODO: this should be cleaned up, and is probably incorrect for IDN. 867 std::string new_match = match.url_info.url().possibly_invalid_spec(). 868 substr(0, match.input_location + params.input.text().length()); 869 search_base = GURL(new_match); 870 // TODO(mrossetti): There is a degenerate case where the following may 871 // cause a failure: http://www/~someword/fubar.html. Diagnose. 872 // See: http://crbug.com/50101 873 if (search_base.is_empty()) 874 return; // Can't construct a valid URL from which to start a search. 875 } else if (!can_add_search_base_to_matches) { 876 can_add_search_base_to_matches = 877 (search_base != what_you_typed_match.destination_url); 878 } 879 if (search_base == match.url_info.url()) 880 return; // Couldn't shorten |match|, so no range of URLs to search over. 881 882 // Search the DB for short URLs between our base and |match|. 883 history::URLRow info(search_base); 884 bool promote = true; 885 // A short URL is only worth suggesting if it's been visited at least a third 886 // as often as the longer URL. 887 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; 888 // For stability between the in-memory and on-disk autocomplete passes, when 889 // the long URL has been typed before, only suggest shorter URLs that have 890 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL 891 // (which hasn't been typed) that the in-memory pass doesn't know about, 892 // thereby making the top match, and thus the behavior of inline 893 // autocomplete, unstable. 894 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; 895 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), 896 match.url_info.url().possibly_invalid_spec(), min_visit_count, 897 min_typed_count, can_add_search_base_to_matches, &info)) { 898 if (!can_add_search_base_to_matches) 899 return; // Couldn't find anything and can't add the search base, bail. 900 901 // Try to get info on the search base itself. Promote it to the top if the 902 // original best match isn't good enough to autocomplete. 903 db->GetRowForURL(search_base, &info); 904 promote = match.url_info.typed_count() <= 1; 905 } 906 907 // Promote or add the desired URL to the list of matches. 908 bool ensure_can_inline = 909 promote && PromoteMatchForInlineAutocomplete(NULL, match); 910 CreateOrPromoteMatch(info, match.input_location, match.match_in_scheme, 911 matches, create_shorter_match_, promote); 912 if (ensure_can_inline) { 913 // If |match| was inline-autocompletable and we're promoting something to 914 // replace it, make sure the promoted item is also inline-autocompletable. 915 // Setting the typed_count to 2 is sufficient to guarantee this (and is safe 916 // because by this point all sorting has already happened and the only thing 917 // checking the typed_count will be PromoteMatchForInlineAutocomplete()). 918 // 919 // We have to do this here rather than changing |info| before calling 920 // EnsureMatchPresent() because if EnsureMatchPresent() merely moves an 921 // existing match to the front, it will ignore the typed_count in |info|. 922 // But we set |ensure_can_inline| above because |match| is a reference and 923 // thus checking it here would examine the wrong match. 924 matches->front().url_info.set_typed_count(2); 925 } 926} 927 928void HistoryURLProvider::SortMatches(history::HistoryMatches* matches) const { 929 // Sort by quality, best first. 930 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); 931 932 // Remove duplicate matches (caused by the search string appearing in one of 933 // the prefixes as well as after it). Consider the following scenario: 934 // 935 // User has visited "http://http.com" once and "http://htaccess.com" twice. 936 // User types "http". The autocomplete search with prefix "http://" returns 937 // the first host, while the search with prefix "" returns both hosts. Now 938 // we sort them into rank order: 939 // http://http.com (innermost_match) 940 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) 941 // http://http.com (!innermost_match, url_info.visit_count == 1) 942 // 943 // The above scenario tells us we can't use std::unique(), since our 944 // duplicates are not always sequential. It also tells us we should remove 945 // the lower-quality duplicate(s), since otherwise the returned results won't 946 // be ordered correctly. This is easy to do: we just always remove the later 947 // element of a duplicate pair. 948 // Be careful! Because the vector contents may change as we remove elements, 949 // we use an index instead of an iterator in the outer loop, and don't 950 // precalculate the ending position. 951 for (size_t i = 0; i < matches->size(); ++i) { 952 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); 953 j != matches->end(); ) { 954 if ((*matches)[i].url_info.url() == j->url_info.url()) 955 j = matches->erase(j); 956 else 957 ++j; 958 } 959 } 960} 961 962void HistoryURLProvider::CullPoorMatches( 963 history::HistoryMatches* matches, 964 HistoryURLProviderParams* params) const { 965 const base::Time& threshold(history::AutocompleteAgeThreshold()); 966 for (history::HistoryMatches::iterator i(matches->begin()); 967 i != matches->end(); ) { 968 if (RowQualifiesAsSignificant(i->url_info, threshold) && 969 !(params->default_search_provider && 970 params->default_search_provider->IsSearchURLUsingTermsData( 971 i->url_info.url(), *params->search_terms_data.get()))) { 972 ++i; 973 } else { 974 i = matches->erase(i); 975 } 976 } 977} 978 979void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, 980 history::HistoryMatches* matches, 981 size_t max_results) const { 982 for (size_t source = 0; 983 (source < matches->size()) && (source < max_results); ) { 984 const GURL& url = (*matches)[source].url_info.url(); 985 // TODO(brettw) this should go away when everything uses GURL. 986 history::RedirectList redirects; 987 backend->GetMostRecentRedirectsFrom(url, &redirects); 988 if (!redirects.empty()) { 989 // Remove all but the first occurrence of any of these redirects in the 990 // search results. We also must add the URL we queried for, since it may 991 // not be the first match and we'd want to remove it. 992 // 993 // For example, when A redirects to B and our matches are [A, X, B], 994 // we'll get B as the redirects from, and we want to remove the second 995 // item of that pair, removing B. If A redirects to B and our matches are 996 // [B, X, A], we'll want to remove A instead. 997 redirects.push_back(url); 998 source = RemoveSubsequentMatchesOf(matches, source, redirects); 999 } else { 1000 // Advance to next item. 1001 source++; 1002 } 1003 } 1004 1005 if (matches->size() > max_results) 1006 matches->resize(max_results); 1007} 1008 1009size_t HistoryURLProvider::RemoveSubsequentMatchesOf( 1010 history::HistoryMatches* matches, 1011 size_t source_index, 1012 const std::vector<GURL>& remove) const { 1013 size_t next_index = source_index + 1; // return value = item after source 1014 1015 // Find the first occurrence of any URL in the redirect chain. We want to 1016 // keep this one since it is rated the highest. 1017 history::HistoryMatches::iterator first(std::find_first_of( 1018 matches->begin(), matches->end(), remove.begin(), remove.end(), 1019 history::HistoryMatch::EqualsGURL)); 1020 DCHECK(first != matches->end()) << "We should have always found at least the " 1021 "original URL."; 1022 1023 // Find any following occurrences of any URL in the redirect chain, these 1024 // should be deleted. 1025 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, 1026 matches->end(), remove.begin(), remove.end(), 1027 history::HistoryMatch::EqualsGURL)); 1028 next != matches->end(); next = std::find_first_of(next, matches->end(), 1029 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) { 1030 // Remove this item. When we remove an item before the source index, we 1031 // need to shift it to the right and remember that so we can return it. 1032 next = matches->erase(next); 1033 if (static_cast<size_t>(next - matches->begin()) < next_index) 1034 --next_index; 1035 } 1036 return next_index; 1037} 1038 1039AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( 1040 HistoryURLProviderParams* params, 1041 const history::HistoryMatch& history_match, 1042 MatchType match_type, 1043 int relevance) { 1044 const history::URLRow& info = history_match.url_info; 1045 AutocompleteMatch match(this, relevance, 1046 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL); 1047 match.typed_count = info.typed_count(); 1048 match.destination_url = info.url(); 1049 DCHECK(match.destination_url.is_valid()); 1050 size_t inline_autocomplete_offset = 1051 history_match.input_location + params->input.text().length(); 1052 std::string languages = (match_type == WHAT_YOU_TYPED) ? 1053 std::string() : params->languages; 1054 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 1055 ~((params->trim_http && !history_match.match_in_scheme) ? 1056 0 : net::kFormatUrlOmitHTTP); 1057 match.fill_into_edit = 1058 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 1059 net::FormatUrl(info.url(), languages, format_types, 1060 net::UnescapeRule::SPACES, NULL, NULL, 1061 &inline_autocomplete_offset)); 1062 if (!params->prevent_inline_autocomplete) 1063 match.inline_autocomplete_offset = inline_autocomplete_offset; 1064 DCHECK((match.inline_autocomplete_offset == string16::npos) || 1065 (match.inline_autocomplete_offset <= match.fill_into_edit.length())); 1066 1067 size_t match_start = history_match.input_location; 1068 match.contents = net::FormatUrl(info.url(), languages, 1069 format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start); 1070 if ((match_start != string16::npos) && 1071 (inline_autocomplete_offset != string16::npos) && 1072 (inline_autocomplete_offset != match_start)) { 1073 DCHECK(inline_autocomplete_offset > match_start); 1074 AutocompleteMatch::ClassifyLocationInString(match_start, 1075 inline_autocomplete_offset - match_start, match.contents.length(), 1076 ACMatchClassification::URL, &match.contents_class); 1077 } else { 1078 AutocompleteMatch::ClassifyLocationInString(string16::npos, 0, 1079 match.contents.length(), ACMatchClassification::URL, 1080 &match.contents_class); 1081 } 1082 match.description = info.title(); 1083 AutocompleteMatch::ClassifyMatchInString(params->input.text(), 1084 info.title(), 1085 ACMatchClassification::NONE, 1086 &match.description_class); 1087 RecordAdditionalInfoFromUrlRow(info, &match); 1088 return match; 1089} 1090