history_url_provider.cc revision e5d81f57cb97b3b6b7fccc9c5610d21eb81db09d
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/history_url_provider.h" 6 7#include <algorithm> 8 9#include "base/basictypes.h" 10#include "base/bind.h" 11#include "base/command_line.h" 12#include "base/message_loop/message_loop.h" 13#include "base/metrics/histogram.h" 14#include "base/prefs/pref_service.h" 15#include "base/strings/string_util.h" 16#include "base/strings/utf_string_conversions.h" 17#include "base/time/time.h" 18#include "chrome/browser/autocomplete/autocomplete_match.h" 19#include "chrome/browser/autocomplete/autocomplete_provider_listener.h" 20#include "chrome/browser/autocomplete/autocomplete_result.h" 21#include "chrome/browser/history/history_backend.h" 22#include "chrome/browser/history/history_database.h" 23#include "chrome/browser/history/history_service.h" 24#include "chrome/browser/history/history_service_factory.h" 25#include "chrome/browser/history/history_types.h" 26#include "chrome/browser/history/in_memory_url_index_types.h" 27#include "chrome/browser/history/scored_history_match.h" 28#include "chrome/browser/omnibox/omnibox_field_trial.h" 29#include "chrome/browser/profiles/profile.h" 30#include "chrome/browser/search_engines/template_url_service.h" 31#include "chrome/browser/search_engines/template_url_service_factory.h" 32#include "chrome/common/chrome_switches.h" 33#include "chrome/common/net/url_fixer_upper.h" 34#include "chrome/common/pref_names.h" 35#include "chrome/common/url_constants.h" 36#include "net/base/net_util.h" 37#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 38#include "url/gurl.h" 39#include "url/url_parse.h" 40#include "url/url_util.h" 41 42namespace { 43 44// If |create_if_necessary| is true, ensures that |matches| contains an 45// entry for |info|, creating a new such entry if necessary (using 46// |input_location| and |match_in_scheme|). 47// 48// If |promote| is true, this also ensures the entry is the first element in 49// |matches|, moving or adding it to the front as appropriate. When |promote| 50// is false, existing matches are left in place, and newly added matches are 51// placed at the back. 52// 53// It's OK to call this function with both |create_if_necessary| and 54// |promote| false, in which case we'll do nothing. 55// 56// Returns whether the match exists regardless if it was promoted/created. 57bool CreateOrPromoteMatch(const history::URLRow& info, 58 size_t input_location, 59 bool match_in_scheme, 60 history::HistoryMatches* matches, 61 bool create_if_necessary, 62 bool promote) { 63 // |matches| may already have an entry for this. 64 for (history::HistoryMatches::iterator i(matches->begin()); 65 i != matches->end(); ++i) { 66 if (i->url_info.url() == info.url()) { 67 // Rotate it to the front if the caller wishes. 68 if (promote) 69 std::rotate(matches->begin(), i, i + 1); 70 return true; 71 } 72 } 73 74 if (!create_if_necessary) 75 return false; 76 77 // No entry, so create one. 78 history::HistoryMatch match(info, input_location, match_in_scheme, true); 79 if (promote) 80 matches->push_front(match); 81 else 82 matches->push_back(match); 83 84 return true; 85} 86 87// Given the user's |input| and a |match| created from it, reduce the match's 88// URL to just a host. If this host still matches the user input, return it. 89// Returns the empty string on failure. 90GURL ConvertToHostOnly(const history::HistoryMatch& match, 91 const base::string16& input) { 92 // See if we should try to do host-only suggestions for this URL. Nonstandard 93 // schemes means there's no authority section, so suggesting the host name 94 // is useless. File URLs are standard, but host suggestion is not useful for 95 // them either. 96 const GURL& url = match.url_info.url(); 97 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) 98 return GURL(); 99 100 // Transform to a host-only match. Bail if the host no longer matches the 101 // user input (e.g. because the user typed more than just a host). 102 GURL host = url.GetWithEmptyPath(); 103 if ((host.spec().length() < (match.input_location + input.length()))) 104 return GURL(); // User typing is longer than this host suggestion. 105 106 const base::string16 spec = base::UTF8ToUTF16(host.spec()); 107 if (spec.compare(match.input_location, input.length(), input)) 108 return GURL(); // User typing is no longer a prefix. 109 110 return host; 111} 112 113// Acts like the > operator for URLInfo classes. 114bool CompareHistoryMatch(const history::HistoryMatch& a, 115 const history::HistoryMatch& b) { 116 // A promoted match is better than non-promoted. 117 if (a.promoted != b.promoted) 118 return a.promoted; 119 120 // A URL that has been typed at all is better than one that has never been 121 // typed. (Note "!"s on each side) 122 if (!a.url_info.typed_count() != !b.url_info.typed_count()) 123 return a.url_info.typed_count() > b.url_info.typed_count(); 124 125 // Innermost matches (matches after any scheme or "www.") are better than 126 // non-innermost matches. 127 if (a.innermost_match != b.innermost_match) 128 return a.innermost_match; 129 130 // URLs that have been typed more often are better. 131 if (a.url_info.typed_count() != b.url_info.typed_count()) 132 return a.url_info.typed_count() > b.url_info.typed_count(); 133 134 // For URLs that have each been typed once, a host (alone) is better than a 135 // page inside. 136 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) 137 return a.IsHostOnly(); 138 139 // URLs that have been visited more often are better. 140 if (a.url_info.visit_count() != b.url_info.visit_count()) 141 return a.url_info.visit_count() > b.url_info.visit_count(); 142 143 // URLs that have been visited more recently are better. 144 return a.url_info.last_visit() > b.url_info.last_visit(); 145} 146 147// Sorts and dedups the given list of matches. 148void SortAndDedupMatches(history::HistoryMatches* matches) { 149 // Sort by quality, best first. 150 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); 151 152 // Remove duplicate matches (caused by the search string appearing in one of 153 // the prefixes as well as after it). Consider the following scenario: 154 // 155 // User has visited "http://http.com" once and "http://htaccess.com" twice. 156 // User types "http". The autocomplete search with prefix "http://" returns 157 // the first host, while the search with prefix "" returns both hosts. Now 158 // we sort them into rank order: 159 // http://http.com (innermost_match) 160 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) 161 // http://http.com (!innermost_match, url_info.visit_count == 1) 162 // 163 // The above scenario tells us we can't use std::unique(), since our 164 // duplicates are not always sequential. It also tells us we should remove 165 // the lower-quality duplicate(s), since otherwise the returned results won't 166 // be ordered correctly. This is easy to do: we just always remove the later 167 // element of a duplicate pair. 168 // Be careful! Because the vector contents may change as we remove elements, 169 // we use an index instead of an iterator in the outer loop, and don't 170 // precalculate the ending position. 171 for (size_t i = 0; i < matches->size(); ++i) { 172 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); 173 j != matches->end(); ) { 174 if ((*matches)[i].url_info.url() == j->url_info.url()) 175 j = matches->erase(j); 176 else 177 ++j; 178 } 179 } 180} 181 182// Extracts typed_count, visit_count, and last_visited time from the 183// URLRow and puts them in the additional info field of the |match| 184// for display in about:omnibox. 185void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, 186 AutocompleteMatch* match) { 187 match->RecordAdditionalInfo("typed count", info.typed_count()); 188 match->RecordAdditionalInfo("visit count", info.visit_count()); 189 match->RecordAdditionalInfo("last visit", info.last_visit()); 190} 191 192// Calculates a new relevance score applying half-life time decaying to |count| 193// using |time_since_last_visit| and |score_buckets|. 194// This function will never return a score higher than |undecayed_relevance|. 195// In other words, it can only demote the old score. 196double CalculateRelevanceUsingScoreBuckets( 197 const HUPScoringParams::ScoreBuckets& score_buckets, 198 const base::TimeDelta& time_since_last_visit, 199 int undecayed_relevance, 200 int count) { 201 // Back off if above relevance cap. 202 if ((score_buckets.relevance_cap() != -1) && 203 (undecayed_relevance >= score_buckets.relevance_cap())) 204 return undecayed_relevance; 205 206 // Time based decay using half-life time. 207 double decayed_count = count; 208 if (decayed_count > 0) 209 decayed_count *= score_buckets.HalfLifeTimeDecay(time_since_last_visit); 210 211 // Find a threshold where decayed_count >= bucket. 212 const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket = NULL; 213 for (size_t i = 0; i < score_buckets.buckets().size(); ++i) { 214 score_bucket = &score_buckets.buckets()[i]; 215 if (decayed_count >= score_bucket->first) 216 break; // Buckets are in descending order, so we can ignore the rest. 217 } 218 219 return (score_bucket && (undecayed_relevance > score_bucket->second)) ? 220 score_bucket->second : undecayed_relevance; 221} 222 223} // namespace 224 225// ----------------------------------------------------------------- 226// SearchTermsDataSnapshot 227 228// Implementation of SearchTermsData that takes a snapshot of another 229// SearchTermsData by copying all the responses to the different getters into 230// member strings, then returning those strings when its own getters are called. 231// This will typically be constructed on the UI thread from 232// UIThreadSearchTermsData but is subsequently safe to use on any thread. 233class SearchTermsDataSnapshot : public SearchTermsData { 234 public: 235 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); 236 virtual ~SearchTermsDataSnapshot(); 237 238 virtual std::string GoogleBaseURLValue() const OVERRIDE; 239 virtual std::string GetApplicationLocale() const OVERRIDE; 240 virtual base::string16 GetRlzParameterValue() const OVERRIDE; 241 virtual std::string GetSearchClient() const OVERRIDE; 242 virtual std::string NTPIsThemedParam() const OVERRIDE; 243 244 private: 245 std::string google_base_url_value_; 246 std::string application_locale_; 247 base::string16 rlz_parameter_value_; 248 std::string search_client_; 249 std::string ntp_is_themed_param_; 250 251 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); 252}; 253 254SearchTermsDataSnapshot::SearchTermsDataSnapshot( 255 const SearchTermsData& search_terms_data) 256 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), 257 application_locale_(search_terms_data.GetApplicationLocale()), 258 rlz_parameter_value_(search_terms_data.GetRlzParameterValue()), 259 search_client_(search_terms_data.GetSearchClient()), 260 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()) {} 261 262SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { 263} 264 265std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const { 266 return google_base_url_value_; 267} 268 269std::string SearchTermsDataSnapshot::GetApplicationLocale() const { 270 return application_locale_; 271} 272 273base::string16 SearchTermsDataSnapshot::GetRlzParameterValue() const { 274 return rlz_parameter_value_; 275} 276 277std::string SearchTermsDataSnapshot::GetSearchClient() const { 278 return search_client_; 279} 280 281std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { 282 return ntp_is_themed_param_; 283} 284 285// ----------------------------------------------------------------- 286// HistoryURLProvider 287 288// These ugly magic numbers will go away once we switch all scoring 289// behavior (including URL-what-you-typed) to HistoryQuick provider. 290const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; 291const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; 292const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; 293const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; 294 295// VisitClassifier is used to classify the type of visit to a particular url. 296class HistoryURLProvider::VisitClassifier { 297 public: 298 enum Type { 299 INVALID, // Navigations to the URL are not allowed. 300 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but 301 // which is known to refer to a visited intranet host. 302 VISITED, // The site has been previously visited. 303 }; 304 305 VisitClassifier(HistoryURLProvider* provider, 306 const AutocompleteInput& input, 307 history::URLDatabase* db); 308 309 // Returns the type of visit for the specified input. 310 Type type() const { return type_; } 311 312 // Returns the URLRow for the visit. 313 const history::URLRow& url_row() const { return url_row_; } 314 315 private: 316 HistoryURLProvider* provider_; 317 history::URLDatabase* db_; 318 Type type_; 319 history::URLRow url_row_; 320 321 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); 322}; 323 324HistoryURLProvider::VisitClassifier::VisitClassifier( 325 HistoryURLProvider* provider, 326 const AutocompleteInput& input, 327 history::URLDatabase* db) 328 : provider_(provider), 329 db_(db), 330 type_(INVALID) { 331 const GURL& url = input.canonicalized_url(); 332 // Detect email addresses. These cases will look like "http://user@site/", 333 // and because the history backend strips auth creds, we'll get a bogus exact 334 // match below if the user has visited "site". 335 if (!url.is_valid() || 336 ((input.type() == AutocompleteInput::UNKNOWN) && 337 input.parts().username.is_nonempty() && 338 !input.parts().password.is_nonempty() && 339 !input.parts().path.is_nonempty())) 340 return; 341 342 if (db_->GetRowForURL(url, &url_row_)) { 343 type_ = VISITED; 344 return; 345 } 346 347 if (provider_->CanFindIntranetURL(db_, input)) { 348 // The user typed an intranet hostname that they've visited (albeit with a 349 // different port and/or path) before. 350 url_row_ = history::URLRow(url); 351 type_ = UNVISITED_INTRANET; 352 } 353} 354 355HistoryURLProviderParams::HistoryURLProviderParams( 356 const AutocompleteInput& input, 357 bool trim_http, 358 const std::string& languages, 359 TemplateURL* default_search_provider, 360 const SearchTermsData& search_terms_data) 361 : message_loop(base::MessageLoop::current()), 362 input(input), 363 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), 364 trim_http(trim_http), 365 failed(false), 366 languages(languages), 367 dont_suggest_exact_input(false), 368 default_search_provider(default_search_provider ? 369 new TemplateURL(default_search_provider->profile(), 370 default_search_provider->data()) : NULL), 371 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { 372} 373 374HistoryURLProviderParams::~HistoryURLProviderParams() { 375} 376 377HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, 378 Profile* profile) 379 : HistoryProvider(listener, profile, 380 AutocompleteProvider::TYPE_HISTORY_URL), 381 params_(NULL), 382 cull_redirects_( 383 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() || 384 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrialExperimentGroup()), 385 create_shorter_match_( 386 !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || 387 !OmniboxFieldTrial:: 388 InHUPCreateShorterMatchFieldTrialExperimentGroup()), 389 search_url_database_(true) { 390 // Initialize HUP scoring params based on the current experiment. 391 OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_); 392} 393 394void HistoryURLProvider::Start(const AutocompleteInput& input, 395 bool minimal_changes) { 396 // NOTE: We could try hard to do less work in the |minimal_changes| case 397 // here; some clever caching would let us reuse the raw matches from the 398 // history DB without re-querying. However, we'd still have to go back to 399 // the history thread to mark these up properly, and if pass 2 is currently 400 // running, we'd need to wait for it to return to the main thread before 401 // doing this (we can't just write new data for it to read due to thread 402 // safety issues). At that point it's just as fast, and easier, to simply 403 // re-run the query from scratch and ignore |minimal_changes|. 404 405 // Cancel any in-progress query. 406 Stop(false); 407 408 RunAutocompletePasses(input, true); 409} 410 411void HistoryURLProvider::Stop(bool clear_cached_results) { 412 done_ = true; 413 414 if (params_) 415 params_->cancel_flag.Set(); 416} 417 418AutocompleteMatch HistoryURLProvider::SuggestExactInput( 419 const base::string16& text, 420 const GURL& destination_url, 421 bool trim_http) { 422 AutocompleteMatch match(this, 0, false, 423 AutocompleteMatchType::URL_WHAT_YOU_TYPED); 424 425 if (destination_url.is_valid()) { 426 match.destination_url = destination_url; 427 428 // Trim off "http://" if the user didn't type it. 429 // NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to 430 // strip the scheme as we need to know the offset so we can adjust the 431 // |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have 432 // slightly different behavior as well (the latter will strip even without 433 // two slashes after the scheme). 434 DCHECK(!trim_http || !AutocompleteInput::HasHTTPScheme(text)); 435 base::string16 display_string( 436 StringForURLDisplay(destination_url, false, false)); 437 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; 438 match.fill_into_edit = 439 AutocompleteInput::FormattedStringWithEquivalentMeaning(destination_url, 440 display_string); 441 match.allowed_to_be_default_match = true; 442 // NOTE: Don't set match.inline_autocompletion to something non-empty here; 443 // it's surprising and annoying. 444 445 // Try to highlight "innermost" match location. If we fix up "w" into 446 // "www.w.com", we want to highlight the fifth character, not the first. 447 // This relies on match.destination_url being the non-prefix-trimmed version 448 // of match.contents. 449 match.contents = display_string; 450 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( 451 base::UTF8ToUTF16(destination_url.spec()), text); 452 // It's possible for match.destination_url to not contain the user's input 453 // at all (so |best_prefix| is NULL), for example if the input is 454 // "view-source:x" and |destination_url| has an inserted "http://" in the 455 // middle. 456 if (best_prefix == NULL) { 457 AutocompleteMatch::ClassifyMatchInString(text, match.contents, 458 ACMatchClassification::URL, 459 &match.contents_class); 460 } else { 461 AutocompleteMatch::ClassifyLocationInString( 462 best_prefix->prefix.length() - offset, text.length(), 463 match.contents.length(), ACMatchClassification::URL, 464 &match.contents_class); 465 } 466 467 match.is_history_what_you_typed_match = true; 468 } 469 470 return match; 471} 472 473// Called on the history thread. 474void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, 475 history::URLDatabase* db, 476 HistoryURLProviderParams* params) { 477 // We may get called with a NULL database if it couldn't be properly 478 // initialized. 479 if (!db) { 480 params->failed = true; 481 } else if (!params->cancel_flag.IsSet()) { 482 base::TimeTicks beginning_time = base::TimeTicks::Now(); 483 484 DoAutocomplete(backend, db, params); 485 486 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", 487 base::TimeTicks::Now() - beginning_time); 488 } 489 490 // Return the results (if any) to the main thread. 491 params->message_loop->PostTask(FROM_HERE, base::Bind( 492 &HistoryURLProvider::QueryComplete, this, params)); 493} 494 495// Used by both autocomplete passes, and therefore called on multiple different 496// threads (though not simultaneously). 497void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, 498 history::URLDatabase* db, 499 HistoryURLProviderParams* params) { 500 VisitClassifier classifier(this, params->input, db); 501 // Create a What You Typed match, which we'll need below. 502 // 503 // We display this to the user when there's a reasonable chance they actually 504 // care: 505 // * Their input can be opened as a URL, and 506 // * We parsed the input as a URL, or it starts with an explicit "http:" or 507 // "https:". 508 // that is when their input can be opened as a URL. 509 // Otherwise, this is just low-quality noise. In the cases where we've parsed 510 // as UNKNOWN, we'll still show an accidental search infobar if need be. 511 bool have_what_you_typed_match = 512 params->input.canonicalized_url().is_valid() && 513 (params->input.type() != AutocompleteInput::QUERY) && 514 ((params->input.type() != AutocompleteInput::UNKNOWN) || 515 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || 516 !params->trim_http || 517 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); 518 AutocompleteMatch what_you_typed_match(SuggestExactInput( 519 params->input.text(), params->input.canonicalized_url(), 520 params->trim_http)); 521 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 522 523 // Get the matching URLs from the DB 524 history::URLRows url_matches; 525 history::HistoryMatches history_matches; 526 527 if (search_url_database_) { 528 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); 529 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); 530 ++i) { 531 if (params->cancel_flag.IsSet()) 532 return; // Canceled in the middle of a query, give up. 533 // We only need kMaxMatches results in the end, but before we 534 // get there we need to promote lower-quality matches that are 535 // prefixes of higher-quality matches, and remove lower-quality 536 // redirects. So we ask for more results than we need, of every 537 // prefix type, in hopes this will give us far more than enough 538 // to work with. CullRedirects() will then reduce the list to 539 // the best kMaxMatches results. 540 db->AutocompleteForPrefix( 541 base::UTF16ToUTF8(i->prefix + params->input.text()), 542 kMaxMatches * 2, 543 (backend == NULL), 544 &url_matches); 545 for (history::URLRows::const_iterator j(url_matches.begin()); 546 j != url_matches.end(); ++j) { 547 const URLPrefix* best_prefix = 548 URLPrefix::BestURLPrefix(base::UTF8ToUTF16(j->url().spec()), 549 base::string16()); 550 DCHECK(best_prefix != NULL); 551 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), 552 i->num_components == 0, 553 i->num_components >= best_prefix->num_components)); 554 } 555 } 556 } 557 558 // Create sorted list of suggestions. 559 CullPoorMatches(*params, &history_matches); 560 SortAndDedupMatches(&history_matches); 561 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, 562 what_you_typed_match, &history_matches); 563 564 // Try to promote a match as an exact/inline autocomplete match. This also 565 // moves it to the front of |history_matches|, so skip over it when 566 // converting the rest of the matches. 567 size_t first_match = 1; 568 size_t exact_suggestion = 0; 569 // Checking |is_history_what_you_typed_match| tells us whether 570 // SuggestExactInput() succeeded in constructing a valid match. 571 if (what_you_typed_match.is_history_what_you_typed_match && 572 (!backend || !params->dont_suggest_exact_input) && 573 FixupExactSuggestion(db, params->input, classifier, &what_you_typed_match, 574 &history_matches)) { 575 // Got an exact match for the user's input. Treat it as the best match 576 // regardless of the input type. 577 exact_suggestion = 1; 578 params->matches.push_back(what_you_typed_match); 579 } else if (params->prevent_inline_autocomplete || 580 history_matches.empty() || 581 !PromoteMatchForInlineAutocomplete(history_matches.front(), params)) { 582 // Failed to promote any URLs for inline autocompletion. Use the What You 583 // Typed match, if we have it. 584 first_match = 0; 585 if (have_what_you_typed_match) 586 params->matches.push_back(what_you_typed_match); 587 } 588 589 // This is the end of the synchronous pass. 590 if (!backend) 591 return; 592 // If search_url_database_ is false, we shouldn't have scheduled a second 593 // pass. 594 DCHECK(search_url_database_); 595 596 // Determine relevancy of highest scoring match, if any. 597 int relevance = -1; 598 for (ACMatches::const_iterator it = params->matches.begin(); 599 it != params->matches.end(); ++it) { 600 relevance = std::max(relevance, it->relevance); 601 } 602 603 if (cull_redirects_) { 604 // Remove redirects and trim list to size. We want to provide up to 605 // kMaxMatches results plus the What You Typed result, if it was added to 606 // |history_matches| above. 607 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); 608 } else { 609 // Simply trim the list to size. 610 if (history_matches.size() > kMaxMatches + exact_suggestion) 611 history_matches.resize(kMaxMatches + exact_suggestion); 612 } 613 614 // Convert the history matches to autocomplete matches. 615 for (size_t i = first_match; i < history_matches.size(); ++i) { 616 const history::HistoryMatch& match = history_matches[i]; 617 DCHECK(!have_what_you_typed_match || 618 (match.url_info.url() != 619 GURL(params->matches.front().destination_url))); 620 // If we've assigned a score already, all later matches score one 621 // less than the previous match. 622 relevance = (relevance > 0) ? (relevance - 1) : 623 CalculateRelevance(NORMAL, history_matches.size() - 1 - i); 624 AutocompleteMatch ac_match = HistoryMatchToACMatch(*params, match, 625 NORMAL, relevance); 626 // The experimental scoring must not change the top result's score. 627 if (!params->matches.empty()) { 628 relevance = CalculateRelevanceScoreUsingScoringParams(match, relevance); 629 ac_match.relevance = relevance; 630 } 631 params->matches.push_back(ac_match); 632 } 633} 634 635// Called on the main thread when the query is complete. 636void HistoryURLProvider::QueryComplete( 637 HistoryURLProviderParams* params_gets_deleted) { 638 // Ensure |params_gets_deleted| gets deleted on exit. 639 scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted); 640 641 // If the user hasn't already started another query, clear our member pointer 642 // so we can't write into deleted memory. 643 if (params_ == params_gets_deleted) 644 params_ = NULL; 645 646 // Don't send responses for queries that have been canceled. 647 if (params->cancel_flag.IsSet()) 648 return; // Already set done_ when we canceled, no need to set it again. 649 650 // Don't modify |matches_| if the query failed, since it might have a default 651 // match in it, whereas |params->matches| will be empty. 652 if (!params->failed) { 653 matches_.swap(params->matches); 654 UpdateStarredStateOfMatches(); 655 } 656 657 done_ = true; 658 listener_->OnProviderUpdate(true); 659} 660 661HistoryURLProvider::~HistoryURLProvider() { 662 // Note: This object can get leaked on shutdown if there are pending 663 // requests on the database (which hold a reference to us). Normally, these 664 // messages get flushed for each thread. We do a round trip from main, to 665 // history, back to main while holding a reference. If the main thread 666 // completes before the history thread, the message to delegate back to the 667 // main thread will not run and the reference will leak. Therefore, don't do 668 // anything on destruction. 669} 670 671int HistoryURLProvider::CalculateRelevance(MatchType match_type, 672 size_t match_number) const { 673 switch (match_type) { 674 case INLINE_AUTOCOMPLETE: 675 return kScoreForBestInlineableResult; 676 677 case UNVISITED_INTRANET: 678 return kScoreForUnvisitedIntranetResult; 679 680 case WHAT_YOU_TYPED: 681 return kScoreForWhatYouTypedResult; 682 683 default: // NORMAL 684 return kBaseScoreForNonInlineableResult + 685 static_cast<int>(match_number); 686 } 687} 688 689void HistoryURLProvider::RunAutocompletePasses( 690 const AutocompleteInput& input, 691 bool fixup_input_and_run_pass_1) { 692 matches_.clear(); 693 694 if ((input.type() == AutocompleteInput::INVALID) || 695 (input.type() == AutocompleteInput::FORCED_QUERY)) 696 return; 697 698 // Create a match for exactly what the user typed. This will only be used as 699 // a fallback in case we can't get the history service or URL DB; otherwise, 700 // we'll run this again in DoAutocomplete() and use that result instead. 701 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()); 702 // Don't do this for queries -- while we can sometimes mark up a match for 703 // this, it's not what the user wants, and just adds noise. 704 if ((input.type() != AutocompleteInput::QUERY) && 705 input.canonicalized_url().is_valid()) { 706 AutocompleteMatch what_you_typed(SuggestExactInput( 707 input.text(), input.canonicalized_url(), trim_http)); 708 what_you_typed.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 709 matches_.push_back(what_you_typed); 710 } 711 712 // We'll need the history service to run both passes, so try to obtain it. 713 if (!profile_) 714 return; 715 HistoryService* const history_service = 716 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 717 if (!history_service) 718 return; 719 720 // Get the default search provider and search terms data now since we have to 721 // retrieve these on the UI thread, and the second pass runs on the history 722 // thread. |template_url_service| can be NULL when testing. 723 TemplateURLService* template_url_service = 724 TemplateURLServiceFactory::GetForProfile(profile_); 725 TemplateURL* default_search_provider = template_url_service ? 726 template_url_service->GetDefaultSearchProvider() : NULL; 727 UIThreadSearchTermsData data(profile_); 728 729 // Create the data structure for the autocomplete passes. We'll save this off 730 // onto the |params_| member for later deletion below if we need to run pass 731 // 2. 732 scoped_ptr<HistoryURLProviderParams> params( 733 new HistoryURLProviderParams( 734 input, trim_http, 735 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages), 736 default_search_provider, data)); 737 738 params->prevent_inline_autocomplete = 739 PreventInlineAutocomplete(input); 740 741 if (fixup_input_and_run_pass_1) { 742 // Do some fixup on the user input before matching against it, so we provide 743 // good results for local file paths, input with spaces, etc. 744 if (!FixupUserInput(¶ms->input)) 745 return; 746 747 // Pass 1: Get the in-memory URL database, and use it to find and promote 748 // the inline autocomplete match, if any. 749 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 750 // url_db can be NULL if it hasn't finished initializing (or failed to 751 // initialize). In this case all we can do is fall back on the second 752 // pass. 753 // 754 // TODO(pkasting): We should just block here until this loads. Any time 755 // someone unloads the history backend, we'll get inconsistent inline 756 // autocomplete behavior here. 757 if (url_db) { 758 DoAutocomplete(NULL, url_db, params.get()); 759 // params->matches now has the matches we should expose to the provider. 760 // Pass 2 expects a "clean slate" set of matches. 761 matches_.clear(); 762 matches_.swap(params->matches); 763 UpdateStarredStateOfMatches(); 764 } 765 } 766 767 // Pass 2: Ask the history service to call us back on the history thread, 768 // where we can read the full on-disk DB. 769 if (search_url_database_ && 770 (input.matches_requested() == AutocompleteInput::ALL_MATCHES)) { 771 done_ = false; 772 params_ = params.release(); // This object will be destroyed in 773 // QueryComplete() once we're done with it. 774 history_service->ScheduleAutocomplete(this, params_); 775 } 776} 777 778bool HistoryURLProvider::FixupExactSuggestion( 779 history::URLDatabase* db, 780 const AutocompleteInput& input, 781 const VisitClassifier& classifier, 782 AutocompleteMatch* match, 783 history::HistoryMatches* matches) const { 784 DCHECK(match != NULL); 785 DCHECK(matches != NULL); 786 787 MatchType type = INLINE_AUTOCOMPLETE; 788 switch (classifier.type()) { 789 case VisitClassifier::INVALID: 790 return false; 791 case VisitClassifier::UNVISITED_INTRANET: 792 type = UNVISITED_INTRANET; 793 break; 794 default: 795 DCHECK_EQ(VisitClassifier::VISITED, classifier.type()); 796 // We have data for this match, use it. 797 match->deletable = true; 798 match->description = classifier.url_row().title(); 799 RecordAdditionalInfoFromUrlRow(classifier.url_row(), match); 800 match->description_class = 801 ClassifyDescription(input.text(), match->description); 802 if (!classifier.url_row().typed_count()) { 803 // If we reach here, we must be in the second pass, and we must not have 804 // this row's data available during the first pass. That means we 805 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to 806 // maintain the ordering between passes consistent, we need to score it 807 // the same way here. 808 type = CanFindIntranetURL(db, input) ? 809 UNVISITED_INTRANET : WHAT_YOU_TYPED; 810 } 811 break; 812 } 813 814 const GURL& url = match->destination_url; 815 const url_parse::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 816 // If the what-you-typed result looks like a single word (which can be 817 // interpreted as an intranet address) followed by a pound sign ("#"), 818 // leave the score for the url-what-you-typed result as is. It will be 819 // outscored by a search query from the SearchProvider. This test fixes 820 // cases such as "c#" and "c# foo" where the user has visited an intranet 821 // site "c". We want the search-what-you-typed score to beat the 822 // URL-what-you-typed score in this case. Most of the below test tries to 823 // make sure that this code does not trigger if the user did anything to 824 // indicate the desired match is a URL. For instance, "c/# foo" will not 825 // pass the test because that will be classified as input type URL. The 826 // parsed.CountCharactersBefore() in the test looks for the presence of a 827 // reference fragment in the URL by checking whether the position differs 828 // included the delimiter (pound sign) versus not including the delimiter. 829 // (One cannot simply check url.ref() because it will not distinguish 830 // between the input "c" and the input "c#", both of which will have empty 831 // reference fragments.) 832 if ((type == UNVISITED_INTRANET) && 833 (input.type() != AutocompleteInput::URL) && 834 url.username().empty() && url.password().empty() && url.port().empty() && 835 (url.path() == "/") && url.query().empty() && 836 (parsed.CountCharactersBefore(url_parse::Parsed::REF, true) != 837 parsed.CountCharactersBefore(url_parse::Parsed::REF, false))) { 838 return false; 839 } 840 841 match->relevance = CalculateRelevance(type, 0); 842 843 // If there are any other matches, then don't promote this match here, in 844 // hopes the caller will be able to inline autocomplete a better suggestion. 845 // DoAutocomplete() will fall back on this match if inline autocompletion 846 // fails. This matches how we react to never-visited URL inputs in the non- 847 // intranet case. 848 if (type == UNVISITED_INTRANET && !matches->empty()) 849 return false; 850 851 // Put it on the front of the HistoryMatches for redirect culling. 852 CreateOrPromoteMatch(classifier.url_row(), base::string16::npos, false, 853 matches, true, true); 854 return true; 855} 856 857bool HistoryURLProvider::CanFindIntranetURL( 858 history::URLDatabase* db, 859 const AutocompleteInput& input) const { 860 // Normally passing the first two conditions below ought to guarantee the 861 // third condition, but because FixupUserInput() can run and modify the 862 // input's text and parts between Parse() and here, it seems better to be 863 // paranoid and check. 864 if ((input.type() != AutocompleteInput::UNKNOWN) || 865 !LowerCaseEqualsASCII(input.scheme(), content::kHttpScheme) || 866 !input.parts().host.is_nonempty()) 867 return false; 868 const std::string host(base::UTF16ToUTF8( 869 input.text().substr(input.parts().host.begin, input.parts().host.len))); 870 const size_t registry_length = 871 net::registry_controlled_domains::GetRegistryLength( 872 host, 873 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 874 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 875 return registry_length == 0 && db->IsTypedHost(host); 876} 877 878bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( 879 const history::HistoryMatch& match, 880 HistoryURLProviderParams* params) { 881 // Promote the first match if it's been marked for promotion or typed at least 882 // n times, where n == 1 for "simple" (host-only) URLs and n == 2 for others. 883 // We set a higher bar for these long URLs because it's less likely that users 884 // will want to visit them again. Even though we don't increment the 885 // typed_count for pasted-in URLs, if the user manually edits the URL or types 886 // some long thing in by hand, we wouldn't want to immediately start 887 // autocompleting it. 888 if (!match.promoted && 889 (!match.url_info.typed_count() || 890 ((match.url_info.typed_count() == 1) && 891 !match.IsHostOnly()))) 892 return false; 893 894 // In the case where the user has typed "foo.com" and visited (but not typed) 895 // "foo/", and the input is "foo", we can reach here for "foo.com" during the 896 // first pass but have the second pass suggest the exact input as a better 897 // URL. Since we need both passes to agree, and since during the first pass 898 // there's no way to know about "foo/", make reaching this point prevent any 899 // future pass from suggesting the exact input as a better match. 900 if (params) { 901 params->dont_suggest_exact_input = true; 902 AutocompleteMatch ac_match = HistoryMatchToACMatch( 903 *params, match, INLINE_AUTOCOMPLETE, 904 CalculateRelevance(INLINE_AUTOCOMPLETE, 0)); 905 params->matches.push_back(ac_match); 906 } 907 return true; 908} 909 910// See if a shorter version of the best match should be created, and if so place 911// it at the front of |matches|. This can suggest history URLs that are 912// prefixes of the best match (if they've been visited enough, compared to the 913// best match), or create host-only suggestions even when they haven't been 914// visited before: if the user visited http://example.com/asdf once, we'll 915// suggest http://example.com/ even if they've never been to it. 916void HistoryURLProvider::PromoteOrCreateShorterSuggestion( 917 history::URLDatabase* db, 918 const HistoryURLProviderParams& params, 919 bool have_what_you_typed_match, 920 const AutocompleteMatch& what_you_typed_match, 921 history::HistoryMatches* matches) { 922 if (matches->empty()) 923 return; // No matches, nothing to do. 924 925 // Determine the base URL from which to search, and whether that URL could 926 // itself be added as a match. We can add the base iff it's not "effectively 927 // the same" as any "what you typed" match. 928 const history::HistoryMatch& match = matches->front(); 929 GURL search_base = ConvertToHostOnly(match, params.input.text()); 930 bool can_add_search_base_to_matches = !have_what_you_typed_match; 931 if (search_base.is_empty()) { 932 // Search from what the user typed when we couldn't reduce the best match 933 // to a host. Careful: use a substring of |match| here, rather than the 934 // first match in |params|, because they might have different prefixes. If 935 // the user typed "google.com", |what_you_typed_match| will hold 936 // "http://google.com/", but |match| might begin with 937 // "http://www.google.com/". 938 // TODO: this should be cleaned up, and is probably incorrect for IDN. 939 std::string new_match = match.url_info.url().possibly_invalid_spec(). 940 substr(0, match.input_location + params.input.text().length()); 941 search_base = GURL(new_match); 942 // TODO(mrossetti): There is a degenerate case where the following may 943 // cause a failure: http://www/~someword/fubar.html. Diagnose. 944 // See: http://crbug.com/50101 945 if (search_base.is_empty()) 946 return; // Can't construct a valid URL from which to start a search. 947 } else if (!can_add_search_base_to_matches) { 948 can_add_search_base_to_matches = 949 (search_base != what_you_typed_match.destination_url); 950 } 951 if (search_base == match.url_info.url()) 952 return; // Couldn't shorten |match|, so no range of URLs to search over. 953 954 // Search the DB for short URLs between our base and |match|. 955 history::URLRow info(search_base); 956 bool promote = true; 957 // A short URL is only worth suggesting if it's been visited at least a third 958 // as often as the longer URL. 959 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; 960 // For stability between the in-memory and on-disk autocomplete passes, when 961 // the long URL has been typed before, only suggest shorter URLs that have 962 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL 963 // (which hasn't been typed) that the in-memory pass doesn't know about, 964 // thereby making the top match, and thus the behavior of inline 965 // autocomplete, unstable. 966 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; 967 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), 968 match.url_info.url().possibly_invalid_spec(), min_visit_count, 969 min_typed_count, can_add_search_base_to_matches, &info)) { 970 if (!can_add_search_base_to_matches) 971 return; // Couldn't find anything and can't add the search base, bail. 972 973 // Try to get info on the search base itself. Promote it to the top if the 974 // original best match isn't good enough to autocomplete. 975 db->GetRowForURL(search_base, &info); 976 promote = match.url_info.typed_count() <= 1; 977 } 978 979 // Promote or add the desired URL to the list of matches. 980 bool ensure_can_inline = 981 promote && PromoteMatchForInlineAutocomplete(match, NULL); 982 ensure_can_inline &= CreateOrPromoteMatch(info, match.input_location, 983 match.match_in_scheme, matches, create_shorter_match_, promote); 984 if (ensure_can_inline) 985 matches->front().promoted = true; 986} 987 988void HistoryURLProvider::CullPoorMatches( 989 const HistoryURLProviderParams& params, 990 history::HistoryMatches* matches) const { 991 const base::Time& threshold(history::AutocompleteAgeThreshold()); 992 for (history::HistoryMatches::iterator i(matches->begin()); 993 i != matches->end(); ) { 994 if (RowQualifiesAsSignificant(i->url_info, threshold) && 995 !(params.default_search_provider && 996 params.default_search_provider->IsSearchURLUsingTermsData( 997 i->url_info.url(), *params.search_terms_data.get()))) { 998 ++i; 999 } else { 1000 i = matches->erase(i); 1001 } 1002 } 1003} 1004 1005void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, 1006 history::HistoryMatches* matches, 1007 size_t max_results) const { 1008 for (size_t source = 0; 1009 (source < matches->size()) && (source < max_results); ) { 1010 const GURL& url = (*matches)[source].url_info.url(); 1011 // TODO(brettw) this should go away when everything uses GURL. 1012 history::RedirectList redirects; 1013 backend->GetMostRecentRedirectsFrom(url, &redirects); 1014 if (!redirects.empty()) { 1015 // Remove all but the first occurrence of any of these redirects in the 1016 // search results. We also must add the URL we queried for, since it may 1017 // not be the first match and we'd want to remove it. 1018 // 1019 // For example, when A redirects to B and our matches are [A, X, B], 1020 // we'll get B as the redirects from, and we want to remove the second 1021 // item of that pair, removing B. If A redirects to B and our matches are 1022 // [B, X, A], we'll want to remove A instead. 1023 redirects.push_back(url); 1024 source = RemoveSubsequentMatchesOf(matches, source, redirects); 1025 } else { 1026 // Advance to next item. 1027 source++; 1028 } 1029 } 1030 1031 if (matches->size() > max_results) 1032 matches->resize(max_results); 1033} 1034 1035size_t HistoryURLProvider::RemoveSubsequentMatchesOf( 1036 history::HistoryMatches* matches, 1037 size_t source_index, 1038 const std::vector<GURL>& remove) const { 1039 size_t next_index = source_index + 1; // return value = item after source 1040 1041 // Find the first occurrence of any URL in the redirect chain. We want to 1042 // keep this one since it is rated the highest. 1043 history::HistoryMatches::iterator first(std::find_first_of( 1044 matches->begin(), matches->end(), remove.begin(), remove.end(), 1045 history::HistoryMatch::EqualsGURL)); 1046 DCHECK(first != matches->end()) << "We should have always found at least the " 1047 "original URL."; 1048 1049 // Find any following occurrences of any URL in the redirect chain, these 1050 // should be deleted. 1051 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, 1052 matches->end(), remove.begin(), remove.end(), 1053 history::HistoryMatch::EqualsGURL)); 1054 next != matches->end(); next = std::find_first_of(next, matches->end(), 1055 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) { 1056 // Remove this item. When we remove an item before the source index, we 1057 // need to shift it to the right and remember that so we can return it. 1058 next = matches->erase(next); 1059 if (static_cast<size_t>(next - matches->begin()) < next_index) 1060 --next_index; 1061 } 1062 return next_index; 1063} 1064 1065AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( 1066 const HistoryURLProviderParams& params, 1067 const history::HistoryMatch& history_match, 1068 MatchType match_type, 1069 int relevance) { 1070 const history::URLRow& info = history_match.url_info; 1071 AutocompleteMatch match(this, relevance, 1072 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL); 1073 match.typed_count = info.typed_count(); 1074 match.destination_url = info.url(); 1075 DCHECK(match.destination_url.is_valid()); 1076 size_t inline_autocomplete_offset = 1077 history_match.input_location + params.input.text().length(); 1078 std::string languages = (match_type == WHAT_YOU_TYPED) ? 1079 std::string() : params.languages; 1080 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 1081 ~((params.trim_http && !history_match.match_in_scheme) ? 1082 0 : net::kFormatUrlOmitHTTP); 1083 match.fill_into_edit = 1084 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 1085 net::FormatUrl(info.url(), languages, format_types, 1086 net::UnescapeRule::SPACES, NULL, NULL, 1087 &inline_autocomplete_offset)); 1088 if (!params.prevent_inline_autocomplete && 1089 (inline_autocomplete_offset != base::string16::npos)) { 1090 DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length()); 1091 match.inline_autocompletion = 1092 match.fill_into_edit.substr(inline_autocomplete_offset); 1093 } 1094 // The latter part of the test effectively asks "is the inline completion 1095 // empty?" (i.e., is this match effectively the what-you-typed match?). 1096 match.allowed_to_be_default_match = !params.prevent_inline_autocomplete || 1097 ((inline_autocomplete_offset != base::string16::npos) && 1098 (inline_autocomplete_offset >= match.fill_into_edit.length())); 1099 1100 size_t match_start = history_match.input_location; 1101 match.contents = net::FormatUrl(info.url(), languages, 1102 format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start); 1103 if ((match_start != base::string16::npos) && 1104 (inline_autocomplete_offset != base::string16::npos) && 1105 (inline_autocomplete_offset != match_start)) { 1106 DCHECK(inline_autocomplete_offset > match_start); 1107 AutocompleteMatch::ClassifyLocationInString(match_start, 1108 inline_autocomplete_offset - match_start, match.contents.length(), 1109 ACMatchClassification::URL, &match.contents_class); 1110 } else { 1111 AutocompleteMatch::ClassifyLocationInString(base::string16::npos, 0, 1112 match.contents.length(), ACMatchClassification::URL, 1113 &match.contents_class); 1114 } 1115 match.description = info.title(); 1116 match.description_class = 1117 ClassifyDescription(params.input.text(), match.description); 1118 RecordAdditionalInfoFromUrlRow(info, &match); 1119 return match; 1120} 1121 1122int HistoryURLProvider::CalculateRelevanceScoreUsingScoringParams( 1123 const history::HistoryMatch& match, 1124 int old_relevance) const { 1125 if (!scoring_params_.experimental_scoring_enabled) 1126 return old_relevance; 1127 1128 const base::TimeDelta time_since_last_visit = 1129 base::Time::Now() - match.url_info.last_visit(); 1130 1131 int relevance = CalculateRelevanceUsingScoreBuckets( 1132 scoring_params_.typed_count_buckets, time_since_last_visit, old_relevance, 1133 match.url_info.typed_count()); 1134 1135 // Additional demotion (on top of typed_count demotion) of URLs that were 1136 // never typed. 1137 if (match.url_info.typed_count() == 0) { 1138 relevance = CalculateRelevanceUsingScoreBuckets( 1139 scoring_params_.visited_count_buckets, time_since_last_visit, relevance, 1140 match.url_info.visit_count()); 1141 } 1142 1143 DCHECK_LE(relevance, old_relevance); 1144 return relevance; 1145} 1146 1147// static 1148ACMatchClassifications HistoryURLProvider::ClassifyDescription( 1149 const base::string16& input_text, 1150 const base::string16& description) { 1151 base::string16 clean_description = history::CleanUpTitleForMatching( 1152 description); 1153 history::TermMatches description_matches(SortAndDeoverlapMatches( 1154 history::MatchTermInString(input_text, clean_description, 0))); 1155 history::WordStarts description_word_starts; 1156 history::String16VectorFromString16( 1157 clean_description, false, &description_word_starts); 1158 // If HistoryURL retrieves any matches (and hence we reach this code), we 1159 // are guaranteed that the beginning of input_text must be a word break. 1160 history::WordStarts offsets(1, 0u); 1161 description_matches = 1162 history::ScoredHistoryMatch::FilterTermMatchesByWordStarts( 1163 description_matches, offsets, description_word_starts, 0, 1164 std::string::npos); 1165 return SpansFromTermMatch( 1166 description_matches, clean_description.length(), false); 1167} 1168