history_url_provider.cc revision 5c02ac1a9c1b504631c0a3d2b6e737b5d738bae1
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/history_url_provider.h" 6 7#include <algorithm> 8 9#include "base/basictypes.h" 10#include "base/bind.h" 11#include "base/command_line.h" 12#include "base/message_loop/message_loop.h" 13#include "base/metrics/histogram.h" 14#include "base/prefs/pref_service.h" 15#include "base/strings/string_util.h" 16#include "base/strings/utf_string_conversions.h" 17#include "base/time/time.h" 18#include "chrome/browser/autocomplete/autocomplete_match.h" 19#include "chrome/browser/autocomplete/autocomplete_provider_listener.h" 20#include "chrome/browser/autocomplete/autocomplete_result.h" 21#include "chrome/browser/history/history_backend.h" 22#include "chrome/browser/history/history_database.h" 23#include "chrome/browser/history/history_service.h" 24#include "chrome/browser/history/history_service_factory.h" 25#include "chrome/browser/history/history_types.h" 26#include "chrome/browser/history/in_memory_url_index_types.h" 27#include "chrome/browser/history/scored_history_match.h" 28#include "chrome/browser/omnibox/omnibox_field_trial.h" 29#include "chrome/browser/profiles/profile.h" 30#include "chrome/browser/search_engines/template_url_service.h" 31#include "chrome/browser/search_engines/template_url_service_factory.h" 32#include "chrome/common/chrome_switches.h" 33#include "chrome/common/net/url_fixer_upper.h" 34#include "chrome/common/pref_names.h" 35#include "chrome/common/url_constants.h" 36#include "components/bookmarks/core/browser/bookmark_utils.h" 37#include "net/base/net_util.h" 38#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 39#include "url/gurl.h" 40#include "url/url_parse.h" 41#include "url/url_util.h" 42 43namespace { 44 45// If |create_if_necessary| is true, ensures that |matches| contains an 46// entry for |info|, creating a new such entry if necessary (using 47// |input_location| and |match_in_scheme|). 48// 49// If |promote| is true, this also ensures the entry is the first element in 50// |matches|, moving or adding it to the front as appropriate. When |promote| 51// is false, existing matches are left in place, and newly added matches are 52// placed at the back. 53// 54// It's OK to call this function with both |create_if_necessary| and 55// |promote| false, in which case we'll do nothing. 56// 57// Returns whether the match exists regardless if it was promoted/created. 58bool CreateOrPromoteMatch(const history::URLRow& info, 59 size_t input_location, 60 bool match_in_scheme, 61 history::HistoryMatches* matches, 62 bool create_if_necessary, 63 bool promote) { 64 // |matches| may already have an entry for this. 65 for (history::HistoryMatches::iterator i(matches->begin()); 66 i != matches->end(); ++i) { 67 if (i->url_info.url() == info.url()) { 68 // Rotate it to the front if the caller wishes. 69 if (promote) 70 std::rotate(matches->begin(), i, i + 1); 71 return true; 72 } 73 } 74 75 if (!create_if_necessary) 76 return false; 77 78 // No entry, so create one. 79 history::HistoryMatch match(info, input_location, match_in_scheme, true); 80 if (promote) 81 matches->push_front(match); 82 else 83 matches->push_back(match); 84 85 return true; 86} 87 88// Given the user's |input| and a |match| created from it, reduce the match's 89// URL to just a host. If this host still matches the user input, return it. 90// Returns the empty string on failure. 91GURL ConvertToHostOnly(const history::HistoryMatch& match, 92 const base::string16& input) { 93 // See if we should try to do host-only suggestions for this URL. Nonstandard 94 // schemes means there's no authority section, so suggesting the host name 95 // is useless. File URLs are standard, but host suggestion is not useful for 96 // them either. 97 const GURL& url = match.url_info.url(); 98 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) 99 return GURL(); 100 101 // Transform to a host-only match. Bail if the host no longer matches the 102 // user input (e.g. because the user typed more than just a host). 103 GURL host = url.GetWithEmptyPath(); 104 if ((host.spec().length() < (match.input_location + input.length()))) 105 return GURL(); // User typing is longer than this host suggestion. 106 107 const base::string16 spec = base::UTF8ToUTF16(host.spec()); 108 if (spec.compare(match.input_location, input.length(), input)) 109 return GURL(); // User typing is no longer a prefix. 110 111 return host; 112} 113 114// Acts like the > operator for URLInfo classes. 115bool CompareHistoryMatch(const history::HistoryMatch& a, 116 const history::HistoryMatch& b) { 117 // A promoted match is better than non-promoted. 118 if (a.promoted != b.promoted) 119 return a.promoted; 120 121 // A URL that has been typed at all is better than one that has never been 122 // typed. (Note "!"s on each side) 123 if (!a.url_info.typed_count() != !b.url_info.typed_count()) 124 return a.url_info.typed_count() > b.url_info.typed_count(); 125 126 // Innermost matches (matches after any scheme or "www.") are better than 127 // non-innermost matches. 128 if (a.innermost_match != b.innermost_match) 129 return a.innermost_match; 130 131 // URLs that have been typed more often are better. 132 if (a.url_info.typed_count() != b.url_info.typed_count()) 133 return a.url_info.typed_count() > b.url_info.typed_count(); 134 135 // For URLs that have each been typed once, a host (alone) is better than a 136 // page inside. 137 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) 138 return a.IsHostOnly(); 139 140 // URLs that have been visited more often are better. 141 if (a.url_info.visit_count() != b.url_info.visit_count()) 142 return a.url_info.visit_count() > b.url_info.visit_count(); 143 144 // URLs that have been visited more recently are better. 145 return a.url_info.last_visit() > b.url_info.last_visit(); 146} 147 148// Sorts and dedups the given list of matches. 149void SortAndDedupMatches(history::HistoryMatches* matches) { 150 // Sort by quality, best first. 151 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); 152 153 // Remove duplicate matches (caused by the search string appearing in one of 154 // the prefixes as well as after it). Consider the following scenario: 155 // 156 // User has visited "http://http.com" once and "http://htaccess.com" twice. 157 // User types "http". The autocomplete search with prefix "http://" returns 158 // the first host, while the search with prefix "" returns both hosts. Now 159 // we sort them into rank order: 160 // http://http.com (innermost_match) 161 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) 162 // http://http.com (!innermost_match, url_info.visit_count == 1) 163 // 164 // The above scenario tells us we can't use std::unique(), since our 165 // duplicates are not always sequential. It also tells us we should remove 166 // the lower-quality duplicate(s), since otherwise the returned results won't 167 // be ordered correctly. This is easy to do: we just always remove the later 168 // element of a duplicate pair. 169 // Be careful! Because the vector contents may change as we remove elements, 170 // we use an index instead of an iterator in the outer loop, and don't 171 // precalculate the ending position. 172 for (size_t i = 0; i < matches->size(); ++i) { 173 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); 174 j != matches->end(); ) { 175 if ((*matches)[i].url_info.url() == j->url_info.url()) 176 j = matches->erase(j); 177 else 178 ++j; 179 } 180 } 181} 182 183// Extracts typed_count, visit_count, and last_visited time from the 184// URLRow and puts them in the additional info field of the |match| 185// for display in about:omnibox. 186void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, 187 AutocompleteMatch* match) { 188 match->RecordAdditionalInfo("typed count", info.typed_count()); 189 match->RecordAdditionalInfo("visit count", info.visit_count()); 190 match->RecordAdditionalInfo("last visit", info.last_visit()); 191} 192 193// Calculates a new relevance score applying half-life time decaying to |count| 194// using |time_since_last_visit| and |score_buckets|. 195// This function will never return a score higher than |undecayed_relevance|. 196// In other words, it can only demote the old score. 197double CalculateRelevanceUsingScoreBuckets( 198 const HUPScoringParams::ScoreBuckets& score_buckets, 199 const base::TimeDelta& time_since_last_visit, 200 int undecayed_relevance, 201 int count) { 202 // Back off if above relevance cap. 203 if ((score_buckets.relevance_cap() != -1) && 204 (undecayed_relevance >= score_buckets.relevance_cap())) 205 return undecayed_relevance; 206 207 // Time based decay using half-life time. 208 double decayed_count = count; 209 if (decayed_count > 0) 210 decayed_count *= score_buckets.HalfLifeTimeDecay(time_since_last_visit); 211 212 // Find a threshold where decayed_count >= bucket. 213 const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket = NULL; 214 for (size_t i = 0; i < score_buckets.buckets().size(); ++i) { 215 score_bucket = &score_buckets.buckets()[i]; 216 if (decayed_count >= score_bucket->first) 217 break; // Buckets are in descending order, so we can ignore the rest. 218 } 219 220 return (score_bucket && (undecayed_relevance > score_bucket->second)) ? 221 score_bucket->second : undecayed_relevance; 222} 223 224} // namespace 225 226// ----------------------------------------------------------------- 227// SearchTermsDataSnapshot 228 229// Implementation of SearchTermsData that takes a snapshot of another 230// SearchTermsData by copying all the responses to the different getters into 231// member strings, then returning those strings when its own getters are called. 232// This will typically be constructed on the UI thread from 233// UIThreadSearchTermsData but is subsequently safe to use on any thread. 234class SearchTermsDataSnapshot : public SearchTermsData { 235 public: 236 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); 237 virtual ~SearchTermsDataSnapshot(); 238 239 virtual std::string GoogleBaseURLValue() const OVERRIDE; 240 virtual std::string GetApplicationLocale() const OVERRIDE; 241 virtual base::string16 GetRlzParameterValue( 242 bool from_app_list) const OVERRIDE; 243 virtual std::string GetSearchClient() const OVERRIDE; 244 virtual std::string NTPIsThemedParam() const OVERRIDE; 245 246 private: 247 std::string google_base_url_value_; 248 std::string application_locale_; 249 base::string16 rlz_parameter_value_; 250 std::string search_client_; 251 std::string ntp_is_themed_param_; 252 253 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); 254}; 255 256SearchTermsDataSnapshot::SearchTermsDataSnapshot( 257 const SearchTermsData& search_terms_data) 258 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), 259 application_locale_(search_terms_data.GetApplicationLocale()), 260 rlz_parameter_value_(search_terms_data.GetRlzParameterValue(false)), 261 search_client_(search_terms_data.GetSearchClient()), 262 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()) {} 263 264SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { 265} 266 267std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const { 268 return google_base_url_value_; 269} 270 271std::string SearchTermsDataSnapshot::GetApplicationLocale() const { 272 return application_locale_; 273} 274 275base::string16 SearchTermsDataSnapshot::GetRlzParameterValue( 276 bool from_app_list) const { 277 return rlz_parameter_value_; 278} 279 280std::string SearchTermsDataSnapshot::GetSearchClient() const { 281 return search_client_; 282} 283 284std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { 285 return ntp_is_themed_param_; 286} 287 288// ----------------------------------------------------------------- 289// HistoryURLProvider 290 291// These ugly magic numbers will go away once we switch all scoring 292// behavior (including URL-what-you-typed) to HistoryQuick provider. 293const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; 294const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; 295const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; 296const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; 297 298// VisitClassifier is used to classify the type of visit to a particular url. 299class HistoryURLProvider::VisitClassifier { 300 public: 301 enum Type { 302 INVALID, // Navigations to the URL are not allowed. 303 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but 304 // which is known to refer to a visited intranet host. 305 VISITED, // The site has been previously visited. 306 }; 307 308 VisitClassifier(HistoryURLProvider* provider, 309 const AutocompleteInput& input, 310 history::URLDatabase* db); 311 312 // Returns the type of visit for the specified input. 313 Type type() const { return type_; } 314 315 // Returns the URLRow for the visit. 316 const history::URLRow& url_row() const { return url_row_; } 317 318 private: 319 HistoryURLProvider* provider_; 320 history::URLDatabase* db_; 321 Type type_; 322 history::URLRow url_row_; 323 324 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); 325}; 326 327HistoryURLProvider::VisitClassifier::VisitClassifier( 328 HistoryURLProvider* provider, 329 const AutocompleteInput& input, 330 history::URLDatabase* db) 331 : provider_(provider), 332 db_(db), 333 type_(INVALID) { 334 const GURL& url = input.canonicalized_url(); 335 // Detect email addresses. These cases will look like "http://user@site/", 336 // and because the history backend strips auth creds, we'll get a bogus exact 337 // match below if the user has visited "site". 338 if (!url.is_valid() || 339 ((input.type() == AutocompleteInput::UNKNOWN) && 340 input.parts().username.is_nonempty() && 341 !input.parts().password.is_nonempty() && 342 !input.parts().path.is_nonempty())) 343 return; 344 345 if (db_->GetRowForURL(url, &url_row_)) { 346 type_ = VISITED; 347 return; 348 } 349 350 if (provider_->CanFindIntranetURL(db_, input)) { 351 // The user typed an intranet hostname that they've visited (albeit with a 352 // different port and/or path) before. 353 url_row_ = history::URLRow(url); 354 type_ = UNVISITED_INTRANET; 355 } 356} 357 358HistoryURLProviderParams::HistoryURLProviderParams( 359 const AutocompleteInput& input, 360 bool trim_http, 361 const std::string& languages, 362 TemplateURL* default_search_provider, 363 const SearchTermsData& search_terms_data) 364 : message_loop(base::MessageLoop::current()), 365 input(input), 366 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), 367 trim_http(trim_http), 368 failed(false), 369 languages(languages), 370 dont_suggest_exact_input(false), 371 default_search_provider(default_search_provider ? 372 new TemplateURL(default_search_provider->profile(), 373 default_search_provider->data()) : NULL), 374 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { 375} 376 377HistoryURLProviderParams::~HistoryURLProviderParams() { 378} 379 380HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, 381 Profile* profile) 382 : HistoryProvider(listener, profile, 383 AutocompleteProvider::TYPE_HISTORY_URL), 384 params_(NULL), 385 cull_redirects_( 386 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() || 387 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrialExperimentGroup()), 388 create_shorter_match_( 389 !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || 390 !OmniboxFieldTrial:: 391 InHUPCreateShorterMatchFieldTrialExperimentGroup()), 392 search_url_database_(true) { 393 // Initialize HUP scoring params based on the current experiment. 394 OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_); 395} 396 397void HistoryURLProvider::Start(const AutocompleteInput& input, 398 bool minimal_changes) { 399 // NOTE: We could try hard to do less work in the |minimal_changes| case 400 // here; some clever caching would let us reuse the raw matches from the 401 // history DB without re-querying. However, we'd still have to go back to 402 // the history thread to mark these up properly, and if pass 2 is currently 403 // running, we'd need to wait for it to return to the main thread before 404 // doing this (we can't just write new data for it to read due to thread 405 // safety issues). At that point it's just as fast, and easier, to simply 406 // re-run the query from scratch and ignore |minimal_changes|. 407 408 // Cancel any in-progress query. 409 Stop(false); 410 411 RunAutocompletePasses(input, true); 412} 413 414void HistoryURLProvider::Stop(bool clear_cached_results) { 415 done_ = true; 416 417 if (params_) 418 params_->cancel_flag.Set(); 419} 420 421AutocompleteMatch HistoryURLProvider::SuggestExactInput( 422 const base::string16& text, 423 const GURL& destination_url, 424 bool trim_http) { 425 AutocompleteMatch match(this, 0, false, 426 AutocompleteMatchType::URL_WHAT_YOU_TYPED); 427 428 if (destination_url.is_valid()) { 429 match.destination_url = destination_url; 430 431 // Trim off "http://" if the user didn't type it. 432 // NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to 433 // strip the scheme as we need to know the offset so we can adjust the 434 // |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have 435 // slightly different behavior as well (the latter will strip even without 436 // two slashes after the scheme). 437 DCHECK(!trim_http || !AutocompleteInput::HasHTTPScheme(text)); 438 base::string16 display_string( 439 StringForURLDisplay(destination_url, false, false)); 440 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; 441 match.fill_into_edit = 442 AutocompleteInput::FormattedStringWithEquivalentMeaning(destination_url, 443 display_string); 444 match.allowed_to_be_default_match = true; 445 // NOTE: Don't set match.inline_autocompletion to something non-empty here; 446 // it's surprising and annoying. 447 448 // Try to highlight "innermost" match location. If we fix up "w" into 449 // "www.w.com", we want to highlight the fifth character, not the first. 450 // This relies on match.destination_url being the non-prefix-trimmed version 451 // of match.contents. 452 match.contents = display_string; 453 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( 454 base::UTF8ToUTF16(destination_url.spec()), text); 455 // It's possible for match.destination_url to not contain the user's input 456 // at all (so |best_prefix| is NULL), for example if the input is 457 // "view-source:x" and |destination_url| has an inserted "http://" in the 458 // middle. 459 if (best_prefix == NULL) { 460 AutocompleteMatch::ClassifyMatchInString(text, match.contents, 461 ACMatchClassification::URL, 462 &match.contents_class); 463 } else { 464 AutocompleteMatch::ClassifyLocationInString( 465 best_prefix->prefix.length() - offset, text.length(), 466 match.contents.length(), ACMatchClassification::URL, 467 &match.contents_class); 468 } 469 470 match.is_history_what_you_typed_match = true; 471 } 472 473 return match; 474} 475 476// Called on the history thread. 477void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, 478 history::URLDatabase* db, 479 HistoryURLProviderParams* params) { 480 // We may get called with a NULL database if it couldn't be properly 481 // initialized. 482 if (!db) { 483 params->failed = true; 484 } else if (!params->cancel_flag.IsSet()) { 485 base::TimeTicks beginning_time = base::TimeTicks::Now(); 486 487 DoAutocomplete(backend, db, params); 488 489 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", 490 base::TimeTicks::Now() - beginning_time); 491 } 492 493 // Return the results (if any) to the main thread. 494 params->message_loop->PostTask(FROM_HERE, base::Bind( 495 &HistoryURLProvider::QueryComplete, this, params)); 496} 497 498// Used by both autocomplete passes, and therefore called on multiple different 499// threads (though not simultaneously). 500void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, 501 history::URLDatabase* db, 502 HistoryURLProviderParams* params) { 503 VisitClassifier classifier(this, params->input, db); 504 // Create a What You Typed match, which we'll need below. 505 // 506 // We display this to the user when there's a reasonable chance they actually 507 // care: 508 // * Their input can be opened as a URL, and 509 // * We parsed the input as a URL, or it starts with an explicit "http:" or 510 // "https:". 511 // that is when their input can be opened as a URL. 512 // Otherwise, this is just low-quality noise. In the cases where we've parsed 513 // as UNKNOWN, we'll still show an accidental search infobar if need be. 514 bool have_what_you_typed_match = 515 (params->input.type() != AutocompleteInput::QUERY) && 516 ((params->input.type() != AutocompleteInput::UNKNOWN) || 517 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || 518 !params->trim_http || 519 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); 520 AutocompleteMatch what_you_typed_match(SuggestExactInput( 521 params->input.text(), params->input.canonicalized_url(), 522 params->trim_http)); 523 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 524 525 // Get the matching URLs from the DB 526 history::URLRows url_matches; 527 history::HistoryMatches history_matches; 528 529 if (search_url_database_) { 530 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); 531 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); 532 ++i) { 533 if (params->cancel_flag.IsSet()) 534 return; // Canceled in the middle of a query, give up. 535 // We only need kMaxMatches results in the end, but before we 536 // get there we need to promote lower-quality matches that are 537 // prefixes of higher-quality matches, and remove lower-quality 538 // redirects. So we ask for more results than we need, of every 539 // prefix type, in hopes this will give us far more than enough 540 // to work with. CullRedirects() will then reduce the list to 541 // the best kMaxMatches results. 542 db->AutocompleteForPrefix( 543 base::UTF16ToUTF8(i->prefix + params->input.text()), 544 kMaxMatches * 2, 545 (backend == NULL), 546 &url_matches); 547 for (history::URLRows::const_iterator j(url_matches.begin()); 548 j != url_matches.end(); ++j) { 549 const URLPrefix* best_prefix = 550 URLPrefix::BestURLPrefix(base::UTF8ToUTF16(j->url().spec()), 551 base::string16()); 552 DCHECK(best_prefix != NULL); 553 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), 554 i->num_components == 0, 555 i->num_components >= best_prefix->num_components)); 556 } 557 } 558 } 559 560 // Create sorted list of suggestions. 561 CullPoorMatches(*params, &history_matches); 562 SortAndDedupMatches(&history_matches); 563 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, 564 what_you_typed_match, &history_matches); 565 566 // Try to promote a match as an exact/inline autocomplete match. This also 567 // moves it to the front of |history_matches|, so skip over it when 568 // converting the rest of the matches. 569 size_t first_match = 1; 570 size_t exact_suggestion = 0; 571 // Checking |is_history_what_you_typed_match| tells us whether 572 // SuggestExactInput() succeeded in constructing a valid match. 573 if (what_you_typed_match.is_history_what_you_typed_match && 574 (!backend || !params->dont_suggest_exact_input) && 575 FixupExactSuggestion(db, params->input, classifier, &what_you_typed_match, 576 &history_matches)) { 577 // Got an exact match for the user's input. Treat it as the best match 578 // regardless of the input type. 579 exact_suggestion = 1; 580 params->matches.push_back(what_you_typed_match); 581 } else if (params->prevent_inline_autocomplete || 582 history_matches.empty() || 583 !PromoteMatchForInlineAutocomplete(history_matches.front(), params)) { 584 // Failed to promote any URLs for inline autocompletion. Use the What You 585 // Typed match, if we have it. 586 first_match = 0; 587 if (have_what_you_typed_match) 588 params->matches.push_back(what_you_typed_match); 589 } 590 591 // This is the end of the synchronous pass. 592 if (!backend) 593 return; 594 // If search_url_database_ is false, we shouldn't have scheduled a second 595 // pass. 596 DCHECK(search_url_database_); 597 598 // Determine relevancy of highest scoring match, if any. 599 int relevance = -1; 600 for (ACMatches::const_iterator it = params->matches.begin(); 601 it != params->matches.end(); ++it) { 602 relevance = std::max(relevance, it->relevance); 603 } 604 605 if (cull_redirects_) { 606 // Remove redirects and trim list to size. We want to provide up to 607 // kMaxMatches results plus the What You Typed result, if it was added to 608 // |history_matches| above. 609 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); 610 } else { 611 // Simply trim the list to size. 612 if (history_matches.size() > kMaxMatches + exact_suggestion) 613 history_matches.resize(kMaxMatches + exact_suggestion); 614 } 615 616 // Convert the history matches to autocomplete matches. 617 for (size_t i = first_match; i < history_matches.size(); ++i) { 618 const history::HistoryMatch& match = history_matches[i]; 619 DCHECK(!have_what_you_typed_match || 620 (match.url_info.url() != 621 GURL(params->matches.front().destination_url))); 622 // If we've assigned a score already, all later matches score one 623 // less than the previous match. 624 relevance = (relevance > 0) ? (relevance - 1) : 625 CalculateRelevance(NORMAL, history_matches.size() - 1 - i); 626 AutocompleteMatch ac_match = HistoryMatchToACMatch(*params, match, 627 NORMAL, relevance); 628 // The experimental scoring must not change the top result's score. 629 if (!params->matches.empty()) { 630 relevance = CalculateRelevanceScoreUsingScoringParams(match, relevance); 631 ac_match.relevance = relevance; 632 } 633 params->matches.push_back(ac_match); 634 } 635} 636 637// Called on the main thread when the query is complete. 638void HistoryURLProvider::QueryComplete( 639 HistoryURLProviderParams* params_gets_deleted) { 640 // Ensure |params_gets_deleted| gets deleted on exit. 641 scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted); 642 643 // If the user hasn't already started another query, clear our member pointer 644 // so we can't write into deleted memory. 645 if (params_ == params_gets_deleted) 646 params_ = NULL; 647 648 // Don't send responses for queries that have been canceled. 649 if (params->cancel_flag.IsSet()) 650 return; // Already set done_ when we canceled, no need to set it again. 651 652 // Don't modify |matches_| if the query failed, since it might have a default 653 // match in it, whereas |params->matches| will be empty. 654 if (!params->failed) { 655 matches_.swap(params->matches); 656 UpdateStarredStateOfMatches(); 657 } 658 659 done_ = true; 660 listener_->OnProviderUpdate(true); 661} 662 663HistoryURLProvider::~HistoryURLProvider() { 664 // Note: This object can get leaked on shutdown if there are pending 665 // requests on the database (which hold a reference to us). Normally, these 666 // messages get flushed for each thread. We do a round trip from main, to 667 // history, back to main while holding a reference. If the main thread 668 // completes before the history thread, the message to delegate back to the 669 // main thread will not run and the reference will leak. Therefore, don't do 670 // anything on destruction. 671} 672 673int HistoryURLProvider::CalculateRelevance(MatchType match_type, 674 size_t match_number) const { 675 switch (match_type) { 676 case INLINE_AUTOCOMPLETE: 677 return kScoreForBestInlineableResult; 678 679 case UNVISITED_INTRANET: 680 return kScoreForUnvisitedIntranetResult; 681 682 case WHAT_YOU_TYPED: 683 return kScoreForWhatYouTypedResult; 684 685 default: // NORMAL 686 return kBaseScoreForNonInlineableResult + 687 static_cast<int>(match_number); 688 } 689} 690 691void HistoryURLProvider::RunAutocompletePasses( 692 const AutocompleteInput& input, 693 bool fixup_input_and_run_pass_1) { 694 matches_.clear(); 695 696 if ((input.type() == AutocompleteInput::INVALID) || 697 (input.type() == AutocompleteInput::FORCED_QUERY)) 698 return; 699 700 // Create a match for exactly what the user typed. This will only be used as 701 // a fallback in case we can't get the history service or URL DB; otherwise, 702 // we'll run this again in DoAutocomplete() and use that result instead. 703 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()); 704 // Don't do this for queries -- while we can sometimes mark up a match for 705 // this, it's not what the user wants, and just adds noise. 706 if (input.type() != AutocompleteInput::QUERY) { 707 AutocompleteMatch what_you_typed(SuggestExactInput( 708 input.text(), input.canonicalized_url(), trim_http)); 709 what_you_typed.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 710 matches_.push_back(what_you_typed); 711 } 712 713 // We'll need the history service to run both passes, so try to obtain it. 714 if (!profile_) 715 return; 716 HistoryService* const history_service = 717 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 718 if (!history_service) 719 return; 720 721 // Get the default search provider and search terms data now since we have to 722 // retrieve these on the UI thread, and the second pass runs on the history 723 // thread. |template_url_service| can be NULL when testing. 724 TemplateURLService* template_url_service = 725 TemplateURLServiceFactory::GetForProfile(profile_); 726 TemplateURL* default_search_provider = template_url_service ? 727 template_url_service->GetDefaultSearchProvider() : NULL; 728 UIThreadSearchTermsData data(profile_); 729 730 // Create the data structure for the autocomplete passes. We'll save this off 731 // onto the |params_| member for later deletion below if we need to run pass 732 // 2. 733 scoped_ptr<HistoryURLProviderParams> params( 734 new HistoryURLProviderParams( 735 input, trim_http, 736 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages), 737 default_search_provider, data)); 738 739 params->prevent_inline_autocomplete = 740 PreventInlineAutocomplete(input); 741 742 if (fixup_input_and_run_pass_1) { 743 // Do some fixup on the user input before matching against it, so we provide 744 // good results for local file paths, input with spaces, etc. 745 if (!FixupUserInput(¶ms->input)) 746 return; 747 748 // Pass 1: Get the in-memory URL database, and use it to find and promote 749 // the inline autocomplete match, if any. 750 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 751 // url_db can be NULL if it hasn't finished initializing (or failed to 752 // initialize). In this case all we can do is fall back on the second 753 // pass. 754 // 755 // TODO(pkasting): We should just block here until this loads. Any time 756 // someone unloads the history backend, we'll get inconsistent inline 757 // autocomplete behavior here. 758 if (url_db) { 759 DoAutocomplete(NULL, url_db, params.get()); 760 // params->matches now has the matches we should expose to the provider. 761 // Pass 2 expects a "clean slate" set of matches. 762 matches_.clear(); 763 matches_.swap(params->matches); 764 UpdateStarredStateOfMatches(); 765 } 766 } 767 768 // Pass 2: Ask the history service to call us back on the history thread, 769 // where we can read the full on-disk DB. 770 if (search_url_database_ && input.want_asynchronous_matches()) { 771 done_ = false; 772 params_ = params.release(); // This object will be destroyed in 773 // QueryComplete() once we're done with it. 774 history_service->ScheduleAutocomplete(this, params_); 775 } 776} 777 778bool HistoryURLProvider::FixupExactSuggestion( 779 history::URLDatabase* db, 780 const AutocompleteInput& input, 781 const VisitClassifier& classifier, 782 AutocompleteMatch* match, 783 history::HistoryMatches* matches) const { 784 DCHECK(match != NULL); 785 DCHECK(matches != NULL); 786 787 MatchType type = INLINE_AUTOCOMPLETE; 788 switch (classifier.type()) { 789 case VisitClassifier::INVALID: 790 return false; 791 case VisitClassifier::UNVISITED_INTRANET: 792 type = UNVISITED_INTRANET; 793 break; 794 default: 795 DCHECK_EQ(VisitClassifier::VISITED, classifier.type()); 796 // We have data for this match, use it. 797 match->deletable = true; 798 match->description = classifier.url_row().title(); 799 RecordAdditionalInfoFromUrlRow(classifier.url_row(), match); 800 match->description_class = 801 ClassifyDescription(input.text(), match->description); 802 if (!classifier.url_row().typed_count()) { 803 // If we reach here, we must be in the second pass, and we must not have 804 // this row's data available during the first pass. That means we 805 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to 806 // maintain the ordering between passes consistent, we need to score it 807 // the same way here. 808 type = CanFindIntranetURL(db, input) ? 809 UNVISITED_INTRANET : WHAT_YOU_TYPED; 810 } 811 break; 812 } 813 814 const GURL& url = match->destination_url; 815 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec(); 816 // If the what-you-typed result looks like a single word (which can be 817 // interpreted as an intranet address) followed by a pound sign ("#"), 818 // leave the score for the url-what-you-typed result as is. It will be 819 // outscored by a search query from the SearchProvider. This test fixes 820 // cases such as "c#" and "c# foo" where the user has visited an intranet 821 // site "c". We want the search-what-you-typed score to beat the 822 // URL-what-you-typed score in this case. Most of the below test tries to 823 // make sure that this code does not trigger if the user did anything to 824 // indicate the desired match is a URL. For instance, "c/# foo" will not 825 // pass the test because that will be classified as input type URL. The 826 // parsed.CountCharactersBefore() in the test looks for the presence of a 827 // reference fragment in the URL by checking whether the position differs 828 // included the delimiter (pound sign) versus not including the delimiter. 829 // (One cannot simply check url.ref() because it will not distinguish 830 // between the input "c" and the input "c#", both of which will have empty 831 // reference fragments.) 832 if ((type == UNVISITED_INTRANET) && 833 (input.type() != AutocompleteInput::URL) && url.username().empty() && 834 url.password().empty() && url.port().empty() && (url.path() == "/") && 835 url.query().empty() && 836 (parsed.CountCharactersBefore(url::Parsed::REF, true) != 837 parsed.CountCharactersBefore(url::Parsed::REF, false))) { 838 return false; 839 } 840 841 match->relevance = CalculateRelevance(type, 0); 842 843 // If there are any other matches, then don't promote this match here, in 844 // hopes the caller will be able to inline autocomplete a better suggestion. 845 // DoAutocomplete() will fall back on this match if inline autocompletion 846 // fails. This matches how we react to never-visited URL inputs in the non- 847 // intranet case. 848 if (type == UNVISITED_INTRANET && !matches->empty()) 849 return false; 850 851 // Put it on the front of the HistoryMatches for redirect culling. 852 CreateOrPromoteMatch(classifier.url_row(), base::string16::npos, false, 853 matches, true, true); 854 return true; 855} 856 857bool HistoryURLProvider::CanFindIntranetURL( 858 history::URLDatabase* db, 859 const AutocompleteInput& input) const { 860 // Normally passing the first two conditions below ought to guarantee the 861 // third condition, but because FixupUserInput() can run and modify the 862 // input's text and parts between Parse() and here, it seems better to be 863 // paranoid and check. 864 if ((input.type() != AutocompleteInput::UNKNOWN) || 865 !LowerCaseEqualsASCII(input.scheme(), content::kHttpScheme) || 866 !input.parts().host.is_nonempty()) 867 return false; 868 const std::string host(base::UTF16ToUTF8( 869 input.text().substr(input.parts().host.begin, input.parts().host.len))); 870 const size_t registry_length = 871 net::registry_controlled_domains::GetRegistryLength( 872 host, 873 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 874 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 875 return registry_length == 0 && db->IsTypedHost(host); 876} 877 878bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( 879 const history::HistoryMatch& match, 880 HistoryURLProviderParams* params) { 881 // Promote the first match if it's been marked for promotion or typed at least 882 // n times, where n == 1 for "simple" (host-only) URLs and n == 2 for others. 883 // We set a higher bar for these long URLs because it's less likely that users 884 // will want to visit them again. Even though we don't increment the 885 // typed_count for pasted-in URLs, if the user manually edits the URL or types 886 // some long thing in by hand, we wouldn't want to immediately start 887 // autocompleting it. 888 if (!match.promoted && 889 (!match.url_info.typed_count() || 890 ((match.url_info.typed_count() == 1) && 891 !match.IsHostOnly()))) 892 return false; 893 894 // In the case where the user has typed "foo.com" and visited (but not typed) 895 // "foo/", and the input is "foo", we can reach here for "foo.com" during the 896 // first pass but have the second pass suggest the exact input as a better 897 // URL. Since we need both passes to agree, and since during the first pass 898 // there's no way to know about "foo/", make reaching this point prevent any 899 // future pass from suggesting the exact input as a better match. 900 if (params) { 901 params->dont_suggest_exact_input = true; 902 AutocompleteMatch ac_match = HistoryMatchToACMatch( 903 *params, match, INLINE_AUTOCOMPLETE, 904 CalculateRelevance(INLINE_AUTOCOMPLETE, 0)); 905 params->matches.push_back(ac_match); 906 } 907 return true; 908} 909 910// See if a shorter version of the best match should be created, and if so place 911// it at the front of |matches|. This can suggest history URLs that are 912// prefixes of the best match (if they've been visited enough, compared to the 913// best match), or create host-only suggestions even when they haven't been 914// visited before: if the user visited http://example.com/asdf once, we'll 915// suggest http://example.com/ even if they've never been to it. 916void HistoryURLProvider::PromoteOrCreateShorterSuggestion( 917 history::URLDatabase* db, 918 const HistoryURLProviderParams& params, 919 bool have_what_you_typed_match, 920 const AutocompleteMatch& what_you_typed_match, 921 history::HistoryMatches* matches) { 922 if (matches->empty()) 923 return; // No matches, nothing to do. 924 925 // Determine the base URL from which to search, and whether that URL could 926 // itself be added as a match. We can add the base iff it's not "effectively 927 // the same" as any "what you typed" match. 928 const history::HistoryMatch& match = matches->front(); 929 GURL search_base = ConvertToHostOnly(match, params.input.text()); 930 bool can_add_search_base_to_matches = !have_what_you_typed_match; 931 if (search_base.is_empty()) { 932 // Search from what the user typed when we couldn't reduce the best match 933 // to a host. Careful: use a substring of |match| here, rather than the 934 // first match in |params|, because they might have different prefixes. If 935 // the user typed "google.com", |what_you_typed_match| will hold 936 // "http://google.com/", but |match| might begin with 937 // "http://www.google.com/". 938 // TODO: this should be cleaned up, and is probably incorrect for IDN. 939 std::string new_match = match.url_info.url().possibly_invalid_spec(). 940 substr(0, match.input_location + params.input.text().length()); 941 search_base = GURL(new_match); 942 // TODO(mrossetti): There is a degenerate case where the following may 943 // cause a failure: http://www/~someword/fubar.html. Diagnose. 944 // See: http://crbug.com/50101 945 if (search_base.is_empty()) 946 return; // Can't construct a valid URL from which to start a search. 947 } else if (!can_add_search_base_to_matches) { 948 can_add_search_base_to_matches = 949 (search_base != what_you_typed_match.destination_url); 950 } 951 if (search_base == match.url_info.url()) 952 return; // Couldn't shorten |match|, so no range of URLs to search over. 953 954 // Search the DB for short URLs between our base and |match|. 955 history::URLRow info(search_base); 956 bool promote = true; 957 // A short URL is only worth suggesting if it's been visited at least a third 958 // as often as the longer URL. 959 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; 960 // For stability between the in-memory and on-disk autocomplete passes, when 961 // the long URL has been typed before, only suggest shorter URLs that have 962 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL 963 // (which hasn't been typed) that the in-memory pass doesn't know about, 964 // thereby making the top match, and thus the behavior of inline 965 // autocomplete, unstable. 966 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; 967 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), 968 match.url_info.url().possibly_invalid_spec(), min_visit_count, 969 min_typed_count, can_add_search_base_to_matches, &info)) { 970 if (!can_add_search_base_to_matches) 971 return; // Couldn't find anything and can't add the search base, bail. 972 973 // Try to get info on the search base itself. Promote it to the top if the 974 // original best match isn't good enough to autocomplete. 975 db->GetRowForURL(search_base, &info); 976 promote = match.url_info.typed_count() <= 1; 977 } 978 979 // Promote or add the desired URL to the list of matches. 980 bool ensure_can_inline = 981 promote && PromoteMatchForInlineAutocomplete(match, NULL); 982 ensure_can_inline &= CreateOrPromoteMatch(info, match.input_location, 983 match.match_in_scheme, matches, create_shorter_match_, promote); 984 if (ensure_can_inline) 985 matches->front().promoted = true; 986} 987 988void HistoryURLProvider::CullPoorMatches( 989 const HistoryURLProviderParams& params, 990 history::HistoryMatches* matches) const { 991 const base::Time& threshold(history::AutocompleteAgeThreshold()); 992 for (history::HistoryMatches::iterator i(matches->begin()); 993 i != matches->end(); ) { 994 if (RowQualifiesAsSignificant(i->url_info, threshold) && 995 !(params.default_search_provider && 996 params.default_search_provider->IsSearchURLUsingTermsData( 997 i->url_info.url(), *params.search_terms_data.get()))) { 998 ++i; 999 } else { 1000 i = matches->erase(i); 1001 } 1002 } 1003} 1004 1005void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, 1006 history::HistoryMatches* matches, 1007 size_t max_results) const { 1008 for (size_t source = 0; 1009 (source < matches->size()) && (source < max_results); ) { 1010 const GURL& url = (*matches)[source].url_info.url(); 1011 // TODO(brettw) this should go away when everything uses GURL. 1012 history::RedirectList redirects; 1013 backend->GetMostRecentRedirectsFrom(url, &redirects); 1014 if (!redirects.empty()) { 1015 // Remove all but the first occurrence of any of these redirects in the 1016 // search results. We also must add the URL we queried for, since it may 1017 // not be the first match and we'd want to remove it. 1018 // 1019 // For example, when A redirects to B and our matches are [A, X, B], 1020 // we'll get B as the redirects from, and we want to remove the second 1021 // item of that pair, removing B. If A redirects to B and our matches are 1022 // [B, X, A], we'll want to remove A instead. 1023 redirects.push_back(url); 1024 source = RemoveSubsequentMatchesOf(matches, source, redirects); 1025 } else { 1026 // Advance to next item. 1027 source++; 1028 } 1029 } 1030 1031 if (matches->size() > max_results) 1032 matches->resize(max_results); 1033} 1034 1035size_t HistoryURLProvider::RemoveSubsequentMatchesOf( 1036 history::HistoryMatches* matches, 1037 size_t source_index, 1038 const std::vector<GURL>& remove) const { 1039 size_t next_index = source_index + 1; // return value = item after source 1040 1041 // Find the first occurrence of any URL in the redirect chain. We want to 1042 // keep this one since it is rated the highest. 1043 history::HistoryMatches::iterator first(std::find_first_of( 1044 matches->begin(), matches->end(), remove.begin(), remove.end(), 1045 history::HistoryMatch::EqualsGURL)); 1046 DCHECK(first != matches->end()) << "We should have always found at least the " 1047 "original URL."; 1048 1049 // Find any following occurrences of any URL in the redirect chain, these 1050 // should be deleted. 1051 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, 1052 matches->end(), remove.begin(), remove.end(), 1053 history::HistoryMatch::EqualsGURL)); 1054 next != matches->end(); next = std::find_first_of(next, matches->end(), 1055 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) { 1056 // Remove this item. When we remove an item before the source index, we 1057 // need to shift it to the right and remember that so we can return it. 1058 next = matches->erase(next); 1059 if (static_cast<size_t>(next - matches->begin()) < next_index) 1060 --next_index; 1061 } 1062 return next_index; 1063} 1064 1065AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( 1066 const HistoryURLProviderParams& params, 1067 const history::HistoryMatch& history_match, 1068 MatchType match_type, 1069 int relevance) { 1070 const history::URLRow& info = history_match.url_info; 1071 AutocompleteMatch match(this, relevance, 1072 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL); 1073 match.typed_count = info.typed_count(); 1074 match.destination_url = info.url(); 1075 DCHECK(match.destination_url.is_valid()); 1076 size_t inline_autocomplete_offset = 1077 history_match.input_location + params.input.text().length(); 1078 std::string languages = (match_type == WHAT_YOU_TYPED) ? 1079 std::string() : params.languages; 1080 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 1081 ~((params.trim_http && !history_match.match_in_scheme) ? 1082 0 : net::kFormatUrlOmitHTTP); 1083 match.fill_into_edit = 1084 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 1085 net::FormatUrl(info.url(), languages, format_types, 1086 net::UnescapeRule::SPACES, NULL, NULL, 1087 &inline_autocomplete_offset)); 1088 if (!params.prevent_inline_autocomplete && 1089 (inline_autocomplete_offset != base::string16::npos)) { 1090 DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length()); 1091 match.inline_autocompletion = 1092 match.fill_into_edit.substr(inline_autocomplete_offset); 1093 } 1094 // The latter part of the test effectively asks "is the inline completion 1095 // empty?" (i.e., is this match effectively the what-you-typed match?). 1096 match.allowed_to_be_default_match = !params.prevent_inline_autocomplete || 1097 ((inline_autocomplete_offset != base::string16::npos) && 1098 (inline_autocomplete_offset >= match.fill_into_edit.length())); 1099 1100 size_t match_start = history_match.input_location; 1101 match.contents = net::FormatUrl(info.url(), languages, 1102 format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start); 1103 if ((match_start != base::string16::npos) && 1104 (inline_autocomplete_offset != base::string16::npos) && 1105 (inline_autocomplete_offset != match_start)) { 1106 DCHECK(inline_autocomplete_offset > match_start); 1107 AutocompleteMatch::ClassifyLocationInString(match_start, 1108 inline_autocomplete_offset - match_start, match.contents.length(), 1109 ACMatchClassification::URL, &match.contents_class); 1110 } else { 1111 AutocompleteMatch::ClassifyLocationInString(base::string16::npos, 0, 1112 match.contents.length(), ACMatchClassification::URL, 1113 &match.contents_class); 1114 } 1115 match.description = info.title(); 1116 match.description_class = 1117 ClassifyDescription(params.input.text(), match.description); 1118 RecordAdditionalInfoFromUrlRow(info, &match); 1119 return match; 1120} 1121 1122int HistoryURLProvider::CalculateRelevanceScoreUsingScoringParams( 1123 const history::HistoryMatch& match, 1124 int old_relevance) const { 1125 if (!scoring_params_.experimental_scoring_enabled) 1126 return old_relevance; 1127 1128 const base::TimeDelta time_since_last_visit = 1129 base::Time::Now() - match.url_info.last_visit(); 1130 1131 int relevance = CalculateRelevanceUsingScoreBuckets( 1132 scoring_params_.typed_count_buckets, time_since_last_visit, old_relevance, 1133 match.url_info.typed_count()); 1134 1135 // Additional demotion (on top of typed_count demotion) of URLs that were 1136 // never typed. 1137 if (match.url_info.typed_count() == 0) { 1138 relevance = CalculateRelevanceUsingScoreBuckets( 1139 scoring_params_.visited_count_buckets, time_since_last_visit, relevance, 1140 match.url_info.visit_count()); 1141 } 1142 1143 DCHECK_LE(relevance, old_relevance); 1144 return relevance; 1145} 1146 1147// static 1148ACMatchClassifications HistoryURLProvider::ClassifyDescription( 1149 const base::string16& input_text, 1150 const base::string16& description) { 1151 base::string16 clean_description = bookmark_utils::CleanUpTitleForMatching( 1152 description); 1153 history::TermMatches description_matches(SortAndDeoverlapMatches( 1154 history::MatchTermInString(input_text, clean_description, 0))); 1155 history::WordStarts description_word_starts; 1156 history::String16VectorFromString16( 1157 clean_description, false, &description_word_starts); 1158 // If HistoryURL retrieves any matches (and hence we reach this code), we 1159 // are guaranteed that the beginning of input_text must be a word break. 1160 history::WordStarts offsets(1, 0u); 1161 description_matches = 1162 history::ScoredHistoryMatch::FilterTermMatchesByWordStarts( 1163 description_matches, offsets, description_word_starts, 0, 1164 std::string::npos); 1165 return SpansFromTermMatch( 1166 description_matches, clean_description.length(), false); 1167} 1168