// history_url_provider.cc revision 46d4c2bc3267f3f028f39e7e311b0f89aba2e4fd
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/history_url_provider.h" 6 7#include <algorithm> 8 9#include "base/basictypes.h" 10#include "base/bind.h" 11#include "base/command_line.h" 12#include "base/message_loop/message_loop.h" 13#include "base/metrics/histogram.h" 14#include "base/prefs/pref_service.h" 15#include "base/strings/string_util.h" 16#include "base/strings/utf_string_conversions.h" 17#include "base/time/time.h" 18#include "chrome/browser/autocomplete/autocomplete_match.h" 19#include "chrome/browser/autocomplete/autocomplete_provider_listener.h" 20#include "chrome/browser/autocomplete/autocomplete_result.h" 21#include "chrome/browser/history/history_backend.h" 22#include "chrome/browser/history/history_database.h" 23#include "chrome/browser/history/history_service.h" 24#include "chrome/browser/history/history_service_factory.h" 25#include "chrome/browser/history/history_types.h" 26#include "chrome/browser/history/in_memory_url_index_types.h" 27#include "chrome/browser/history/scored_history_match.h" 28#include "chrome/browser/omnibox/omnibox_field_trial.h" 29#include "chrome/browser/profiles/profile.h" 30#include "chrome/browser/search_engines/template_url_service.h" 31#include "chrome/browser/search_engines/template_url_service_factory.h" 32#include "chrome/common/chrome_switches.h" 33#include "chrome/common/net/url_fixer_upper.h" 34#include "chrome/common/pref_names.h" 35#include "chrome/common/url_constants.h" 36#include "components/bookmarks/browser/bookmark_utils.h" 37#include "net/base/net_util.h" 38#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 39#include "url/gurl.h" 40#include "url/url_parse.h" 41#include "url/url_util.h" 42 43namespace { 44 45// If |create_if_necessary| is true, ensures that |matches| contains an 46// entry for |info|, creating a new 
such entry if necessary (using 47// |input_location| and |match_in_scheme|). 48// 49// If |promote| is true, this also ensures the entry is the first element in 50// |matches|, moving or adding it to the front as appropriate. When |promote| 51// is false, existing matches are left in place, and newly added matches are 52// placed at the back. 53// 54// It's OK to call this function with both |create_if_necessary| and 55// |promote| false, in which case we'll do nothing. 56// 57// Returns whether the match exists regardless if it was promoted/created. 58bool CreateOrPromoteMatch(const history::URLRow& info, 59 size_t input_location, 60 bool match_in_scheme, 61 history::HistoryMatches* matches, 62 bool create_if_necessary, 63 bool promote) { 64 // |matches| may already have an entry for this. 65 for (history::HistoryMatches::iterator i(matches->begin()); 66 i != matches->end(); ++i) { 67 if (i->url_info.url() == info.url()) { 68 // Rotate it to the front if the caller wishes. 69 if (promote) 70 std::rotate(matches->begin(), i, i + 1); 71 return true; 72 } 73 } 74 75 if (!create_if_necessary) 76 return false; 77 78 // No entry, so create one. 79 history::HistoryMatch match(info, input_location, match_in_scheme, true); 80 if (promote) 81 matches->push_front(match); 82 else 83 matches->push_back(match); 84 85 return true; 86} 87 88// Given the user's |input| and a |match| created from it, reduce the match's 89// URL to just a host. If this host still matches the user input, return it. 90// Returns the empty string on failure. 91GURL ConvertToHostOnly(const history::HistoryMatch& match, 92 const base::string16& input) { 93 // See if we should try to do host-only suggestions for this URL. Nonstandard 94 // schemes means there's no authority section, so suggesting the host name 95 // is useless. File URLs are standard, but host suggestion is not useful for 96 // them either. 
97 const GURL& url = match.url_info.url(); 98 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) 99 return GURL(); 100 101 // Transform to a host-only match. Bail if the host no longer matches the 102 // user input (e.g. because the user typed more than just a host). 103 GURL host = url.GetWithEmptyPath(); 104 if ((host.spec().length() < (match.input_location + input.length()))) 105 return GURL(); // User typing is longer than this host suggestion. 106 107 const base::string16 spec = base::UTF8ToUTF16(host.spec()); 108 if (spec.compare(match.input_location, input.length(), input)) 109 return GURL(); // User typing is no longer a prefix. 110 111 return host; 112} 113 114// Acts like the > operator for URLInfo classes. 115bool CompareHistoryMatch(const history::HistoryMatch& a, 116 const history::HistoryMatch& b) { 117 // A promoted match is better than non-promoted. 118 if (a.promoted != b.promoted) 119 return a.promoted; 120 121 // A URL that has been typed at all is better than one that has never been 122 // typed. (Note "!"s on each side) 123 if (!a.url_info.typed_count() != !b.url_info.typed_count()) 124 return a.url_info.typed_count() > b.url_info.typed_count(); 125 126 // Innermost matches (matches after any scheme or "www.") are better than 127 // non-innermost matches. 128 if (a.innermost_match != b.innermost_match) 129 return a.innermost_match; 130 131 // URLs that have been typed more often are better. 132 if (a.url_info.typed_count() != b.url_info.typed_count()) 133 return a.url_info.typed_count() > b.url_info.typed_count(); 134 135 // For URLs that have each been typed once, a host (alone) is better than a 136 // page inside. 137 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) 138 return a.IsHostOnly(); 139 140 // URLs that have been visited more often are better. 
141 if (a.url_info.visit_count() != b.url_info.visit_count()) 142 return a.url_info.visit_count() > b.url_info.visit_count(); 143 144 // URLs that have been visited more recently are better. 145 return a.url_info.last_visit() > b.url_info.last_visit(); 146} 147 148// Sorts and dedups the given list of matches. 149void SortAndDedupMatches(history::HistoryMatches* matches) { 150 // Sort by quality, best first. 151 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); 152 153 // Remove duplicate matches (caused by the search string appearing in one of 154 // the prefixes as well as after it). Consider the following scenario: 155 // 156 // User has visited "http://http.com" once and "http://htaccess.com" twice. 157 // User types "http". The autocomplete search with prefix "http://" returns 158 // the first host, while the search with prefix "" returns both hosts. Now 159 // we sort them into rank order: 160 // http://http.com (innermost_match) 161 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) 162 // http://http.com (!innermost_match, url_info.visit_count == 1) 163 // 164 // The above scenario tells us we can't use std::unique(), since our 165 // duplicates are not always sequential. It also tells us we should remove 166 // the lower-quality duplicate(s), since otherwise the returned results won't 167 // be ordered correctly. This is easy to do: we just always remove the later 168 // element of a duplicate pair. 169 // Be careful! Because the vector contents may change as we remove elements, 170 // we use an index instead of an iterator in the outer loop, and don't 171 // precalculate the ending position. 
172 for (size_t i = 0; i < matches->size(); ++i) { 173 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); 174 j != matches->end(); ) { 175 if ((*matches)[i].url_info.url() == j->url_info.url()) 176 j = matches->erase(j); 177 else 178 ++j; 179 } 180 } 181} 182 183// Extracts typed_count, visit_count, and last_visited time from the 184// URLRow and puts them in the additional info field of the |match| 185// for display in about:omnibox. 186void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, 187 AutocompleteMatch* match) { 188 match->RecordAdditionalInfo("typed count", info.typed_count()); 189 match->RecordAdditionalInfo("visit count", info.visit_count()); 190 match->RecordAdditionalInfo("last visit", info.last_visit()); 191} 192 193// Calculates a new relevance score applying half-life time decaying to |count| 194// using |time_since_last_visit| and |score_buckets|. 195// This function will never return a score higher than |undecayed_relevance|. 196// In other words, it can only demote the old score. 197double CalculateRelevanceUsingScoreBuckets( 198 const HUPScoringParams::ScoreBuckets& score_buckets, 199 const base::TimeDelta& time_since_last_visit, 200 int undecayed_relevance, 201 int count) { 202 // Back off if above relevance cap. 203 if ((score_buckets.relevance_cap() != -1) && 204 (undecayed_relevance >= score_buckets.relevance_cap())) 205 return undecayed_relevance; 206 207 // Time based decay using half-life time. 208 double decayed_count = count; 209 if (decayed_count > 0) 210 decayed_count *= score_buckets.HalfLifeTimeDecay(time_since_last_visit); 211 212 // Find a threshold where decayed_count >= bucket. 213 const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket = NULL; 214 for (size_t i = 0; i < score_buckets.buckets().size(); ++i) { 215 score_bucket = &score_buckets.buckets()[i]; 216 if (decayed_count >= score_bucket->first) 217 break; // Buckets are in descending order, so we can ignore the rest. 
218 } 219 220 return (score_bucket && (undecayed_relevance > score_bucket->second)) ? 221 score_bucket->second : undecayed_relevance; 222} 223 224} // namespace 225 226// ----------------------------------------------------------------- 227// SearchTermsDataSnapshot 228 229// Implementation of SearchTermsData that takes a snapshot of another 230// SearchTermsData by copying all the responses to the different getters into 231// member strings, then returning those strings when its own getters are called. 232// This will typically be constructed on the UI thread from 233// UIThreadSearchTermsData but is subsequently safe to use on any thread. 234class SearchTermsDataSnapshot : public SearchTermsData { 235 public: 236 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); 237 virtual ~SearchTermsDataSnapshot(); 238 239 virtual std::string GoogleBaseURLValue() const OVERRIDE; 240 virtual std::string GetApplicationLocale() const OVERRIDE; 241 virtual base::string16 GetRlzParameterValue( 242 bool from_app_list) const OVERRIDE; 243 virtual std::string GetSearchClient() const OVERRIDE; 244 virtual std::string NTPIsThemedParam() const OVERRIDE; 245 246 private: 247 std::string google_base_url_value_; 248 std::string application_locale_; 249 base::string16 rlz_parameter_value_; 250 std::string search_client_; 251 std::string ntp_is_themed_param_; 252 253 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); 254}; 255 256SearchTermsDataSnapshot::SearchTermsDataSnapshot( 257 const SearchTermsData& search_terms_data) 258 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), 259 application_locale_(search_terms_data.GetApplicationLocale()), 260 rlz_parameter_value_(search_terms_data.GetRlzParameterValue(false)), 261 search_client_(search_terms_data.GetSearchClient()), 262 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()) {} 263 264SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { 265} 266 267std::string 
SearchTermsDataSnapshot::GoogleBaseURLValue() const { 268 return google_base_url_value_; 269} 270 271std::string SearchTermsDataSnapshot::GetApplicationLocale() const { 272 return application_locale_; 273} 274 275base::string16 SearchTermsDataSnapshot::GetRlzParameterValue( 276 bool from_app_list) const { 277 return rlz_parameter_value_; 278} 279 280std::string SearchTermsDataSnapshot::GetSearchClient() const { 281 return search_client_; 282} 283 284std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { 285 return ntp_is_themed_param_; 286} 287 288// ----------------------------------------------------------------- 289// HistoryURLProvider 290 291// These ugly magic numbers will go away once we switch all scoring 292// behavior (including URL-what-you-typed) to HistoryQuick provider. 293const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; 294const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; 295const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; 296const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; 297 298// VisitClassifier is used to classify the type of visit to a particular url. 299class HistoryURLProvider::VisitClassifier { 300 public: 301 enum Type { 302 INVALID, // Navigations to the URL are not allowed. 303 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but 304 // which is known to refer to a visited intranet host. 305 VISITED, // The site has been previously visited. 306 }; 307 308 VisitClassifier(HistoryURLProvider* provider, 309 const AutocompleteInput& input, 310 history::URLDatabase* db); 311 312 // Returns the type of visit for the specified input. 313 Type type() const { return type_; } 314 315 // Returns the URLRow for the visit. 
316 const history::URLRow& url_row() const { return url_row_; } 317 318 private: 319 HistoryURLProvider* provider_; 320 history::URLDatabase* db_; 321 Type type_; 322 history::URLRow url_row_; 323 324 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); 325}; 326 327HistoryURLProvider::VisitClassifier::VisitClassifier( 328 HistoryURLProvider* provider, 329 const AutocompleteInput& input, 330 history::URLDatabase* db) 331 : provider_(provider), 332 db_(db), 333 type_(INVALID) { 334 const GURL& url = input.canonicalized_url(); 335 // Detect email addresses. These cases will look like "http://user@site/", 336 // and because the history backend strips auth creds, we'll get a bogus exact 337 // match below if the user has visited "site". 338 if (!url.is_valid() || 339 ((input.type() == AutocompleteInput::UNKNOWN) && 340 input.parts().username.is_nonempty() && 341 !input.parts().password.is_nonempty() && 342 !input.parts().path.is_nonempty())) 343 return; 344 345 if (db_->GetRowForURL(url, &url_row_)) { 346 type_ = VISITED; 347 return; 348 } 349 350 if (provider_->CanFindIntranetURL(db_, input)) { 351 // The user typed an intranet hostname that they've visited (albeit with a 352 // different port and/or path) before. 353 url_row_ = history::URLRow(url); 354 type_ = UNVISITED_INTRANET; 355 } 356} 357 358HistoryURLProviderParams::HistoryURLProviderParams( 359 const AutocompleteInput& input, 360 bool trim_http, 361 const std::string& languages, 362 TemplateURL* default_search_provider, 363 const SearchTermsData& search_terms_data) 364 : message_loop(base::MessageLoop::current()), 365 input(input), 366 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), 367 trim_http(trim_http), 368 failed(false), 369 languages(languages), 370 dont_suggest_exact_input(false), 371 default_search_provider(default_search_provider ? 
372 new TemplateURL(default_search_provider->profile(), 373 default_search_provider->data()) : NULL), 374 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { 375} 376 377HistoryURLProviderParams::~HistoryURLProviderParams() { 378} 379 380HistoryURLProvider::HistoryURLProvider(AutocompleteProviderListener* listener, 381 Profile* profile) 382 : HistoryProvider(listener, profile, 383 AutocompleteProvider::TYPE_HISTORY_URL), 384 params_(NULL), 385 cull_redirects_( 386 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrial() || 387 !OmniboxFieldTrial::InHUPCullRedirectsFieldTrialExperimentGroup()), 388 create_shorter_match_( 389 !OmniboxFieldTrial::InHUPCreateShorterMatchFieldTrial() || 390 !OmniboxFieldTrial:: 391 InHUPCreateShorterMatchFieldTrialExperimentGroup()), 392 search_url_database_(true) { 393 // Initialize HUP scoring params based on the current experiment. 394 OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_); 395} 396 397void HistoryURLProvider::Start(const AutocompleteInput& input, 398 bool minimal_changes) { 399 // NOTE: We could try hard to do less work in the |minimal_changes| case 400 // here; some clever caching would let us reuse the raw matches from the 401 // history DB without re-querying. However, we'd still have to go back to 402 // the history thread to mark these up properly, and if pass 2 is currently 403 // running, we'd need to wait for it to return to the main thread before 404 // doing this (we can't just write new data for it to read due to thread 405 // safety issues). At that point it's just as fast, and easier, to simply 406 // re-run the query from scratch and ignore |minimal_changes|. 407 408 // Cancel any in-progress query. 409 Stop(false); 410 411 matches_.clear(); 412 413 if ((input.type() == AutocompleteInput::INVALID) || 414 (input.type() == AutocompleteInput::FORCED_QUERY)) 415 return; 416 417 // Create a match for exactly what the user typed. 
This will only be used as 418 // a fallback in case we can't get the history service or URL DB; otherwise, 419 // we'll run this again in DoAutocomplete() and use that result instead. 420 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()); 421 // Don't do this for queries -- while we can sometimes mark up a match for 422 // this, it's not what the user wants, and just adds noise. 423 if (input.type() != AutocompleteInput::QUERY) { 424 AutocompleteMatch what_you_typed(SuggestExactInput( 425 input.text(), input.canonicalized_url(), trim_http)); 426 what_you_typed.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 427 matches_.push_back(what_you_typed); 428 } 429 430 // We'll need the history service to run both passes, so try to obtain it. 431 if (!profile_) 432 return; 433 HistoryService* const history_service = 434 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 435 if (!history_service) 436 return; 437 438 // Get the default search provider and search terms data now since we have to 439 // retrieve these on the UI thread, and the second pass runs on the history 440 // thread. |template_url_service| can be NULL when testing. 441 TemplateURLService* template_url_service = 442 TemplateURLServiceFactory::GetForProfile(profile_); 443 TemplateURL* default_search_provider = template_url_service ? 444 template_url_service->GetDefaultSearchProvider() : NULL; 445 UIThreadSearchTermsData data(profile_); 446 447 // Do some fixup on the user input before matching against it, so we provide 448 // good results for local file paths, input with spaces, etc. 449 const FixupReturn fixup_return(FixupUserInput(input)); 450 if (!fixup_return.first) 451 return; 452 url::Parsed parts; 453 URLFixerUpper::SegmentURL(fixup_return.second, &parts); 454 AutocompleteInput fixed_up_input(input); 455 fixed_up_input.UpdateText(fixup_return.second, base::string16::npos, parts); 456 457 // Create the data structure for the autocomplete passes. 
We'll save this off 458 // onto the |params_| member for later deletion below if we need to run pass 459 // 2. 460 scoped_ptr<HistoryURLProviderParams> params( 461 new HistoryURLProviderParams( 462 fixed_up_input, trim_http, 463 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages), 464 default_search_provider, data)); 465 // Note that we use the non-fixed-up input here, since fixup may strip 466 // trailing whitespace. 467 params->prevent_inline_autocomplete = PreventInlineAutocomplete(input); 468 469 // Pass 1: Get the in-memory URL database, and use it to find and promote 470 // the inline autocomplete match, if any. 471 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 472 // url_db can be NULL if it hasn't finished initializing (or failed to 473 // initialize). In this case all we can do is fall back on the second 474 // pass. 475 // 476 // TODO(pkasting): We should just block here until this loads. Any time 477 // someone unloads the history backend, we'll get inconsistent inline 478 // autocomplete behavior here. 479 if (url_db) { 480 DoAutocomplete(NULL, url_db, params.get()); 481 // params->matches now has the matches we should expose to the provider. 482 // Pass 2 expects a "clean slate" set of matches. 483 matches_.clear(); 484 matches_.swap(params->matches); 485 UpdateStarredStateOfMatches(); 486 } 487 488 // Pass 2: Ask the history service to call us back on the history thread, 489 // where we can read the full on-disk DB. 490 if (search_url_database_ && input.want_asynchronous_matches()) { 491 done_ = false; 492 params_ = params.release(); // This object will be destroyed in 493 // QueryComplete() once we're done with it. 
494 history_service->ScheduleAutocomplete(this, params_); 495 } 496} 497 498void HistoryURLProvider::Stop(bool clear_cached_results) { 499 done_ = true; 500 501 if (params_) 502 params_->cancel_flag.Set(); 503} 504 505AutocompleteMatch HistoryURLProvider::SuggestExactInput( 506 const base::string16& text, 507 const GURL& destination_url, 508 bool trim_http) { 509 AutocompleteMatch match(this, 0, false, 510 AutocompleteMatchType::URL_WHAT_YOU_TYPED); 511 512 if (destination_url.is_valid()) { 513 match.destination_url = destination_url; 514 515 // Trim off "http://" if the user didn't type it. 516 // NOTE: We use TrimHttpPrefix() here rather than StringForURLDisplay() to 517 // strip the scheme as we need to know the offset so we can adjust the 518 // |match_location| below. StringForURLDisplay() and TrimHttpPrefix() have 519 // slightly different behavior as well (the latter will strip even without 520 // two slashes after the scheme). 521 DCHECK(!trim_http || !AutocompleteInput::HasHTTPScheme(text)); 522 base::string16 display_string( 523 StringForURLDisplay(destination_url, false, false)); 524 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; 525 match.fill_into_edit = 526 AutocompleteInput::FormattedStringWithEquivalentMeaning(destination_url, 527 display_string); 528 match.allowed_to_be_default_match = true; 529 // NOTE: Don't set match.inline_autocompletion to something non-empty here; 530 // it's surprising and annoying. 531 532 // Try to highlight "innermost" match location. If we fix up "w" into 533 // "www.w.com", we want to highlight the fifth character, not the first. 534 // This relies on match.destination_url being the non-prefix-trimmed version 535 // of match.contents. 
536 match.contents = display_string; 537 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( 538 base::UTF8ToUTF16(destination_url.spec()), text); 539 // It's possible for match.destination_url to not contain the user's input 540 // at all (so |best_prefix| is NULL), for example if the input is 541 // "view-source:x" and |destination_url| has an inserted "http://" in the 542 // middle. 543 if (best_prefix == NULL) { 544 AutocompleteMatch::ClassifyMatchInString(text, match.contents, 545 ACMatchClassification::URL, 546 &match.contents_class); 547 } else { 548 AutocompleteMatch::ClassifyLocationInString( 549 best_prefix->prefix.length() - offset, text.length(), 550 match.contents.length(), ACMatchClassification::URL, 551 &match.contents_class); 552 } 553 554 match.is_history_what_you_typed_match = true; 555 } 556 557 return match; 558} 559 560// Called on the history thread. 561void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, 562 history::URLDatabase* db, 563 HistoryURLProviderParams* params) { 564 // We may get called with a NULL database if it couldn't be properly 565 // initialized. 566 if (!db) { 567 params->failed = true; 568 } else if (!params->cancel_flag.IsSet()) { 569 base::TimeTicks beginning_time = base::TimeTicks::Now(); 570 571 DoAutocomplete(backend, db, params); 572 573 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", 574 base::TimeTicks::Now() - beginning_time); 575 } 576 577 // Return the results (if any) to the main thread. 578 params->message_loop->PostTask(FROM_HERE, base::Bind( 579 &HistoryURLProvider::QueryComplete, this, params)); 580} 581 582// Used by both autocomplete passes, and therefore called on multiple different 583// threads (though not simultaneously). 
584void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, 585 history::URLDatabase* db, 586 HistoryURLProviderParams* params) { 587 VisitClassifier classifier(this, params->input, db); 588 // Create a What You Typed match, which we'll need below. 589 // 590 // We display this to the user when there's a reasonable chance they actually 591 // care: 592 // * Their input can be opened as a URL, and 593 // * We parsed the input as a URL, or it starts with an explicit "http:" or 594 // "https:". 595 // that is when their input can be opened as a URL. 596 // Otherwise, this is just low-quality noise. In the cases where we've parsed 597 // as UNKNOWN, we'll still show an accidental search infobar if need be. 598 bool have_what_you_typed_match = 599 (params->input.type() != AutocompleteInput::QUERY) && 600 ((params->input.type() != AutocompleteInput::UNKNOWN) || 601 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || 602 !params->trim_http || 603 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0)); 604 AutocompleteMatch what_you_typed_match(SuggestExactInput( 605 params->input.text(), params->input.canonicalized_url(), 606 params->trim_http)); 607 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); 608 609 // Get the matching URLs from the DB 610 history::URLRows url_matches; 611 history::HistoryMatches history_matches; 612 613 if (search_url_database_) { 614 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); 615 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); 616 ++i) { 617 if (params->cancel_flag.IsSet()) 618 return; // Canceled in the middle of a query, give up. 619 // We only need kMaxMatches results in the end, but before we 620 // get there we need to promote lower-quality matches that are 621 // prefixes of higher-quality matches, and remove lower-quality 622 // redirects. 
So we ask for more results than we need, of every 623 // prefix type, in hopes this will give us far more than enough 624 // to work with. CullRedirects() will then reduce the list to 625 // the best kMaxMatches results. 626 db->AutocompleteForPrefix( 627 base::UTF16ToUTF8(i->prefix + params->input.text()), 628 kMaxMatches * 2, 629 (backend == NULL), 630 &url_matches); 631 for (history::URLRows::const_iterator j(url_matches.begin()); 632 j != url_matches.end(); ++j) { 633 const URLPrefix* best_prefix = 634 URLPrefix::BestURLPrefix(base::UTF8ToUTF16(j->url().spec()), 635 base::string16()); 636 DCHECK(best_prefix != NULL); 637 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), 638 i->num_components == 0, 639 i->num_components >= best_prefix->num_components)); 640 } 641 } 642 } 643 644 // Create sorted list of suggestions. 645 CullPoorMatches(*params, &history_matches); 646 SortAndDedupMatches(&history_matches); 647 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, 648 what_you_typed_match, &history_matches); 649 650 // Try to promote a match as an exact/inline autocomplete match. This also 651 // moves it to the front of |history_matches|, so skip over it when 652 // converting the rest of the matches. 653 size_t first_match = 1; 654 size_t exact_suggestion = 0; 655 // Checking |is_history_what_you_typed_match| tells us whether 656 // SuggestExactInput() succeeded in constructing a valid match. 657 if (what_you_typed_match.is_history_what_you_typed_match && 658 (!backend || !params->dont_suggest_exact_input) && 659 FixupExactSuggestion(db, params->input, classifier, &what_you_typed_match, 660 &history_matches)) { 661 // Got an exact match for the user's input. Treat it as the best match 662 // regardless of the input type. 
663 exact_suggestion = 1; 664 params->matches.push_back(what_you_typed_match); 665 } else if (params->prevent_inline_autocomplete || 666 history_matches.empty() || 667 !PromoteMatchForInlineAutocomplete(history_matches.front(), params)) { 668 // Failed to promote any URLs for inline autocompletion. Use the What You 669 // Typed match, if we have it. 670 first_match = 0; 671 if (have_what_you_typed_match) 672 params->matches.push_back(what_you_typed_match); 673 } 674 675 // This is the end of the synchronous pass. 676 if (!backend) 677 return; 678 // If search_url_database_ is false, we shouldn't have scheduled a second 679 // pass. 680 DCHECK(search_url_database_); 681 682 // Determine relevancy of highest scoring match, if any. 683 int relevance = -1; 684 for (ACMatches::const_iterator it = params->matches.begin(); 685 it != params->matches.end(); ++it) { 686 relevance = std::max(relevance, it->relevance); 687 } 688 689 if (cull_redirects_) { 690 // Remove redirects and trim list to size. We want to provide up to 691 // kMaxMatches results plus the What You Typed result, if it was added to 692 // |history_matches| above. 693 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); 694 } else { 695 // Simply trim the list to size. 696 if (history_matches.size() > kMaxMatches + exact_suggestion) 697 history_matches.resize(kMaxMatches + exact_suggestion); 698 } 699 700 // Convert the history matches to autocomplete matches. 701 for (size_t i = first_match; i < history_matches.size(); ++i) { 702 const history::HistoryMatch& match = history_matches[i]; 703 DCHECK(!have_what_you_typed_match || 704 (match.url_info.url() != 705 GURL(params->matches.front().destination_url))); 706 // If we've assigned a score already, all later matches score one 707 // less than the previous match. 708 relevance = (relevance > 0) ? 
(relevance - 1) :
        CalculateRelevance(NORMAL, history_matches.size() - 1 - i);
    AutocompleteMatch ac_match = HistoryMatchToACMatch(*params, match,
                                                       NORMAL, relevance);
    // The experimental scoring must not change the top result's score.
    if (!params->matches.empty()) {
      relevance = CalculateRelevanceScoreUsingScoringParams(match, relevance);
      ac_match.relevance = relevance;
    }
    params->matches.push_back(ac_match);
  }
}

// Called on the main thread when the query is complete.
//
// Takes ownership of |params_gets_deleted| (it is wrapped in a scoped_ptr
// below).  Unless the query was canceled, marks the provider as done and
// notifies the listener; the computed matches are published into |matches_|
// only when the query did not fail.
void HistoryURLProvider::QueryComplete(
    HistoryURLProviderParams* params_gets_deleted) {
  // Ensure |params_gets_deleted| gets deleted on exit.
  scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted);

  // If the user hasn't already started another query, clear our member pointer
  // so we can't write into deleted memory.
  if (params_ == params_gets_deleted)
    params_ = NULL;

  // Don't send responses for queries that have been canceled.
  if (params->cancel_flag.IsSet())
    return;  // Already set done_ when we canceled, no need to set it again.

  // Don't modify |matches_| if the query failed, since it might have a default
  // match in it, whereas |params->matches| will be empty.
  if (!params->failed) {
    matches_.swap(params->matches);
    UpdateStarredStateOfMatches();
  }

  done_ = true;
  listener_->OnProviderUpdate(true);
}

HistoryURLProvider::~HistoryURLProvider() {
  // Note: This object can get leaked on shutdown if there are pending
  // requests on the database (which hold a reference to us). Normally, these
  // messages get flushed for each thread. We do a round trip from main, to
  // history, back to main while holding a reference. If the main thread
  // completes before the history thread, the message to delegate back to the
  // main thread will not run and the reference will leak. Therefore, don't do
  // anything on destruction.
}

// Maps |match_type| to a relevance score.  INLINE_AUTOCOMPLETE,
// UNVISITED_INTRANET and WHAT_YOU_TYPED each return a fixed constant and
// ignore |match_number|; every other type (i.e. NORMAL) returns
// kBaseScoreForNonInlineableResult plus |match_number|.
int HistoryURLProvider::CalculateRelevance(MatchType match_type,
                                           size_t match_number) const {
  switch (match_type) {
    case INLINE_AUTOCOMPLETE:
      return kScoreForBestInlineableResult;

    case UNVISITED_INTRANET:
      return kScoreForUnvisitedIntranetResult;

    case WHAT_YOU_TYPED:
      return kScoreForWhatYouTypedResult;

    default:  // NORMAL
      return kBaseScoreForNonInlineableResult +
          static_cast<int>(match_number);
  }
}

// Updates |match| according to what |classifier| found out about the exact
// input, and decides whether it should be promoted.
//
// Returns false when:
//   * the classifier result is INVALID (|match| is left untouched);
//   * the match is scored as UNVISITED_INTRANET and looks like a single word
//     followed by "#" (see the long comment below); or
//   * the match is scored as UNVISITED_INTRANET and |matches| is non-empty.
//     Note that in this last case |match->relevance| has already been
//     updated before returning.
// Otherwise sets |match->relevance|, puts the classifier's URL row at the
// front of |matches| and returns true.
bool HistoryURLProvider::FixupExactSuggestion(
    history::URLDatabase* db,
    const AutocompleteInput& input,
    const VisitClassifier& classifier,
    AutocompleteMatch* match,
    history::HistoryMatches* matches) const {
  DCHECK(match != NULL);
  DCHECK(matches != NULL);

  MatchType type = INLINE_AUTOCOMPLETE;
  switch (classifier.type()) {
    case VisitClassifier::INVALID:
      return false;
    case VisitClassifier::UNVISITED_INTRANET:
      type = UNVISITED_INTRANET;
      break;
    default:
      DCHECK_EQ(VisitClassifier::VISITED, classifier.type());
      // We have data for this match, use it.
      match->deletable = true;
      match->description = classifier.url_row().title();
      RecordAdditionalInfoFromUrlRow(classifier.url_row(), match);
      match->description_class =
          ClassifyDescription(input.text(), match->description);
      if (!classifier.url_row().typed_count()) {
        // If we reach here, we must be in the second pass, and we must not have
        // this row's data available during the first pass.  That means we
        // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to
        // maintain the ordering between passes consistent, we need to score it
        // the same way here.
        type = CanFindIntranetURL(db, input) ?
            UNVISITED_INTRANET : WHAT_YOU_TYPED;
      }
      break;
  }

  const GURL& url = match->destination_url;
  const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
  // If the what-you-typed result looks like a single word (which can be
  // interpreted as an intranet address) followed by a pound sign ("#"),
  // leave the score for the url-what-you-typed result as is.  It will be
  // outscored by a search query from the SearchProvider. This test fixes
  // cases such as "c#" and "c# foo" where the user has visited an intranet
  // site "c".  We want the search-what-you-typed score to beat the
  // URL-what-you-typed score in this case.  Most of the below test tries to
  // make sure that this code does not trigger if the user did anything to
  // indicate the desired match is a URL.  For instance, "c/# foo" will not
  // pass the test because that will be classified as input type URL.  The
  // parsed.CountCharactersBefore() in the test looks for the presence of a
  // reference fragment in the URL by checking whether the position differs
  // when including the delimiter (pound sign) versus not including the
  // delimiter.  (One cannot simply check url.ref() because it will not
  // distinguish between the input "c" and the input "c#", both of which will
  // have empty reference fragments.)
  if ((type == UNVISITED_INTRANET) &&
      (input.type() != AutocompleteInput::URL) && url.username().empty() &&
      url.password().empty() && url.port().empty() && (url.path() == "/") &&
      url.query().empty() &&
      (parsed.CountCharactersBefore(url::Parsed::REF, true) !=
       parsed.CountCharactersBefore(url::Parsed::REF, false))) {
    return false;
  }

  match->relevance = CalculateRelevance(type, 0);

  // If there are any other matches, then don't promote this match here, in
  // hopes the caller will be able to inline autocomplete a better suggestion.
  // DoAutocomplete() will fall back on this match if inline autocompletion
  // fails.  This matches how we react to never-visited URL inputs in the non-
  // intranet case.
  if (type == UNVISITED_INTRANET && !matches->empty())
    return false;

  // Put it on the front of the HistoryMatches for redirect culling.
  // NOTE(review): the trailing "true, true" presumably mean "create if
  // necessary" and "promote" (cf. the call in
  // PromoteOrCreateShorterSuggestion()) -- confirm against the declaration of
  // CreateOrPromoteMatch().
  CreateOrPromoteMatch(classifier.url_row(), base::string16::npos, false,
                       matches, true, true);
  return true;
}

// Returns true only if all of the following hold: |input| is of type UNKNOWN,
// its scheme is http (compared case-insensitively), it has a non-empty host,
// that host has a registry length of zero (unknown and private registries
// excluded), and |db| reports the host as a typed host.
bool HistoryURLProvider::CanFindIntranetURL(
    history::URLDatabase* db,
    const AutocompleteInput& input) const {
  // Normally passing the first two conditions below ought to guarantee the
  // third condition, but because FixupUserInput() can run and modify the
  // input's text and parts between Parse() and here, it seems better to be
  // paranoid and check.
  if ((input.type() != AutocompleteInput::UNKNOWN) ||
      !LowerCaseEqualsASCII(input.scheme(), url::kHttpScheme) ||
      !input.parts().host.is_nonempty())
    return false;
  // Extract just the host portion of the typed text.
  const std::string host(base::UTF16ToUTF8(
      input.text().substr(input.parts().host.begin, input.parts().host.len)));
  const size_t registry_length =
      net::registry_controlled_domains::GetRegistryLength(
          host,
          net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
          net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
  return registry_length == 0 && db->IsTypedHost(host);
}

// Decides whether |match| is good enough to be inline autocompleted.  Returns
// false if it is not.  Otherwise returns true and, when |params| is non-NULL,
// also sets |params->dont_suggest_exact_input| and appends an
// INLINE_AUTOCOMPLETE match built from |match| to |params->matches|.  Passing
// NULL for |params| performs the check only.
bool HistoryURLProvider::PromoteMatchForInlineAutocomplete(
    const history::HistoryMatch& match,
    HistoryURLProviderParams* params) {
  // Promote the first match if it's been marked for promotion or typed at least
  // n times, where n == 1 for "simple" (host-only) URLs and n == 2 for others.
  // We set a higher bar for these long URLs because it's less likely that users
  // will want to visit them again.  Even though we don't increment the
  // typed_count for pasted-in URLs, if the user manually edits the URL or types
  // some long thing in by hand, we wouldn't want to immediately start
  // autocompleting it.
  if (!match.promoted &&
      (!match.url_info.typed_count() ||
       ((match.url_info.typed_count() == 1) &&
        !match.IsHostOnly())))
    return false;

  // In the case where the user has typed "foo.com" and visited (but not typed)
  // "foo/", and the input is "foo", we can reach here for "foo.com" during the
  // first pass but have the second pass suggest the exact input as a better
  // URL.  Since we need both passes to agree, and since during the first pass
  // there's no way to know about "foo/", make reaching this point prevent any
  // future pass from suggesting the exact input as a better match.
  if (params) {
    params->dont_suggest_exact_input = true;
    AutocompleteMatch ac_match = HistoryMatchToACMatch(
        *params, match, INLINE_AUTOCOMPLETE,
        CalculateRelevance(INLINE_AUTOCOMPLETE, 0));
    params->matches.push_back(ac_match);
  }
  return true;
}

// See if a shorter version of the best match should be created, and if so place
// it at the front of |matches|.  This can suggest history URLs that are
// prefixes of the best match (if they've been visited enough, compared to the
// best match), or create host-only suggestions even when they haven't been
// visited before: if the user visited http://example.com/asdf once, we'll
// suggest http://example.com/ even if they've never been to it.
void HistoryURLProvider::PromoteOrCreateShorterSuggestion(
    history::URLDatabase* db,
    const HistoryURLProviderParams& params,
    bool have_what_you_typed_match,
    const AutocompleteMatch& what_you_typed_match,
    history::HistoryMatches* matches) {
  if (matches->empty())
    return;  // No matches, nothing to do.

  // Determine the base URL from which to search, and whether that URL could
  // itself be added as a match.  We can add the base iff it's not "effectively
  // the same" as any "what you typed" match.
  const history::HistoryMatch& match = matches->front();
  GURL search_base = ConvertToHostOnly(match, params.input.text());
  bool can_add_search_base_to_matches = !have_what_you_typed_match;
  if (search_base.is_empty()) {
    // Search from what the user typed when we couldn't reduce the best match
    // to a host.  Careful: use a substring of |match| here, rather than the
    // first match in |params|, because they might have different prefixes.  If
    // the user typed "google.com", |what_you_typed_match| will hold
    // "http://google.com/", but |match| might begin with
    // "http://www.google.com/".
    // TODO: this should be cleaned up, and is probably incorrect for IDN.
    std::string new_match = match.url_info.url().possibly_invalid_spec().
        substr(0, match.input_location + params.input.text().length());
    search_base = GURL(new_match);
    // TODO(mrossetti): There is a degenerate case where the following may
    // cause a failure: http://www/~someword/fubar.html. Diagnose.
    // See: http://crbug.com/50101
    if (search_base.is_empty())
      return;  // Can't construct a valid URL from which to start a search.
  } else if (!can_add_search_base_to_matches) {
    can_add_search_base_to_matches =
        (search_base != what_you_typed_match.destination_url);
  }
  if (search_base == match.url_info.url())
    return;  // Couldn't shorten |match|, so no range of URLs to search over.

  // Search the DB for short URLs between our base and |match|.
  history::URLRow info(search_base);
  bool promote = true;
  // A short URL is only worth suggesting if it's been visited at least a third
  // as often as the longer URL.
  const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
  // For stability between the in-memory and on-disk autocomplete passes, when
  // the long URL has been typed before, only suggest shorter URLs that have
  // also been typed.  Otherwise, the on-disk pass could suggest a shorter URL
  // (which hasn't been typed) that the in-memory pass doesn't know about,
  // thereby making the top match, and thus the behavior of inline
  // autocomplete, unstable.
  const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
  if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
          match.url_info.url().possibly_invalid_spec(), min_visit_count,
          min_typed_count, can_add_search_base_to_matches, &info)) {
    if (!can_add_search_base_to_matches)
      return;  // Couldn't find anything and can't add the search base, bail.

    // Try to get info on the search base itself.  Promote it to the top if the
    // original best match isn't good enough to autocomplete.
    db->GetRowForURL(search_base, &info);
    promote = match.url_info.typed_count() <= 1;
  }

  // Promote or add the desired URL to the list of matches.
  // |ensure_can_inline| ends up true only if we are promoting, the original
  // best match itself qualified for inline autocompletion, and
  // CreateOrPromoteMatch() returned true; in that case the new front match is
  // flagged as promoted.
  bool ensure_can_inline =
      promote && PromoteMatchForInlineAutocomplete(match, NULL);
  ensure_can_inline &= CreateOrPromoteMatch(info, match.input_location,
      match.match_in_scheme, matches, create_shorter_match_, promote);
  if (ensure_can_inline)
    matches->front().promoted = true;
}

// Erases from |matches| every entry that either fails
// RowQualifiesAsSignificant() against the autocomplete age threshold, or whose
// URL the default search provider (when |params| has one) identifies as one of
// its own search URLs.
void HistoryURLProvider::CullPoorMatches(
    const HistoryURLProviderParams& params,
    history::HistoryMatches* matches) const {
  const base::Time& threshold(history::AutocompleteAgeThreshold());
  for (history::HistoryMatches::iterator i(matches->begin());
       i != matches->end(); ) {
    if (RowQualifiesAsSignificant(i->url_info, threshold) &&
        !(params.default_search_provider &&
          params.default_search_provider->IsSearchURLUsingTermsData(
              i->url_info.url(), *params.search_terms_data.get()))) {
      ++i;
    } else {
      // erase() hands back the iterator to the following element, so the loop
      // must not advance again in this branch.
      i = matches->erase(i);
    }
  }
}

// Walks the first |max_results| entries of |matches|, asking |backend| for the
// most recent redirects from each one and removing later matches that
// duplicate a URL in that redirect set.  Finally truncates |matches| to at
// most |max_results| entries.
void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
                                       history::HistoryMatches* matches,
                                       size_t max_results) const {
  for (size_t source = 0;
       (source < matches->size()) && (source < max_results); ) {
    const GURL& url = (*matches)[source].url_info.url();
    // TODO(brettw) this should go away when everything uses GURL.
    history::RedirectList redirects;
    backend->GetMostRecentRedirectsFrom(url, &redirects);
    if (!redirects.empty()) {
      // Remove all but the first occurrence of any of these redirects in the
      // search results. We also must add the URL we queried for, since it may
      // not be the first match and we'd want to remove it.
      //
      // For example, when A redirects to B and our matches are [A, X, B],
      // we'll get B as the redirects from, and we want to remove the second
      // item of that pair, removing B. If A redirects to B and our matches are
      // [B, X, A], we'll want to remove A instead.
      redirects.push_back(url);
      source = RemoveSubsequentMatchesOf(matches, source, redirects);
    } else {
      // Advance to next item.
      source++;
    }
  }

  if (matches->size() > max_results)
    matches->resize(max_results);
}

// Keeps the first entry of |matches| whose URL appears in |remove| and erases
// every later entry whose URL appears in |remove|.  Returns the index of the
// item that follows the one originally at |source_index|, adjusted for any
// entries erased at or before that position.
size_t HistoryURLProvider::RemoveSubsequentMatchesOf(
    history::HistoryMatches* matches,
    size_t source_index,
    const std::vector<GURL>& remove) const {
  size_t next_index = source_index + 1;  // return value = item after source

  // Find the first occurrence of any URL in the redirect chain. We want to
  // keep this one since it is rated the highest.
  history::HistoryMatches::iterator first(std::find_first_of(
      matches->begin(), matches->end(), remove.begin(), remove.end(),
      history::HistoryMatch::EqualsGURL));
  DCHECK(first != matches->end()) << "We should have always found at least the "
      "original URL.";

  // Find any following occurrences of any URL in the redirect chain, these
  // should be deleted.
  for (history::HistoryMatches::iterator next(std::find_first_of(first + 1,
           matches->end(), remove.begin(), remove.end(),
           history::HistoryMatch::EqualsGURL));
       next != matches->end(); next = std::find_first_of(next, matches->end(),
           remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) {
    // Remove this item.  If the erased item sat at or before the source index,
    // everything after it moved one slot toward the front, so the index we
    // return has to be pulled back by one to keep pointing at the same item.
    next = matches->erase(next);
    if (static_cast<size_t>(next - matches->begin()) < next_index)
      --next_index;
  }
  return next_index;
}

// Builds an AutocompleteMatch of type HISTORY_URL from |history_match| with
// the given |relevance|.  Fills in the destination URL, typed count,
// fill-into-edit text, inline autocompletion, contents (with classification
// of the matched span), description, and additional info from the URL row.
// For WHAT_YOU_TYPED matches the URL is formatted with an empty languages
// string instead of |params.languages|.
AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch(
    const HistoryURLProviderParams& params,
    const history::HistoryMatch& history_match,
    MatchType match_type,
    int relevance) {
  const history::URLRow& info = history_match.url_info;
  AutocompleteMatch match(this, relevance,
      !!info.visit_count(), AutocompleteMatchType::HISTORY_URL);
  match.typed_count = info.typed_count();
  match.destination_url = info.url();
  DCHECK(match.destination_url.is_valid());
  // Offset, in the URL spec, of the end of the user's input; FormatUrl() below
  // rewrites it to the corresponding offset in the formatted string.
  size_t inline_autocomplete_offset =
      history_match.input_location + params.input.text().length();
  std::string languages = (match_type == WHAT_YOU_TYPED) ?
      std::string() : params.languages;
  // Start from "omit everything" and drop the "omit http" flag unless
  // |params.trim_http| is set and the match is not in the scheme.
  const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
      ~((params.trim_http && !history_match.match_in_scheme) ?
          0 : net::kFormatUrlOmitHTTP);
  match.fill_into_edit =
      AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
          net::FormatUrl(info.url(), languages, format_types,
                         net::UnescapeRule::SPACES, NULL, NULL,
                         &inline_autocomplete_offset));
  if (!params.prevent_inline_autocomplete &&
      (inline_autocomplete_offset != base::string16::npos)) {
    DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length());
    match.inline_autocompletion =
        match.fill_into_edit.substr(inline_autocomplete_offset);
  }
  // The latter part of the test effectively asks "is the inline completion
  // empty?" (i.e., is this match effectively the what-you-typed match?).
  match.allowed_to_be_default_match = !params.prevent_inline_autocomplete ||
      ((inline_autocomplete_offset != base::string16::npos) &&
       (inline_autocomplete_offset >= match.fill_into_edit.length()));

  // Format the URL a second time for display, this time tracking where the
  // match starts so the matched span can be classified.
  size_t match_start = history_match.input_location;
  match.contents = net::FormatUrl(info.url(), languages,
      format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start);
  if ((match_start != base::string16::npos) &&
      (inline_autocomplete_offset != base::string16::npos) &&
      (inline_autocomplete_offset != match_start)) {
    DCHECK(inline_autocomplete_offset > match_start);
    AutocompleteMatch::ClassifyLocationInString(match_start,
        inline_autocomplete_offset - match_start, match.contents.length(),
        ACMatchClassification::URL, &match.contents_class);
  } else {
    // No usable match span: classify the contents without a match location.
    AutocompleteMatch::ClassifyLocationInString(base::string16::npos, 0,
        match.contents.length(), ACMatchClassification::URL,
        &match.contents_class);
  }
  match.description = info.title();
  match.description_class =
      ClassifyDescription(params.input.text(), match.description);
  RecordAdditionalInfoFromUrlRow(info, &match);
  return match;
}

// Returns |old_relevance| unchanged when experimental scoring is disabled.
// Otherwise rescores it using the typed-count score buckets and the time
// since the URL's last visit; URLs that were never typed are additionally
// rescored using the visited-count buckets.  The result is DCHECKed to be no
// greater than |old_relevance|.
int HistoryURLProvider::CalculateRelevanceScoreUsingScoringParams(
    const history::HistoryMatch& match,
    int old_relevance) const {
  if (!scoring_params_.experimental_scoring_enabled)
    return old_relevance;

  const base::TimeDelta time_since_last_visit =
      base::Time::Now() - match.url_info.last_visit();

  int relevance = CalculateRelevanceUsingScoreBuckets(
      scoring_params_.typed_count_buckets, time_since_last_visit, old_relevance,
      match.url_info.typed_count());

  // Additional demotion (on top of typed_count demotion) of URLs that were
  // never typed.
  if (match.url_info.typed_count() == 0) {
    relevance = CalculateRelevanceUsingScoreBuckets(
        scoring_params_.visited_count_buckets, time_since_last_visit, relevance,
        match.url_info.visit_count());
  }

  DCHECK_LE(relevance, old_relevance);
  return relevance;
}

// Returns the classification spans for |description|, marking where
// |input_text| matches in the cleaned-up title.  Term matches are filtered by
// word starts before being converted to spans.
// static
ACMatchClassifications HistoryURLProvider::ClassifyDescription(
    const base::string16& input_text,
    const base::string16& description) {
  base::string16 clean_description = bookmark_utils::CleanUpTitleForMatching(
      description);
  history::TermMatches description_matches(SortAndDeoverlapMatches(
      history::MatchTermInString(input_text, clean_description, 0)));
  history::WordStarts description_word_starts;
  history::String16VectorFromString16(
      clean_description, false, &description_word_starts);
  // If HistoryURL retrieves any matches (and hence we reach this code), we
  // are guaranteed that the beginning of input_text must be a word break.
  history::WordStarts offsets(1, 0u);
  description_matches =
      history::ScoredHistoryMatch::FilterTermMatchesByWordStarts(
          description_matches, offsets, description_word_starts, 0,
          std::string::npos);
  return SpansFromTermMatch(
      description_matches, clean_description.length(), false);
}