history_quick_provider.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/history_quick_provider.h" 6 7#include <vector> 8 9#include "base/basictypes.h" 10#include "base/command_line.h" 11#include "base/i18n/break_iterator.h" 12#include "base/logging.h" 13#include "base/metrics/field_trial.h" 14#include "base/metrics/histogram.h" 15#include "base/prefs/pref_service.h" 16#include "base/strings/string_number_conversions.h" 17#include "base/strings/string_util.h" 18#include "base/strings/utf_string_conversions.h" 19#include "base/time/time.h" 20#include "chrome/browser/autocomplete/autocomplete_result.h" 21#include "chrome/browser/autocomplete/history_url_provider.h" 22#include "chrome/browser/history/history_database.h" 23#include "chrome/browser/history/history_service.h" 24#include "chrome/browser/history/history_service_factory.h" 25#include "chrome/browser/history/in_memory_url_index.h" 26#include "chrome/browser/history/in_memory_url_index_types.h" 27#include "chrome/browser/history/scored_history_match.h" 28#include "chrome/browser/net/url_fixer_upper.h" 29#include "chrome/browser/omnibox/omnibox_field_trial.h" 30#include "chrome/browser/profiles/profile.h" 31#include "chrome/browser/search/search.h" 32#include "chrome/browser/search_engines/template_url.h" 33#include "chrome/browser/search_engines/template_url_service.h" 34#include "chrome/browser/search_engines/template_url_service_factory.h" 35#include "chrome/common/autocomplete_match_type.h" 36#include "chrome/common/chrome_switches.h" 37#include "chrome/common/pref_names.h" 38#include "chrome/common/url_constants.h" 39#include "content/public/browser/browser_thread.h" 40#include "content/public/browser/notification_source.h" 41#include "content/public/browser/notification_types.h" 42#include "net/base/escape.h" 43#include "net/base/net_util.h" 44#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 45#include "url/url_parse.h" 46#include "url/url_util.h" 47 48using history::InMemoryURLIndex; 49using history::ScoredHistoryMatch; 50using history::ScoredHistoryMatches; 51 52bool HistoryQuickProvider::disabled_ = false; 53 54HistoryQuickProvider::HistoryQuickProvider( 55 AutocompleteProviderListener* listener, 56 Profile* profile) 57 : HistoryProvider(listener, profile, 58 AutocompleteProvider::TYPE_HISTORY_QUICK), 59 languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)), 60 reorder_for_inlining_(false) { 61 enum InliningOption { 62 INLINING_PROHIBITED = 0, 63 INLINING_ALLOWED = 1, 64 INLINING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, 65 INLINING_FIELD_TRIAL_DEFAULT_GROUP = 3, 66 INLINING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, 67 NUM_OPTIONS = 5 68 }; 69 // should always be overwritten 70 InliningOption inlining_option = NUM_OPTIONS; 71 72 const std::string switch_value = CommandLine::ForCurrentProcess()-> 73 GetSwitchValueASCII(switches::kOmniboxInlineHistoryQuickProvider); 74 if (switch_value == switches::kOmniboxInlineHistoryQuickProviderAllowed) { 75 inlining_option = INLINING_ALLOWED; 76 always_prevent_inline_autocomplete_ = false; 77 } else if (switch_value == 78 switches::kOmniboxInlineHistoryQuickProviderProhibited) { 79 inlining_option = INLINING_PROHIBITED; 80 always_prevent_inline_autocomplete_ = true; 81 } else { 82 // We'll assume any other flag means automatic. 83 // Automatic means eligible for the field trial. 84 85 // For the field trial stuff to work correctly, we must be running 86 // on the same thread as the thread that created the field trial, 87 // which happens via a call to OmniboxFieldTrial::Active in 88 // chrome_browser_main.cc on the main thread. Let's check this to 89 // be sure. We check "if we've heard of the UI thread then we'd better 90 // be on it." The first part is necessary so unit tests pass. (Many 91 // unit tests don't set up the threading naming system; hence 92 // CurrentlyOn(UI thread) will fail.) 93 DCHECK(!content::BrowserThread::IsWellKnownThread( 94 content::BrowserThread::UI) || 95 content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 96 if (OmniboxFieldTrial::InDisallowInlineHQPFieldTrial()) { 97 if (OmniboxFieldTrial::InDisallowInlineHQPFieldTrialExperimentGroup()) { 98 always_prevent_inline_autocomplete_ = true; 99 inlining_option = INLINING_FIELD_TRIAL_EXPERIMENT_GROUP; 100 } else { 101 always_prevent_inline_autocomplete_ = false; 102 inlining_option = INLINING_FIELD_TRIAL_DEFAULT_GROUP; 103 } 104 } else { 105 always_prevent_inline_autocomplete_ = false; 106 inlining_option = INLINING_AUTO_BUT_NOT_IN_FIELD_TRIAL; 107 } 108 } 109 110 // Add a beacon to the logs that'll allow us to identify later what 111 // inlining state a user is in. Do this by incrementing a bucket in 112 // a histogram, where the bucket represents the user's inlining state. 113 UMA_HISTOGRAM_ENUMERATION( 114 "Omnibox.InlineHistoryQuickProviderFieldTrialBeacon", 115 inlining_option, NUM_OPTIONS); 116 117 reorder_for_inlining_ = CommandLine::ForCurrentProcess()-> 118 GetSwitchValueASCII(switches:: 119 kOmniboxHistoryQuickProviderReorderForInlining) == 120 switches::kOmniboxHistoryQuickProviderReorderForInliningEnabled; 121} 122 123void HistoryQuickProvider::Start(const AutocompleteInput& input, 124 bool minimal_changes) { 125 matches_.clear(); 126 if (disabled_) 127 return; 128 129 // Don't bother with INVALID and FORCED_QUERY. Also pass when looking for 130 // BEST_MATCH and there is no inline autocompletion because none of the HQP 131 // matches can score highly enough to qualify. 132 if ((input.type() == AutocompleteInput::INVALID) || 133 (input.type() == AutocompleteInput::FORCED_QUERY) || 134 (input.matches_requested() == AutocompleteInput::BEST_MATCH && 135 input.prevent_inline_autocomplete())) 136 return; 137 138 autocomplete_input_ = input; 139 140 // TODO(pkasting): We should just block here until this loads. Any time 141 // someone unloads the history backend, we'll get inconsistent inline 142 // autocomplete behavior here. 143 if (GetIndex()) { 144 base::TimeTicks start_time = base::TimeTicks::Now(); 145 DoAutocomplete(); 146 if (input.text().length() < 6) { 147 base::TimeTicks end_time = base::TimeTicks::Now(); 148 std::string name = "HistoryQuickProvider.QueryIndexTime." + 149 base::IntToString(input.text().length()); 150 base::HistogramBase* counter = base::Histogram::FactoryGet( 151 name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag); 152 counter->Add(static_cast<int>((end_time - start_time).InMilliseconds())); 153 } 154 UpdateStarredStateOfMatches(); 155 } 156} 157 158void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) { 159 DCHECK(match.deletable); 160 DCHECK(match.destination_url.is_valid()); 161 // Delete the match from the InMemoryURLIndex. 162 GetIndex()->DeleteURL(match.destination_url); 163 DeleteMatchFromMatches(match); 164} 165 166HistoryQuickProvider::~HistoryQuickProvider() {} 167 168void HistoryQuickProvider::DoAutocomplete() { 169 // Get the matching URLs from the DB. 170 ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms( 171 autocomplete_input_.text(), 172 autocomplete_input_.cursor_position()); 173 if (matches.empty()) 174 return; 175 176 // If we're allowed to reorder results in order to get an inlineable 177 // result to appear first (and hence have a HistoryQuickProvider 178 // suggestion possibly appear first), find the first inlineable 179 // result and then swap it to the front. Obviously, don't do this 180 // if we're told to prevent inline autocompletion. (If we're told 181 // we're going to prevent inline autocompletion, we're going to 182 // later demote the score of all results so none will be inlined. 183 // Hence there's no need to reorder the results so an inlineable one 184 // appears first.) 185 if (reorder_for_inlining_ && 186 !PreventInlineAutocomplete(autocomplete_input_)) { 187 for (ScoredHistoryMatches::iterator i(matches.begin()); 188 (i != matches.end()) && 189 (i->raw_score >= AutocompleteResult::kLowestDefaultScore); 190 ++i) { 191 if (i->can_inline) { // this test is only true once because of the break 192 if (i != matches.begin()) 193 std::rotate(matches.begin(), i, i + 1); 194 break; 195 } 196 } 197 } 198 199 // Figure out if HistoryURL provider has a URL-what-you-typed match 200 // that ought to go first and what its score will be. 201 bool will_have_url_what_you_typed_match_first = false; 202 int url_what_you_typed_match_score = -1; // undefined 203 // These are necessary (but not sufficient) conditions for the omnibox 204 // input to be a URL-what-you-typed match. The username test checks that 205 // either the username does not exist (a regular URL such as http://site/) 206 // or, if the username exists (http://user@site/), there must be either 207 // a password or a port. Together these exclude pure username@site 208 // inputs because these are likely to be an e-mail address. HistoryURL 209 // provider won't promote the URL-what-you-typed match to first 210 // for these inputs. 211 const bool can_have_url_what_you_typed_match_first = 212 autocomplete_input_.canonicalized_url().is_valid() && 213 (autocomplete_input_.type() != AutocompleteInput::QUERY) && 214 (autocomplete_input_.type() != AutocompleteInput::FORCED_QUERY) && 215 (!autocomplete_input_.parts().username.is_nonempty() || 216 autocomplete_input_.parts().password.is_nonempty() || 217 autocomplete_input_.parts().path.is_nonempty()); 218 if (can_have_url_what_you_typed_match_first) { 219 HistoryService* const history_service = 220 HistoryServiceFactory::GetForProfile(profile_, 221 Profile::EXPLICIT_ACCESS); 222 // We expect HistoryService to be available. In case it's not, 223 // (e.g., due to Profile corruption) we let HistoryQuick provider 224 // completions (which may be available because it's a different 225 // data structure) compete with the URL-what-you-typed match as 226 // normal. 227 if (history_service) { 228 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 229 // url_db can be NULL if it hasn't finished initializing (or 230 // failed to to initialize). In this case, we let HistoryQuick 231 // provider completions compete with the URL-what-you-typed 232 // match as normal. 233 if (url_db) { 234 const std::string host(UTF16ToUTF8(autocomplete_input_.text().substr( 235 autocomplete_input_.parts().host.begin, 236 autocomplete_input_.parts().host.len))); 237 // We want to put the URL-what-you-typed match first if either 238 // * the user visited the URL before (intranet or internet). 239 // * it's a URL on a host that user visited before and this 240 // is the root path of the host. (If the user types some 241 // of a path--more than a simple "/"--we let autocomplete compete 242 // normally with the URL-what-you-typed match.) 243 // TODO(mpearson): Remove this hacky code and simply score URL-what- 244 // you-typed in some sane way relative to possible completions: 245 // URL-what-you-typed should get some sort of a boost relative 246 // to completions, but completions should naturally win if 247 // they're a lot more popular. In this process, if the input 248 // is a bare intranet hostname that has been visited before, we 249 // may want to enforce that the only completions that can outscore 250 // the URL-what-you-typed match are on the same host (i.e., aren't 251 // from a longer internet hostname for which the omnibox input is 252 // a prefix). 253 if (url_db->GetRowForURL( 254 autocomplete_input_.canonicalized_url(), NULL) != 0) { 255 // We visited this URL before. 256 will_have_url_what_you_typed_match_first = true; 257 // HistoryURLProvider gives visited what-you-typed URLs a high score. 258 url_what_you_typed_match_score = 259 HistoryURLProvider::kScoreForBestInlineableResult; 260 } else if (url_db->IsTypedHost(host) && 261 (!autocomplete_input_.parts().path.is_nonempty() || 262 ((autocomplete_input_.parts().path.len == 1) && 263 (autocomplete_input_.text()[ 264 autocomplete_input_.parts().path.begin] == '/'))) && 265 !autocomplete_input_.parts().query.is_nonempty() && 266 !autocomplete_input_.parts().ref.is_nonempty()) { 267 // Not visited, but we've seen the host before. 268 will_have_url_what_you_typed_match_first = true; 269 const size_t registry_length = 270 net::registry_controlled_domains::GetRegistryLength( 271 host, 272 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 273 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 274 if (registry_length == 0) { 275 // Known intranet hosts get one score. 276 url_what_you_typed_match_score = 277 HistoryURLProvider::kScoreForUnvisitedIntranetResult; 278 } else { 279 // Known internet hosts get another. 280 url_what_you_typed_match_score = 281 HistoryURLProvider::kScoreForWhatYouTypedResult; 282 } 283 } 284 } 285 } 286 } 287 288 // Loop over every result and add it to matches_. In the process, 289 // guarantee that scores are decreasing. |max_match_score| keeps 290 // track of the highest score we can assign to any later results we 291 // see. Also, if we're not allowing inline autocompletions in 292 // general or the current best suggestion isn't inlineable, 293 // artificially reduce the starting |max_match_score| (which 294 // therefore applies to all results) to something low enough that 295 // guarantees no result will be offered as an autocomplete 296 // suggestion. Also do a similar reduction if we think there will be 297 // a URL-what-you-typed match. (We want URL-what-you-typed matches for 298 // visited URLs to beat out any longer URLs, no matter how frequently 299 // they're visited.) The strength of this last reduction depends on the 300 // likely score for the URL-what-you-typed result. 301 302 // |template_url_service| or |template_url| can be NULL in unit tests. 303 TemplateURLService* template_url_service = 304 TemplateURLServiceFactory::GetForProfile(profile_); 305 TemplateURL* template_url = template_url_service ? 306 template_url_service->GetDefaultSearchProvider() : NULL; 307 int max_match_score = (PreventInlineAutocomplete(autocomplete_input_) || 308 !matches.begin()->can_inline) ? 309 (AutocompleteResult::kLowestDefaultScore - 1) : 310 matches.begin()->raw_score; 311 if (will_have_url_what_you_typed_match_first) { 312 max_match_score = std::min(max_match_score, 313 url_what_you_typed_match_score - 1); 314 } 315 for (ScoredHistoryMatches::const_iterator match_iter = matches.begin(); 316 match_iter != matches.end(); ++match_iter) { 317 const ScoredHistoryMatch& history_match(*match_iter); 318 // Culls results corresponding to queries from the default search engine. 319 // These are low-quality, difficult-to-understand matches for users, and the 320 // SearchProvider should surface past queries in a better way anyway. 321 if (!template_url || 322 !template_url->IsSearchURL(history_match.url_info.url())) { 323 // Set max_match_score to the score we'll assign this result: 324 max_match_score = std::min(max_match_score, history_match.raw_score); 325 matches_.push_back(QuickMatchToACMatch(history_match, max_match_score)); 326 // Mark this max_match_score as being used: 327 max_match_score--; 328 } 329 } 330} 331 332AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch( 333 const ScoredHistoryMatch& history_match, 334 int score) { 335 const history::URLRow& info = history_match.url_info; 336 AutocompleteMatch match(this, score, !!info.visit_count(), 337 history_match.url_matches.empty() ? AutocompleteMatchType::HISTORY_TITLE : 338 AutocompleteMatchType::HISTORY_URL); 339 match.typed_count = info.typed_count(); 340 match.destination_url = info.url(); 341 DCHECK(match.destination_url.is_valid()); 342 343 // Format the URL autocomplete presentation. 344 std::vector<size_t> offsets = 345 OffsetsFromTermMatches(history_match.url_matches); 346 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 347 ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP); 348 match.fill_into_edit = 349 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 350 net::FormatUrlWithOffsets(info.url(), languages_, format_types, 351 net::UnescapeRule::SPACES, NULL, NULL, &offsets)); 352 history::TermMatches new_matches = 353 ReplaceOffsetsInTermMatches(history_match.url_matches, offsets); 354 match.contents = net::FormatUrl(info.url(), languages_, format_types, 355 net::UnescapeRule::SPACES, NULL, NULL, NULL); 356 match.contents_class = 357 SpansFromTermMatch(new_matches, match.contents.length(), true); 358 359 if (!history_match.can_inline) { 360 match.inline_autocomplete_offset = string16::npos; 361 } else { 362 DCHECK(!new_matches.empty()); 363 match.inline_autocomplete_offset = new_matches[0].offset + 364 new_matches[0].length; 365 // The following will happen if the user has typed an URL with a scheme 366 // and the last character typed is a slash because that slash is removed 367 // by the FormatURLWithOffsets call above. 368 if (match.inline_autocomplete_offset > match.fill_into_edit.length()) 369 match.inline_autocomplete_offset = match.fill_into_edit.length(); 370 } 371 372 // Format the description autocomplete presentation. 373 match.description = info.title(); 374 match.description_class = SpansFromTermMatch( 375 history_match.title_matches, match.description.length(), false); 376 377 match.RecordAdditionalInfo("typed count", info.typed_count()); 378 match.RecordAdditionalInfo("visit count", info.visit_count()); 379 match.RecordAdditionalInfo("last visit", info.last_visit()); 380 381 return match; 382} 383 384history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() { 385 if (index_for_testing_.get()) 386 return index_for_testing_.get(); 387 388 HistoryService* const history_service = 389 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 390 if (!history_service) 391 return NULL; 392 393 return history_service->InMemoryIndex(); 394} 395 396// static 397ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch( 398 const history::TermMatches& matches, 399 size_t text_length, 400 bool is_url) { 401 ACMatchClassification::Style url_style = 402 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE; 403 ACMatchClassifications spans; 404 if (matches.empty()) { 405 if (text_length) 406 spans.push_back(ACMatchClassification(0, url_style)); 407 return spans; 408 } 409 if (matches[0].offset) 410 spans.push_back(ACMatchClassification(0, url_style)); 411 size_t match_count = matches.size(); 412 for (size_t i = 0; i < match_count;) { 413 size_t offset = matches[i].offset; 414 spans.push_back(ACMatchClassification(offset, 415 ACMatchClassification::MATCH | url_style)); 416 // Skip all adjacent matches. 417 do { 418 offset += matches[i].length; 419 ++i; 420 } while ((i < match_count) && (offset == matches[i].offset)); 421 if (offset < text_length) 422 spans.push_back(ACMatchClassification(offset, url_style)); 423 } 424 425 return spans; 426} 427