history_quick_provider.cc revision 0529e5d033099cbfc42635f6f6183833b09dff6e
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/history_quick_provider.h" 6 7#include <vector> 8 9#include "base/basictypes.h" 10#include "base/command_line.h" 11#include "base/i18n/break_iterator.h" 12#include "base/logging.h" 13#include "base/metrics/field_trial.h" 14#include "base/metrics/histogram.h" 15#include "base/prefs/pref_service.h" 16#include "base/strings/string_number_conversions.h" 17#include "base/strings/string_util.h" 18#include "base/strings/utf_string_conversions.h" 19#include "base/time/time.h" 20#include "chrome/browser/autocomplete/autocomplete_result.h" 21#include "chrome/browser/autocomplete/history_url_provider.h" 22#include "chrome/browser/history/history_database.h" 23#include "chrome/browser/history/history_service.h" 24#include "chrome/browser/history/history_service_factory.h" 25#include "chrome/browser/history/in_memory_url_index.h" 26#include "chrome/browser/history/in_memory_url_index_types.h" 27#include "chrome/browser/history/scored_history_match.h" 28#include "chrome/browser/omnibox/omnibox_field_trial.h" 29#include "chrome/browser/profiles/profile.h" 30#include "chrome/browser/search/search.h" 31#include "chrome/browser/search_engines/template_url.h" 32#include "chrome/browser/search_engines/template_url_service.h" 33#include "chrome/browser/search_engines/template_url_service_factory.h" 34#include "chrome/common/autocomplete_match_type.h" 35#include "chrome/common/chrome_switches.h" 36#include "chrome/common/net/url_fixer_upper.h" 37#include "chrome/common/pref_names.h" 38#include "chrome/common/url_constants.h" 39#include "content/public/browser/notification_source.h" 40#include "content/public/browser/notification_types.h" 41#include "net/base/escape.h" 42#include "net/base/net_util.h" 43#include "net/base/registry_controlled_domains/registry_controlled_domain.h" 44#include "url/url_parse.h" 45#include "url/url_util.h" 46 47using history::InMemoryURLIndex; 48using history::ScoredHistoryMatch; 49using history::ScoredHistoryMatches; 50 51bool HistoryQuickProvider::disabled_ = false; 52 53HistoryQuickProvider::HistoryQuickProvider( 54 AutocompleteProviderListener* listener, 55 Profile* profile) 56 : HistoryProvider(listener, profile, 57 AutocompleteProvider::TYPE_HISTORY_QUICK), 58 languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)) { 59} 60 61void HistoryQuickProvider::Start(const AutocompleteInput& input, 62 bool minimal_changes) { 63 matches_.clear(); 64 if (disabled_) 65 return; 66 67 // Don't bother with INVALID and FORCED_QUERY. 68 if ((input.type() == AutocompleteInput::INVALID) || 69 (input.type() == AutocompleteInput::FORCED_QUERY)) 70 return; 71 72 autocomplete_input_ = input; 73 74 // TODO(pkasting): We should just block here until this loads. Any time 75 // someone unloads the history backend, we'll get inconsistent inline 76 // autocomplete behavior here. 77 if (GetIndex()) { 78 base::TimeTicks start_time = base::TimeTicks::Now(); 79 DoAutocomplete(); 80 if (input.text().length() < 6) { 81 base::TimeTicks end_time = base::TimeTicks::Now(); 82 std::string name = "HistoryQuickProvider.QueryIndexTime." + 83 base::IntToString(input.text().length()); 84 base::HistogramBase* counter = base::Histogram::FactoryGet( 85 name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag); 86 counter->Add(static_cast<int>((end_time - start_time).InMilliseconds())); 87 } 88 UpdateStarredStateOfMatches(); 89 } 90} 91 92void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) { 93 DCHECK(match.deletable); 94 DCHECK(match.destination_url.is_valid()); 95 // Delete the match from the InMemoryURLIndex. 96 GetIndex()->DeleteURL(match.destination_url); 97 DeleteMatchFromMatches(match); 98} 99 100HistoryQuickProvider::~HistoryQuickProvider() {} 101 102void HistoryQuickProvider::DoAutocomplete() { 103 // Get the matching URLs from the DB. 104 ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms( 105 autocomplete_input_.text(), 106 autocomplete_input_.cursor_position()); 107 if (matches.empty()) 108 return; 109 110 // Figure out if HistoryURL provider has a URL-what-you-typed match 111 // that ought to go first and what its score will be. 112 bool will_have_url_what_you_typed_match_first = false; 113 int url_what_you_typed_match_score = -1; // undefined 114 // These are necessary (but not sufficient) conditions for the omnibox 115 // input to be a URL-what-you-typed match. The username test checks that 116 // either the username does not exist (a regular URL such as http://site/) 117 // or, if the username exists (http://user@site/), there must be either 118 // a password or a port. Together these exclude pure username@site 119 // inputs because these are likely to be an e-mail address. HistoryURL 120 // provider won't promote the URL-what-you-typed match to first 121 // for these inputs. 122 const bool can_have_url_what_you_typed_match_first = 123 autocomplete_input_.canonicalized_url().is_valid() && 124 (autocomplete_input_.type() != AutocompleteInput::QUERY) && 125 (autocomplete_input_.type() != AutocompleteInput::FORCED_QUERY) && 126 (!autocomplete_input_.parts().username.is_nonempty() || 127 autocomplete_input_.parts().password.is_nonempty() || 128 autocomplete_input_.parts().path.is_nonempty()); 129 if (can_have_url_what_you_typed_match_first) { 130 HistoryService* const history_service = 131 HistoryServiceFactory::GetForProfile(profile_, 132 Profile::EXPLICIT_ACCESS); 133 // We expect HistoryService to be available. In case it's not, 134 // (e.g., due to Profile corruption) we let HistoryQuick provider 135 // completions (which may be available because it's a different 136 // data structure) compete with the URL-what-you-typed match as 137 // normal. 138 if (history_service) { 139 history::URLDatabase* url_db = history_service->InMemoryDatabase(); 140 // url_db can be NULL if it hasn't finished initializing (or 141 // failed to to initialize). In this case, we let HistoryQuick 142 // provider completions compete with the URL-what-you-typed 143 // match as normal. 144 if (url_db) { 145 const std::string host(base::UTF16ToUTF8( 146 autocomplete_input_.text().substr( 147 autocomplete_input_.parts().host.begin, 148 autocomplete_input_.parts().host.len))); 149 // We want to put the URL-what-you-typed match first if either 150 // * the user visited the URL before (intranet or internet). 151 // * it's a URL on a host that user visited before and this 152 // is the root path of the host. (If the user types some 153 // of a path--more than a simple "/"--we let autocomplete compete 154 // normally with the URL-what-you-typed match.) 155 // TODO(mpearson): Remove this hacky code and simply score URL-what- 156 // you-typed in some sane way relative to possible completions: 157 // URL-what-you-typed should get some sort of a boost relative 158 // to completions, but completions should naturally win if 159 // they're a lot more popular. In this process, if the input 160 // is a bare intranet hostname that has been visited before, we 161 // may want to enforce that the only completions that can outscore 162 // the URL-what-you-typed match are on the same host (i.e., aren't 163 // from a longer internet hostname for which the omnibox input is 164 // a prefix). 165 if (url_db->GetRowForURL( 166 autocomplete_input_.canonicalized_url(), NULL) != 0) { 167 // We visited this URL before. 168 will_have_url_what_you_typed_match_first = true; 169 // HistoryURLProvider gives visited what-you-typed URLs a high score. 170 url_what_you_typed_match_score = 171 HistoryURLProvider::kScoreForBestInlineableResult; 172 } else if (url_db->IsTypedHost(host) && 173 (!autocomplete_input_.parts().path.is_nonempty() || 174 ((autocomplete_input_.parts().path.len == 1) && 175 (autocomplete_input_.text()[ 176 autocomplete_input_.parts().path.begin] == '/'))) && 177 !autocomplete_input_.parts().query.is_nonempty() && 178 !autocomplete_input_.parts().ref.is_nonempty()) { 179 // Not visited, but we've seen the host before. 180 will_have_url_what_you_typed_match_first = true; 181 const size_t registry_length = 182 net::registry_controlled_domains::GetRegistryLength( 183 host, 184 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 185 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 186 if (registry_length == 0) { 187 // Known intranet hosts get one score. 188 url_what_you_typed_match_score = 189 HistoryURLProvider::kScoreForUnvisitedIntranetResult; 190 } else { 191 // Known internet hosts get another. 192 url_what_you_typed_match_score = 193 HistoryURLProvider::kScoreForWhatYouTypedResult; 194 } 195 } 196 } 197 } 198 } 199 200 // Loop over every result and add it to matches_. In the process, 201 // guarantee that scores are decreasing. |max_match_score| keeps 202 // track of the highest score we can assign to any later results we 203 // see. Also, reduce |max_match_score| if we think there will be 204 // a URL-what-you-typed match. (We want URL-what-you-typed matches for 205 // visited URLs to beat out any longer URLs, no matter how frequently 206 // they're visited.) The strength of this reduction depends on the 207 // likely score for the URL-what-you-typed result. 208 209 // |template_url_service| or |template_url| can be NULL in unit tests. 210 TemplateURLService* template_url_service = 211 TemplateURLServiceFactory::GetForProfile(profile_); 212 TemplateURL* template_url = template_url_service ? 213 template_url_service->GetDefaultSearchProvider() : NULL; 214 int max_match_score = matches.begin()->raw_score(); 215 if (will_have_url_what_you_typed_match_first) { 216 max_match_score = std::min(max_match_score, 217 url_what_you_typed_match_score - 1); 218 } 219 for (ScoredHistoryMatches::const_iterator match_iter = matches.begin(); 220 match_iter != matches.end(); ++match_iter) { 221 const ScoredHistoryMatch& history_match(*match_iter); 222 // Culls results corresponding to queries from the default search engine. 223 // These are low-quality, difficult-to-understand matches for users, and the 224 // SearchProvider should surface past queries in a better way anyway. 225 if (!template_url || 226 !template_url->IsSearchURL(history_match.url_info.url())) { 227 // Set max_match_score to the score we'll assign this result: 228 max_match_score = std::min(max_match_score, history_match.raw_score()); 229 matches_.push_back(QuickMatchToACMatch(history_match, max_match_score)); 230 // Mark this max_match_score as being used: 231 max_match_score--; 232 } 233 } 234} 235 236AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch( 237 const ScoredHistoryMatch& history_match, 238 int score) { 239 const history::URLRow& info = history_match.url_info; 240 AutocompleteMatch match( 241 this, score, !!info.visit_count(), 242 history_match.url_matches().empty() ? 243 AutocompleteMatchType::HISTORY_TITLE : 244 AutocompleteMatchType::HISTORY_URL); 245 match.typed_count = info.typed_count(); 246 match.destination_url = info.url(); 247 DCHECK(match.destination_url.is_valid()); 248 249 // Format the URL autocomplete presentation. 250 std::vector<size_t> offsets = 251 OffsetsFromTermMatches(history_match.url_matches()); 252 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & 253 ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP); 254 match.fill_into_edit = 255 AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(), 256 net::FormatUrlWithOffsets(info.url(), languages_, format_types, 257 net::UnescapeRule::SPACES, NULL, NULL, &offsets)); 258 history::TermMatches new_matches = 259 ReplaceOffsetsInTermMatches(history_match.url_matches(), offsets); 260 match.contents = net::FormatUrl(info.url(), languages_, format_types, 261 net::UnescapeRule::SPACES, NULL, NULL, NULL); 262 match.contents_class = 263 SpansFromTermMatch(new_matches, match.contents.length(), true); 264 265 // Set |inline_autocompletion| and |allowed_to_be_default_match| if possible. 266 // The second part of this test can happen if the only match(es) of the user's 267 // term occur in places FormatUrl() decides to omit in the formatted url. 268 // In these cases, it's impossible to set |inline_autocompletion| correctly 269 // and hence the match cannot be the default match. I (mpearson@) believe 270 // this is likely caused by the mismatch that offsets are originally 271 // computed with respect to the cleaned-up URL yet then applied and 272 // updated by FormatUrl() as if they applied to the original string. 273 // See crbug.com/252630. 274 // TODO(mpearson): replacing the second clause with a DCHECK after fixing 275 // 252630. 276 if (history_match.can_inline() && !new_matches.empty()) { 277 size_t inline_autocomplete_offset = new_matches[0].offset + 278 new_matches[0].length; 279 // |inline_autocomplete_offset| may be beyond the end of the 280 // |fill_into_edit| if the user has typed an URL with a scheme and the 281 // last character typed is a slash. That slash is removed by the 282 // FormatURLWithOffsets call above. 283 if (inline_autocomplete_offset < match.fill_into_edit.length()) { 284 match.inline_autocompletion = 285 match.fill_into_edit.substr(inline_autocomplete_offset); 286 } 287 match.allowed_to_be_default_match = match.inline_autocompletion.empty() || 288 !PreventInlineAutocomplete(autocomplete_input_); 289 } 290 291 // Format the description autocomplete presentation. 292 match.description = info.title(); 293 match.description_class = SpansFromTermMatch( 294 history_match.title_matches(), match.description.length(), false); 295 296 match.RecordAdditionalInfo("typed count", info.typed_count()); 297 match.RecordAdditionalInfo("visit count", info.visit_count()); 298 match.RecordAdditionalInfo("last visit", info.last_visit()); 299 300 return match; 301} 302 303history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() { 304 if (index_for_testing_.get()) 305 return index_for_testing_.get(); 306 307 HistoryService* const history_service = 308 HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS); 309 if (!history_service) 310 return NULL; 311 312 return history_service->InMemoryIndex(); 313} 314