history_quick_provider.cc revision 116680a4aac90f2aa7413d9095a592090648e557
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/history_quick_provider.h"
6
7#include <vector>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/i18n/break_iterator.h"
12#include "base/logging.h"
13#include "base/metrics/field_trial.h"
14#include "base/metrics/histogram.h"
15#include "base/prefs/pref_service.h"
16#include "base/strings/string_number_conversions.h"
17#include "base/strings/string_util.h"
18#include "base/strings/utf_string_conversions.h"
19#include "base/time/time.h"
20#include "chrome/browser/autocomplete/autocomplete_result.h"
21#include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
22#include "chrome/browser/autocomplete/history_url_provider.h"
23#include "chrome/browser/bookmarks/bookmark_model_factory.h"
24#include "chrome/browser/history/history_database.h"
25#include "chrome/browser/history/history_service.h"
26#include "chrome/browser/history/history_service_factory.h"
27#include "chrome/browser/history/in_memory_url_index.h"
28#include "chrome/browser/history/in_memory_url_index_types.h"
29#include "chrome/browser/history/scored_history_match.h"
30#include "chrome/browser/omnibox/omnibox_field_trial.h"
31#include "chrome/browser/profiles/profile.h"
32#include "chrome/browser/search_engines/template_url_service_factory.h"
33#include "chrome/common/chrome_switches.h"
34#include "chrome/common/pref_names.h"
35#include "chrome/common/url_constants.h"
36#include "components/autocomplete/autocomplete_match_type.h"
37#include "components/metrics/proto/omnibox_input_type.pb.h"
38#include "components/search_engines/template_url.h"
39#include "components/search_engines/template_url_service.h"
40#include "content/public/browser/notification_source.h"
41#include "content/public/browser/notification_types.h"
42#include "net/base/escape.h"
43#include "net/base/net_util.h"
44#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
45#include "url/url_parse.h"
46#include "url/url_util.h"
47
48using history::InMemoryURLIndex;
49using history::ScoredHistoryMatch;
50using history::ScoredHistoryMatches;
51
52bool HistoryQuickProvider::disabled_ = false;
53
54HistoryQuickProvider::HistoryQuickProvider(Profile* profile)
55    : HistoryProvider(profile, AutocompleteProvider::TYPE_HISTORY_QUICK),
56      languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)) {
57}
58
59void HistoryQuickProvider::Start(const AutocompleteInput& input,
60                                 bool minimal_changes) {
61  matches_.clear();
62  if (disabled_)
63    return;
64
65  // Don't bother with INVALID and FORCED_QUERY.
66  if ((input.type() == metrics::OmniboxInputType::INVALID) ||
67      (input.type() == metrics::OmniboxInputType::FORCED_QUERY))
68    return;
69
70  autocomplete_input_ = input;
71
72  // TODO(pkasting): We should just block here until this loads.  Any time
73  // someone unloads the history backend, we'll get inconsistent inline
74  // autocomplete behavior here.
75  if (GetIndex()) {
76    base::TimeTicks start_time = base::TimeTicks::Now();
77    DoAutocomplete();
78    if (input.text().length() < 6) {
79      base::TimeTicks end_time = base::TimeTicks::Now();
80      std::string name = "HistoryQuickProvider.QueryIndexTime." +
81          base::IntToString(input.text().length());
82      base::HistogramBase* counter = base::Histogram::FactoryGet(
83          name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
84      counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
85    }
86    UpdateStarredStateOfMatches(BookmarkModelFactory::GetForProfile(profile_));
87  }
88}
89
90HistoryQuickProvider::~HistoryQuickProvider() {}
91
92void HistoryQuickProvider::DoAutocomplete() {
93  // Get the matching URLs from the DB.
94  ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(
95      autocomplete_input_.text(),
96      autocomplete_input_.cursor_position(),
97      AutocompleteProvider::kMaxMatches);
98  if (matches.empty())
99    return;
100
101  // Figure out if HistoryURL provider has a URL-what-you-typed match
102  // that ought to go first and what its score will be.
103  bool will_have_url_what_you_typed_match_first = false;
104  int url_what_you_typed_match_score = -1;  // undefined
105  // These are necessary (but not sufficient) conditions for the omnibox
106  // input to be a URL-what-you-typed match.  The username test checks that
107  // either the username does not exist (a regular URL such as http://site/)
108  // or, if the username exists (http://user@site/), there must be either
109  // a password or a port.  Together these exclude pure username@site
110  // inputs because these are likely to be an e-mail address.  HistoryURL
111  // provider won't promote the URL-what-you-typed match to first
112  // for these inputs.
113  const bool can_have_url_what_you_typed_match_first =
114      (autocomplete_input_.type() != metrics::OmniboxInputType::QUERY) &&
115      (!autocomplete_input_.parts().username.is_nonempty() ||
116       autocomplete_input_.parts().password.is_nonempty() ||
117       autocomplete_input_.parts().path.is_nonempty());
118  if (can_have_url_what_you_typed_match_first) {
119    HistoryService* const history_service =
120        HistoryServiceFactory::GetForProfile(profile_,
121                                             Profile::EXPLICIT_ACCESS);
122    // We expect HistoryService to be available.  In case it's not,
123    // (e.g., due to Profile corruption) we let HistoryQuick provider
124    // completions (which may be available because it's a different
125    // data structure) compete with the URL-what-you-typed match as
126    // normal.
127    if (history_service) {
128      history::URLDatabase* url_db = history_service->InMemoryDatabase();
129      // url_db can be NULL if it hasn't finished initializing (or
130      // failed to to initialize).  In this case, we let HistoryQuick
131      // provider completions compete with the URL-what-you-typed
132      // match as normal.
133      if (url_db) {
134        const std::string host(base::UTF16ToUTF8(
135            autocomplete_input_.text().substr(
136                autocomplete_input_.parts().host.begin,
137                autocomplete_input_.parts().host.len)));
138        // We want to put the URL-what-you-typed match first if either
139        // * the user visited the URL before (intranet or internet).
140        // * it's a URL on a host that user visited before and this
141        //   is the root path of the host.  (If the user types some
142        //   of a path--more than a simple "/"--we let autocomplete compete
143        //   normally with the URL-what-you-typed match.)
144        // TODO(mpearson): Remove this hacky code and simply score URL-what-
145        // you-typed in some sane way relative to possible completions:
146        // URL-what-you-typed should get some sort of a boost relative
147        // to completions, but completions should naturally win if
148        // they're a lot more popular.  In this process, if the input
149        // is a bare intranet hostname that has been visited before, we
150        // may want to enforce that the only completions that can outscore
151        // the URL-what-you-typed match are on the same host (i.e., aren't
152        // from a longer internet hostname for which the omnibox input is
153        // a prefix).
154        if (url_db->GetRowForURL(
155            autocomplete_input_.canonicalized_url(), NULL) != 0) {
156          // We visited this URL before.
157          will_have_url_what_you_typed_match_first = true;
158          // HistoryURLProvider gives visited what-you-typed URLs a high score.
159          url_what_you_typed_match_score =
160              HistoryURLProvider::kScoreForBestInlineableResult;
161        } else if (url_db->IsTypedHost(host) &&
162             (!autocomplete_input_.parts().path.is_nonempty() ||
163              ((autocomplete_input_.parts().path.len == 1) &&
164               (autocomplete_input_.text()[
165                   autocomplete_input_.parts().path.begin] == '/'))) &&
166             !autocomplete_input_.parts().query.is_nonempty() &&
167             !autocomplete_input_.parts().ref.is_nonempty()) {
168          // Not visited, but we've seen the host before.
169          will_have_url_what_you_typed_match_first = true;
170          const size_t registry_length =
171              net::registry_controlled_domains::GetRegistryLength(
172                  host,
173                  net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
174                  net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
175          if (registry_length == 0) {
176            // Known intranet hosts get one score.
177            url_what_you_typed_match_score =
178                HistoryURLProvider::kScoreForUnvisitedIntranetResult;
179          } else {
180            // Known internet hosts get another.
181            url_what_you_typed_match_score =
182                HistoryURLProvider::kScoreForWhatYouTypedResult;
183          }
184        }
185      }
186    }
187  }
188
189  // Loop over every result and add it to matches_.  In the process,
190  // guarantee that scores are decreasing.  |max_match_score| keeps
191  // track of the highest score we can assign to any later results we
192  // see.  Also, reduce |max_match_score| if we think there will be
193  // a URL-what-you-typed match.  (We want URL-what-you-typed matches for
194  // visited URLs to beat out any longer URLs, no matter how frequently
195  // they're visited.)  The strength of this reduction depends on the
196  // likely score for the URL-what-you-typed result.
197
198  // |template_url_service| or |template_url| can be NULL in unit tests.
199  TemplateURLService* template_url_service =
200      TemplateURLServiceFactory::GetForProfile(profile_);
201  TemplateURL* template_url = template_url_service ?
202      template_url_service->GetDefaultSearchProvider() : NULL;
203  int max_match_score = matches.begin()->raw_score();
204  if (will_have_url_what_you_typed_match_first) {
205    max_match_score = std::min(max_match_score,
206        url_what_you_typed_match_score - 1);
207  }
208  for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
209       match_iter != matches.end(); ++match_iter) {
210    const ScoredHistoryMatch& history_match(*match_iter);
211    // Culls results corresponding to queries from the default search engine.
212    // These are low-quality, difficult-to-understand matches for users, and the
213    // SearchProvider should surface past queries in a better way anyway.
214    if (!template_url ||
215        !template_url->IsSearchURL(history_match.url_info.url(),
216                                   template_url_service->search_terms_data())) {
217      // Set max_match_score to the score we'll assign this result:
218      max_match_score = std::min(max_match_score, history_match.raw_score());
219      matches_.push_back(QuickMatchToACMatch(history_match, max_match_score));
220      // Mark this max_match_score as being used:
221      max_match_score--;
222    }
223  }
224}
225
226AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
227    const ScoredHistoryMatch& history_match,
228    int score) {
229  const history::URLRow& info = history_match.url_info;
230  AutocompleteMatch match(
231      this, score, !!info.visit_count(),
232      history_match.url_matches().empty() ?
233          AutocompleteMatchType::HISTORY_TITLE :
234          AutocompleteMatchType::HISTORY_URL);
235  match.typed_count = info.typed_count();
236  match.destination_url = info.url();
237  DCHECK(match.destination_url.is_valid());
238
239  // Format the URL autocomplete presentation.
240  const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
241      ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP);
242  match.fill_into_edit =
243      AutocompleteInput::FormattedStringWithEquivalentMeaning(
244          info.url(),
245          net::FormatUrl(info.url(), languages_, format_types,
246                         net::UnescapeRule::SPACES, NULL, NULL, NULL),
247          ChromeAutocompleteSchemeClassifier(profile_));
248  std::vector<size_t> offsets =
249      OffsetsFromTermMatches(history_match.url_matches());
250  base::OffsetAdjuster::Adjustments adjustments;
251  match.contents = net::FormatUrlWithAdjustments(
252      info.url(), languages_, format_types, net::UnescapeRule::SPACES, NULL,
253      NULL, &adjustments);
254  base::OffsetAdjuster::AdjustOffsets(adjustments, &offsets);
255  history::TermMatches new_matches =
256      ReplaceOffsetsInTermMatches(history_match.url_matches(), offsets);
257  match.contents_class =
258      SpansFromTermMatch(new_matches, match.contents.length(), true);
259
260  // Set |inline_autocompletion| and |allowed_to_be_default_match| if possible.
261  if (history_match.can_inline()) {
262    DCHECK(!new_matches.empty());
263    size_t inline_autocomplete_offset = new_matches[0].offset +
264        new_matches[0].length;
265    // |inline_autocomplete_offset| may be beyond the end of the
266    // |fill_into_edit| if the user has typed an URL with a scheme and the
267    // last character typed is a slash.  That slash is removed by the
268    // FormatURLWithOffsets call above.
269    if (inline_autocomplete_offset < match.fill_into_edit.length()) {
270      match.inline_autocompletion =
271          match.fill_into_edit.substr(inline_autocomplete_offset);
272    }
273    match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
274        !PreventInlineAutocomplete(autocomplete_input_);
275  }
276  match.EnsureUWYTIsAllowedToBeDefault(
277      autocomplete_input_.canonicalized_url(),
278      TemplateURLServiceFactory::GetForProfile(profile_));
279
280  // Format the description autocomplete presentation.
281  match.description = info.title();
282  match.description_class = SpansFromTermMatch(
283      history_match.title_matches(), match.description.length(), false);
284
285  match.RecordAdditionalInfo("typed count", info.typed_count());
286  match.RecordAdditionalInfo("visit count", info.visit_count());
287  match.RecordAdditionalInfo("last visit", info.last_visit());
288
289  return match;
290}
291
292history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
293  if (index_for_testing_.get())
294    return index_for_testing_.get();
295
296  HistoryService* const history_service =
297      HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
298  if (!history_service)
299    return NULL;
300
301  return history_service->InMemoryIndex();
302}
303