history_quick_provider.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/history_quick_provider.h"
6
7#include <vector>
8
9#include "base/basictypes.h"
10#include "base/command_line.h"
11#include "base/i18n/break_iterator.h"
12#include "base/logging.h"
13#include "base/metrics/field_trial.h"
14#include "base/metrics/histogram.h"
15#include "base/prefs/pref_service.h"
16#include "base/strings/string_number_conversions.h"
17#include "base/strings/string_util.h"
18#include "base/strings/utf_string_conversions.h"
19#include "base/time/time.h"
20#include "chrome/browser/autocomplete/autocomplete_result.h"
21#include "chrome/browser/autocomplete/history_url_provider.h"
22#include "chrome/browser/history/history_database.h"
23#include "chrome/browser/history/history_service.h"
24#include "chrome/browser/history/history_service_factory.h"
25#include "chrome/browser/history/in_memory_url_index.h"
26#include "chrome/browser/history/in_memory_url_index_types.h"
27#include "chrome/browser/history/scored_history_match.h"
28#include "chrome/browser/net/url_fixer_upper.h"
29#include "chrome/browser/omnibox/omnibox_field_trial.h"
30#include "chrome/browser/profiles/profile.h"
31#include "chrome/browser/search/search.h"
32#include "chrome/browser/search_engines/template_url.h"
33#include "chrome/browser/search_engines/template_url_service.h"
34#include "chrome/browser/search_engines/template_url_service_factory.h"
35#include "chrome/common/autocomplete_match_type.h"
36#include "chrome/common/chrome_switches.h"
37#include "chrome/common/pref_names.h"
38#include "chrome/common/url_constants.h"
39#include "content/public/browser/browser_thread.h"
40#include "content/public/browser/notification_source.h"
41#include "content/public/browser/notification_types.h"
42#include "net/base/escape.h"
43#include "net/base/net_util.h"
44#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
45#include "url/url_parse.h"
46#include "url/url_util.h"
47
48using history::InMemoryURLIndex;
49using history::ScoredHistoryMatch;
50using history::ScoredHistoryMatches;
51
52bool HistoryQuickProvider::disabled_ = false;
53
54HistoryQuickProvider::HistoryQuickProvider(
55    AutocompleteProviderListener* listener,
56    Profile* profile)
57    : HistoryProvider(listener, profile,
58          AutocompleteProvider::TYPE_HISTORY_QUICK),
59      languages_(profile_->GetPrefs()->GetString(prefs::kAcceptLanguages)),
60      reorder_for_inlining_(false) {
61  enum InliningOption {
62    INLINING_PROHIBITED = 0,
63    INLINING_ALLOWED = 1,
64    INLINING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2,
65    INLINING_FIELD_TRIAL_DEFAULT_GROUP = 3,
66    INLINING_FIELD_TRIAL_EXPERIMENT_GROUP = 4,
67    NUM_OPTIONS = 5
68  };
69  // should always be overwritten
70  InliningOption inlining_option = NUM_OPTIONS;
71
72  const std::string switch_value = CommandLine::ForCurrentProcess()->
73      GetSwitchValueASCII(switches::kOmniboxInlineHistoryQuickProvider);
74  if (switch_value == switches::kOmniboxInlineHistoryQuickProviderAllowed) {
75    inlining_option = INLINING_ALLOWED;
76    always_prevent_inline_autocomplete_ = false;
77  } else if (switch_value ==
78             switches::kOmniboxInlineHistoryQuickProviderProhibited) {
79    inlining_option = INLINING_PROHIBITED;
80    always_prevent_inline_autocomplete_ = true;
81  } else {
82    // We'll assume any other flag means automatic.
83    // Automatic means eligible for the field trial.
84
85    // For the field trial stuff to work correctly, we must be running
86    // on the same thread as the thread that created the field trial,
87    // which happens via a call to OmniboxFieldTrial::Active in
88    // chrome_browser_main.cc on the main thread.  Let's check this to
89    // be sure.  We check "if we've heard of the UI thread then we'd better
90    // be on it."  The first part is necessary so unit tests pass.  (Many
91    // unit tests don't set up the threading naming system; hence
92    // CurrentlyOn(UI thread) will fail.)
93    DCHECK(!content::BrowserThread::IsWellKnownThread(
94               content::BrowserThread::UI) ||
95           content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
96    if (OmniboxFieldTrial::InDisallowInlineHQPFieldTrial()) {
97      if (OmniboxFieldTrial::InDisallowInlineHQPFieldTrialExperimentGroup()) {
98        always_prevent_inline_autocomplete_ = true;
99        inlining_option = INLINING_FIELD_TRIAL_EXPERIMENT_GROUP;
100      } else {
101        always_prevent_inline_autocomplete_ = false;
102        inlining_option = INLINING_FIELD_TRIAL_DEFAULT_GROUP;
103      }
104    } else {
105      always_prevent_inline_autocomplete_ = false;
106      inlining_option = INLINING_AUTO_BUT_NOT_IN_FIELD_TRIAL;
107    }
108  }
109
110  // Add a beacon to the logs that'll allow us to identify later what
111  // inlining state a user is in.  Do this by incrementing a bucket in
112  // a histogram, where the bucket represents the user's inlining state.
113  UMA_HISTOGRAM_ENUMERATION(
114      "Omnibox.InlineHistoryQuickProviderFieldTrialBeacon",
115      inlining_option, NUM_OPTIONS);
116
117  reorder_for_inlining_ = CommandLine::ForCurrentProcess()->
118      GetSwitchValueASCII(switches::
119                          kOmniboxHistoryQuickProviderReorderForInlining) ==
120      switches::kOmniboxHistoryQuickProviderReorderForInliningEnabled;
121}
122
123void HistoryQuickProvider::Start(const AutocompleteInput& input,
124                                 bool minimal_changes) {
125  matches_.clear();
126  if (disabled_)
127    return;
128
129  // Don't bother with INVALID and FORCED_QUERY.  Also pass when looking for
130  // BEST_MATCH and there is no inline autocompletion because none of the HQP
131  // matches can score highly enough to qualify.
132  if ((input.type() == AutocompleteInput::INVALID) ||
133      (input.type() == AutocompleteInput::FORCED_QUERY) ||
134      (input.matches_requested() == AutocompleteInput::BEST_MATCH &&
135       input.prevent_inline_autocomplete()))
136    return;
137
138  autocomplete_input_ = input;
139
140  // TODO(pkasting): We should just block here until this loads.  Any time
141  // someone unloads the history backend, we'll get inconsistent inline
142  // autocomplete behavior here.
143  if (GetIndex()) {
144    base::TimeTicks start_time = base::TimeTicks::Now();
145    DoAutocomplete();
146    if (input.text().length() < 6) {
147      base::TimeTicks end_time = base::TimeTicks::Now();
148      std::string name = "HistoryQuickProvider.QueryIndexTime." +
149          base::IntToString(input.text().length());
150      base::HistogramBase* counter = base::Histogram::FactoryGet(
151          name, 1, 1000, 50, base::Histogram::kUmaTargetedHistogramFlag);
152      counter->Add(static_cast<int>((end_time - start_time).InMilliseconds()));
153    }
154    UpdateStarredStateOfMatches();
155  }
156}
157
158void HistoryQuickProvider::DeleteMatch(const AutocompleteMatch& match) {
159  DCHECK(match.deletable);
160  DCHECK(match.destination_url.is_valid());
161  // Delete the match from the InMemoryURLIndex.
162  GetIndex()->DeleteURL(match.destination_url);
163  DeleteMatchFromMatches(match);
164}
165
166HistoryQuickProvider::~HistoryQuickProvider() {}
167
168void HistoryQuickProvider::DoAutocomplete() {
169  // Get the matching URLs from the DB.
170  ScoredHistoryMatches matches = GetIndex()->HistoryItemsForTerms(
171      autocomplete_input_.text(),
172      autocomplete_input_.cursor_position());
173  if (matches.empty())
174    return;
175
176  // If we're allowed to reorder results in order to get an inlineable
177  // result to appear first (and hence have a HistoryQuickProvider
178  // suggestion possibly appear first), find the first inlineable
179  // result and then swap it to the front.  Obviously, don't do this
180  // if we're told to prevent inline autocompletion.  (If we're told
181  // we're going to prevent inline autocompletion, we're going to
182  // later demote the score of all results so none will be inlined.
183  // Hence there's no need to reorder the results so an inlineable one
184  // appears first.)
185  if (reorder_for_inlining_ &&
186      !PreventInlineAutocomplete(autocomplete_input_)) {
187    for (ScoredHistoryMatches::iterator i(matches.begin());
188         (i != matches.end()) &&
189             (i->raw_score >= AutocompleteResult::kLowestDefaultScore);
190         ++i) {
191      if (i->can_inline) {  // this test is only true once because of the break
192        if (i != matches.begin())
193          std::rotate(matches.begin(), i, i + 1);
194        break;
195      }
196    }
197  }
198
199  // Figure out if HistoryURL provider has a URL-what-you-typed match
200  // that ought to go first and what its score will be.
201  bool will_have_url_what_you_typed_match_first = false;
202  int url_what_you_typed_match_score = -1;  // undefined
203  // These are necessary (but not sufficient) conditions for the omnibox
204  // input to be a URL-what-you-typed match.  The username test checks that
205  // either the username does not exist (a regular URL such as http://site/)
206  // or, if the username exists (http://user@site/), there must be either
207  // a password or a port.  Together these exclude pure username@site
208  // inputs because these are likely to be an e-mail address.  HistoryURL
209  // provider won't promote the URL-what-you-typed match to first
210  // for these inputs.
211  const bool can_have_url_what_you_typed_match_first =
212      autocomplete_input_.canonicalized_url().is_valid() &&
213      (autocomplete_input_.type() != AutocompleteInput::QUERY) &&
214      (autocomplete_input_.type() != AutocompleteInput::FORCED_QUERY) &&
215      (!autocomplete_input_.parts().username.is_nonempty() ||
216       autocomplete_input_.parts().password.is_nonempty() ||
217       autocomplete_input_.parts().path.is_nonempty());
218  if (can_have_url_what_you_typed_match_first) {
219    HistoryService* const history_service =
220        HistoryServiceFactory::GetForProfile(profile_,
221                                             Profile::EXPLICIT_ACCESS);
222    // We expect HistoryService to be available.  In case it's not,
223    // (e.g., due to Profile corruption) we let HistoryQuick provider
224    // completions (which may be available because it's a different
225    // data structure) compete with the URL-what-you-typed match as
226    // normal.
227    if (history_service) {
228      history::URLDatabase* url_db = history_service->InMemoryDatabase();
229      // url_db can be NULL if it hasn't finished initializing (or
230      // failed to to initialize).  In this case, we let HistoryQuick
231      // provider completions compete with the URL-what-you-typed
232      // match as normal.
233      if (url_db) {
234        const std::string host(UTF16ToUTF8(autocomplete_input_.text().substr(
235            autocomplete_input_.parts().host.begin,
236            autocomplete_input_.parts().host.len)));
237        // We want to put the URL-what-you-typed match first if either
238        // * the user visited the URL before (intranet or internet).
239        // * it's a URL on a host that user visited before and this
240        //   is the root path of the host.  (If the user types some
241        //   of a path--more than a simple "/"--we let autocomplete compete
242        //   normally with the URL-what-you-typed match.)
243        // TODO(mpearson): Remove this hacky code and simply score URL-what-
244        // you-typed in some sane way relative to possible completions:
245        // URL-what-you-typed should get some sort of a boost relative
246        // to completions, but completions should naturally win if
247        // they're a lot more popular.  In this process, if the input
248        // is a bare intranet hostname that has been visited before, we
249        // may want to enforce that the only completions that can outscore
250        // the URL-what-you-typed match are on the same host (i.e., aren't
251        // from a longer internet hostname for which the omnibox input is
252        // a prefix).
253        if (url_db->GetRowForURL(
254            autocomplete_input_.canonicalized_url(), NULL) != 0) {
255          // We visited this URL before.
256          will_have_url_what_you_typed_match_first = true;
257          // HistoryURLProvider gives visited what-you-typed URLs a high score.
258          url_what_you_typed_match_score =
259              HistoryURLProvider::kScoreForBestInlineableResult;
260        } else if (url_db->IsTypedHost(host) &&
261             (!autocomplete_input_.parts().path.is_nonempty() ||
262              ((autocomplete_input_.parts().path.len == 1) &&
263               (autocomplete_input_.text()[
264                   autocomplete_input_.parts().path.begin] == '/'))) &&
265             !autocomplete_input_.parts().query.is_nonempty() &&
266             !autocomplete_input_.parts().ref.is_nonempty()) {
267          // Not visited, but we've seen the host before.
268          will_have_url_what_you_typed_match_first = true;
269          const size_t registry_length =
270              net::registry_controlled_domains::GetRegistryLength(
271                  host,
272                  net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
273                  net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
274          if (registry_length == 0) {
275            // Known intranet hosts get one score.
276            url_what_you_typed_match_score =
277                HistoryURLProvider::kScoreForUnvisitedIntranetResult;
278          } else {
279            // Known internet hosts get another.
280            url_what_you_typed_match_score =
281                HistoryURLProvider::kScoreForWhatYouTypedResult;
282          }
283        }
284      }
285    }
286  }
287
288  // Loop over every result and add it to matches_.  In the process,
289  // guarantee that scores are decreasing.  |max_match_score| keeps
290  // track of the highest score we can assign to any later results we
291  // see.  Also, if we're not allowing inline autocompletions in
292  // general or the current best suggestion isn't inlineable,
293  // artificially reduce the starting |max_match_score| (which
294  // therefore applies to all results) to something low enough that
295  // guarantees no result will be offered as an autocomplete
296  // suggestion.  Also do a similar reduction if we think there will be
297  // a URL-what-you-typed match.  (We want URL-what-you-typed matches for
298  // visited URLs to beat out any longer URLs, no matter how frequently
299  // they're visited.)  The strength of this last reduction depends on the
300  // likely score for the URL-what-you-typed result.
301
302  // |template_url_service| or |template_url| can be NULL in unit tests.
303  TemplateURLService* template_url_service =
304      TemplateURLServiceFactory::GetForProfile(profile_);
305  TemplateURL* template_url = template_url_service ?
306      template_url_service->GetDefaultSearchProvider() : NULL;
307  int max_match_score = (PreventInlineAutocomplete(autocomplete_input_) ||
308      !matches.begin()->can_inline) ?
309      (AutocompleteResult::kLowestDefaultScore - 1) :
310      matches.begin()->raw_score;
311  if (will_have_url_what_you_typed_match_first) {
312    max_match_score = std::min(max_match_score,
313        url_what_you_typed_match_score - 1);
314  }
315  for (ScoredHistoryMatches::const_iterator match_iter = matches.begin();
316       match_iter != matches.end(); ++match_iter) {
317    const ScoredHistoryMatch& history_match(*match_iter);
318    // Culls results corresponding to queries from the default search engine.
319    // These are low-quality, difficult-to-understand matches for users, and the
320    // SearchProvider should surface past queries in a better way anyway.
321    if (!template_url ||
322        !template_url->IsSearchURL(history_match.url_info.url())) {
323      // Set max_match_score to the score we'll assign this result:
324      max_match_score = std::min(max_match_score, history_match.raw_score);
325      matches_.push_back(QuickMatchToACMatch(history_match, max_match_score));
326      // Mark this max_match_score as being used:
327      max_match_score--;
328    }
329  }
330}
331
332AutocompleteMatch HistoryQuickProvider::QuickMatchToACMatch(
333    const ScoredHistoryMatch& history_match,
334    int score) {
335  const history::URLRow& info = history_match.url_info;
336  AutocompleteMatch match(this, score, !!info.visit_count(),
337      history_match.url_matches.empty() ? AutocompleteMatchType::HISTORY_TITLE :
338          AutocompleteMatchType::HISTORY_URL);
339  match.typed_count = info.typed_count();
340  match.destination_url = info.url();
341  DCHECK(match.destination_url.is_valid());
342
343  // Format the URL autocomplete presentation.
344  std::vector<size_t> offsets =
345      OffsetsFromTermMatches(history_match.url_matches);
346  const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll &
347      ~(!history_match.match_in_scheme ? 0 : net::kFormatUrlOmitHTTP);
348  match.fill_into_edit =
349      AutocompleteInput::FormattedStringWithEquivalentMeaning(info.url(),
350          net::FormatUrlWithOffsets(info.url(), languages_, format_types,
351              net::UnescapeRule::SPACES, NULL, NULL, &offsets));
352  history::TermMatches new_matches =
353      ReplaceOffsetsInTermMatches(history_match.url_matches, offsets);
354  match.contents = net::FormatUrl(info.url(), languages_, format_types,
355              net::UnescapeRule::SPACES, NULL, NULL, NULL);
356  match.contents_class =
357      SpansFromTermMatch(new_matches, match.contents.length(), true);
358
359  if (!history_match.can_inline) {
360    match.inline_autocomplete_offset = string16::npos;
361  } else {
362    DCHECK(!new_matches.empty());
363    match.inline_autocomplete_offset = new_matches[0].offset +
364        new_matches[0].length;
365    // The following will happen if the user has typed an URL with a scheme
366    // and the last character typed is a slash because that slash is removed
367    // by the FormatURLWithOffsets call above.
368    if (match.inline_autocomplete_offset > match.fill_into_edit.length())
369      match.inline_autocomplete_offset = match.fill_into_edit.length();
370  }
371
372  // Format the description autocomplete presentation.
373  match.description = info.title();
374  match.description_class = SpansFromTermMatch(
375      history_match.title_matches, match.description.length(), false);
376
377  match.RecordAdditionalInfo("typed count", info.typed_count());
378  match.RecordAdditionalInfo("visit count", info.visit_count());
379  match.RecordAdditionalInfo("last visit", info.last_visit());
380
381  return match;
382}
383
384history::InMemoryURLIndex* HistoryQuickProvider::GetIndex() {
385  if (index_for_testing_.get())
386    return index_for_testing_.get();
387
388  HistoryService* const history_service =
389      HistoryServiceFactory::GetForProfile(profile_, Profile::EXPLICIT_ACCESS);
390  if (!history_service)
391    return NULL;
392
393  return history_service->InMemoryIndex();
394}
395
396// static
397ACMatchClassifications HistoryQuickProvider::SpansFromTermMatch(
398    const history::TermMatches& matches,
399    size_t text_length,
400    bool is_url) {
401  ACMatchClassification::Style url_style =
402      is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
403  ACMatchClassifications spans;
404  if (matches.empty()) {
405    if (text_length)
406      spans.push_back(ACMatchClassification(0, url_style));
407    return spans;
408  }
409  if (matches[0].offset)
410    spans.push_back(ACMatchClassification(0, url_style));
411  size_t match_count = matches.size();
412  for (size_t i = 0; i < match_count;) {
413    size_t offset = matches[i].offset;
414    spans.push_back(ACMatchClassification(offset,
415        ACMatchClassification::MATCH | url_style));
416    // Skip all adjacent matches.
417    do {
418      offset += matches[i].length;
419      ++i;
420    } while ((i < match_count) && (offset == matches[i].offset));
421    if (offset < text_length)
422      spans.push_back(ACMatchClassification(offset, url_style));
423  }
424
425  return spans;
426}
427