history_url_provider.h revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
6#define CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
7
8#include <string>
9#include <vector>
10
11#include "base/compiler_specific.h"
12#include "base/synchronization/cancellation_flag.h"
13#include "chrome/browser/autocomplete/autocomplete_input.h"
14#include "chrome/browser/autocomplete/history_provider.h"
15#include "chrome/browser/autocomplete/history_provider_util.h"
16#include "chrome/browser/autocomplete/url_prefix.h"
17#include "chrome/browser/omnibox/omnibox_field_trial.h"
18#include "chrome/browser/search_engines/search_terms_data.h"
19#include "chrome/browser/search_engines/template_url.h"
20
21class Profile;
22
23namespace base {
24class MessageLoop;
25}
26
27namespace history {
28class HistoryBackend;
29class URLDatabase;
30}
31
32// How history autocomplete works
33// ==============================
34//
35// Read down this diagram for temporal ordering.
36//
37//   Main thread                History thread
38//   -----------                --------------
39//   AutocompleteController::Start
40//     -> HistoryURLProvider::Start
41//       -> RunAutocompletePasses
42//         -> SuggestExactInput
43//         [params_ allocated]
44//         -> DoAutocomplete (for inline autocomplete)
45//           -> URLDatabase::AutocompleteForPrefix (on in-memory DB)
46//         -> HistoryService::ScheduleAutocomplete
47//         (return to controller) ----
48//                                   /
49//                              HistoryBackend::ScheduleAutocomplete
50//                                -> HistoryURLProvider::ExecuteWithDB
51//                                  -> DoAutocomplete
52//                                    -> URLDatabase::AutocompleteForPrefix
53//                                /
54//   HistoryService::QueryComplete
55//     [params_ destroyed]
56//     -> AutocompleteProviderListener::OnProviderUpdate
57//
58// The autocomplete controller calls us, and must be called back, on the main
59// thread.  When called, we run two autocomplete passes.  The first pass runs
60// synchronously on the main thread and queries the in-memory URL database.
61// This pass promotes matches for inline autocomplete if applicable.  We do
62// this synchronously so that users get consistent behavior when they type
63// quickly and hit enter, no matter how loaded the main history database is.
64// Doing this synchronously also prevents inline autocomplete from being
65// "flickery" in the AutocompleteEdit.  Because the in-memory DB does not have
66// redirect data, results other than the top match might change between the
67// two passes, so we can't just decide to use this pass' matches as the final
68// results.
69//
70// The second autocomplete pass uses the full history database, which must be
71// queried on the history thread.  Start() asks the history service schedule to
72// callback on the history thread with a pointer to the main database.  When we
73// are done doing queries, we schedule a task on the main thread that notifies
74// the AutocompleteController that we're done.
75//
76// The communication between these threads is done using a
77// HistoryURLProviderParams object.  This is allocated in the main thread, and
78// normally deleted in QueryComplete().  So that both autocomplete passes can
79// use the same code, we also use this to hold results during the first
80// autocomplete pass.
81//
82// While the second pass is running, the AutocompleteController may cancel the
83// request.  This can happen frequently when the user is typing quickly.  In
84// this case, the main thread sets params_->cancel, which the background thread
85// checks periodically.  If it finds the flag set, it stops what it's doing
86// immediately and calls back to the main thread.  (We don't delete the params
87// on the history thread, because we should only do that when we can safely
88// NULL out params_, and that must be done on the main thread.)
89
90// Used to communicate autocomplete parameters between threads via the history
91// service.
92struct HistoryURLProviderParams {
93  HistoryURLProviderParams(const AutocompleteInput& input,
94                           bool trim_http,
95                           const std::string& languages,
96                           TemplateURL* default_search_provider,
97                           const SearchTermsData& search_terms_data);
98  ~HistoryURLProviderParams();
99
100  base::MessageLoop* message_loop;
101
102  // A copy of the autocomplete input. We need the copy since this object will
103  // live beyond the original query while it runs on the history thread.
104  AutocompleteInput input;
105
106  // Should inline autocompletion be disabled? This is initalized from
107  // |input.prevent_inline_autocomplete()|, but set to false is the input
108  // contains trailing white space.
109  bool prevent_inline_autocomplete;
110
111  // Set when "http://" should be trimmed from the beginning of the URLs.
112  bool trim_http;
113
114  // Set by the main thread to cancel this request.  If this flag is set when
115  // the query runs, the query will be abandoned.  This allows us to avoid
116  // running queries that are no longer needed.  Since we don't care if we run
117  // the extra queries, the lack of signaling is not a problem.
118  base::CancellationFlag cancel_flag;
119
120  // Set by ExecuteWithDB() on the history thread when the query could not be
121  // performed because the history system failed to properly init the database.
122  // If this is set when the main thread is called back, it avoids changing
123  // |matches_| at all, so it won't delete the default match
124  // RunAutocompletePasses() creates.
125  bool failed;
126
127  // List of matches written by the history thread.  We keep this separate list
128  // to avoid having the main thread read the provider's matches while the
129  // history thread is manipulating them.  The provider copies this list back
130  // to matches_ on the main thread in QueryComplete().
131  ACMatches matches;
132
133  // Languages we should pass to gfx::GetCleanStringFromUrl.
134  std::string languages;
135
136  // When true, we should avoid calling SuggestExactInput().
137  bool dont_suggest_exact_input;
138
139  // The default search provider and search terms data necessary to cull results
140  // that correspond to searches (on the default engine).  These can only be
141  // obtained on the UI thread, so we have to copy them into here to pass them
142  // to the history thread.  We use a scoped_ptr<TemplateURL> for the DSP since
143  // TemplateURLs can't be copied by value. We use a scoped_ptr<SearchTermsData>
144  // so that we can store a snapshot of the SearchTermsData accessible from the
145  // history thread.
146  scoped_ptr<TemplateURL> default_search_provider;
147  scoped_ptr<SearchTermsData> search_terms_data;
148
149 private:
150  DISALLOW_COPY_AND_ASSIGN(HistoryURLProviderParams);
151};
152
153// This class is an autocomplete provider and is also a pseudo-internal
154// component of the history system.  See comments above.
155class HistoryURLProvider : public HistoryProvider {
156 public:
157  // Various values used in scoring, made public so other providers
158  // can insert results in appropriate ranges relative to these.
159  static const int kScoreForBestInlineableResult;
160  static const int kScoreForUnvisitedIntranetResult;
161  static const int kScoreForWhatYouTypedResult;
162  static const int kBaseScoreForNonInlineableResult;
163
164  HistoryURLProvider(AutocompleteProviderListener* listener, Profile* profile);
165
166  // HistoryProvider:
167  virtual void Start(const AutocompleteInput& input,
168                     bool minimal_changes) OVERRIDE;
169  virtual void Stop(bool clear_cached_results) OVERRIDE;
170
171  // Returns a match representing a navigation to |destination_url| given user
172  // input of |text|.  |trim_http| controls whether the match's |fill_into_edit|
173  // and |contents| should have any HTTP scheme stripped off, and should not be
174  // set to true if |text| contains an http prefix.
175  // NOTE: This does not set the relevance of the returned match, as different
176  //       callers want different behavior. Callers must set this manually.
177  AutocompleteMatch SuggestExactInput(const base::string16& text,
178                                      const GURL& destination_url,
179                                      bool trim_http);
180
181  // Runs the history query on the history thread, called by the history
182  // system. The history database MAY BE NULL in which case it is not
183  // available and we should return no data. Also schedules returning the
184  // results to the main thread
185  void ExecuteWithDB(history::HistoryBackend* backend,
186                     history::URLDatabase* db,
187                     HistoryURLProviderParams* params);
188
189  // Actually runs the autocomplete job on the given database, which is
190  // guaranteed not to be NULL.
191  void DoAutocomplete(history::HistoryBackend* backend,
192                      history::URLDatabase* db,
193                      HistoryURLProviderParams* params);
194
195  // Dispatches the results to the autocomplete controller. Called on the
196  // main thread by ExecuteWithDB when the results are available.
197  // Frees params_gets_deleted on exit.
198  void QueryComplete(HistoryURLProviderParams* params_gets_deleted);
199
200 private:
201  FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest, HUPScoringExperiment);
202
203  enum MatchType {
204    NORMAL,
205    WHAT_YOU_TYPED,
206    INLINE_AUTOCOMPLETE,
207    UNVISITED_INTRANET,  // An intranet site that has never been visited.
208  };
209  class VisitClassifier;
210
211  ~HistoryURLProvider();
212
213  // Determines the relevance for a match, given its type.  If
214  // |match_type| is NORMAL, |match_number| is a number [0,
215  // kMaxSuggestions) indicating the relevance of the match (higher ==
216  // more relevant).  For other values of |match_type|, |match_number|
217  // is ignored.  Only called some of the time; for some matches,
218  // relevancy scores are assigned consecutively decreasing (1416,
219  // 1415, 1414, ...).
220  int CalculateRelevance(MatchType match_type, size_t match_number) const;
221
222  // Helper function that actually launches the two autocomplete passes.
223  void RunAutocompletePasses(const AutocompleteInput& input,
224                             bool fixup_input_and_run_pass_1);
225
226  // Given a |match| containing the "what you typed" suggestion created by
227  // SuggestExactInput(), looks up its info in the DB.  If found, fills in the
228  // title from the DB, promotes the match's priority to that of an inline
229  // autocomplete match (maybe it should be slightly better?), and places it on
230  // the front of |matches| (so we pick the right matches to throw away
231  // when culling redirects to/from it).  Returns whether a match was promoted.
232  bool FixupExactSuggestion(history::URLDatabase* db,
233                            const AutocompleteInput& input,
234                            const VisitClassifier& classifier,
235                            AutocompleteMatch* match,
236                            history::HistoryMatches* matches) const;
237
238  // Helper function for FixupExactSuggestion, this returns true if the input
239  // corresponds to some intranet URL where the user has previously visited the
240  // host in question.  In this case the input should be treated as a URL.
241  bool CanFindIntranetURL(history::URLDatabase* db,
242                          const AutocompleteInput& input) const;
243
244  // Determines if |match| is suitable for inline autocomplete.  If so, and if
245  // |params| is non-NULL, promotes the match.  Returns whether |match| is
246  // suitable for inline autocomplete.
247  bool PromoteMatchForInlineAutocomplete(const history::HistoryMatch& match,
248                                         HistoryURLProviderParams* params);
249
250  // Sees if a shorter version of the best match should be created, and if so
251  // places it at the front of |matches|.  This can suggest history URLs that
252  // are prefixes of the best match (if they've been visited enough, compared to
253  // the best match), or create host-only suggestions even when they haven't
254  // been visited before: if the user visited http://example.com/asdf once,
255  // we'll suggest http://example.com/ even if they've never been to it.
256  void PromoteOrCreateShorterSuggestion(
257      history::URLDatabase* db,
258      const HistoryURLProviderParams& params,
259      bool have_what_you_typed_match,
260      const AutocompleteMatch& what_you_typed_match,
261      history::HistoryMatches* matches);
262
263  // Removes results that have been rarely typed or visited, and not any time
264  // recently.  The exact parameters for this heuristic can be found in the
265  // function body. Also culls results corresponding to queries from the default
266  // search engine. These are low-quality, difficult-to-understand matches for
267  // users, and the SearchProvider should surface past queries in a better way
268  // anyway.
269  void CullPoorMatches(const HistoryURLProviderParams& params,
270                       history::HistoryMatches* matches) const;
271
272  // Removes results that redirect to each other, leaving at most |max_results|
273  // results.
274  void CullRedirects(history::HistoryBackend* backend,
275                     history::HistoryMatches* matches,
276                     size_t max_results) const;
277
278  // Helper function for CullRedirects, this removes all but the first
279  // occurance of [any of the set of strings in |remove|] from the |matches|
280  // list.
281  //
282  // The return value is the index of the item that is after the item in the
283  // input identified by |source_index|. If |source_index| or an item before
284  // is removed, the next item will be shifted, and this allows the caller to
285  // pick up on the next one when this happens.
286  size_t RemoveSubsequentMatchesOf(history::HistoryMatches* matches,
287                                   size_t source_index,
288                                   const std::vector<GURL>& remove) const;
289
290  // Converts a line from the database into an autocomplete match for display.
291  // If experimental scoring is enabled, the final relevance score might be
292  // different from the given |relevance|.
293  AutocompleteMatch HistoryMatchToACMatch(
294      const HistoryURLProviderParams& params,
295      const history::HistoryMatch& history_match,
296      MatchType match_type,
297      int relevance);
298
299  // Returns a set of classifications that highlight all the occurrences
300  // of |input_text| at word breaks in |description|.
301  static ACMatchClassifications ClassifyDescription(
302      const base::string16& input_text,
303      const base::string16& description);
304
305  // Returns a new relevance score for the given |match| based on the
306  // |old_relevance| score and |scoring_params_|.  The new relevance score is
307  // guaranteed to be less than or equal to |old_relevance|.  In other words,
308  // this function can only demote a score, never boost it.
309  // Returns |old_relevance| score if experimental scoring is disabled
310  // or if the |match.promoted| is true.
311  int CalculateRelevanceScoreUsingScoringParams(
312      const history::HistoryMatch& match,
313      int old_relevance) const;
314
315  // Params for the current query.  The provider should not free this directly;
316  // instead, it is passed as a parameter through the history backend, and the
317  // parameter itself is freed once it's no longer needed.  The only reason we
318  // keep this member is so we can set the cancel bit on it.
319  HistoryURLProviderParams* params_;
320
321  // Params controlling experimental behavior of this provider.
322  HUPScoringParams scoring_params_;
323
324  // If true, HistoryURL provider should lookup and cull redirects.  If
325  // false, it returns matches that may be redirects to each other and
326  // simply hopes the default AutoCompleteController behavior to remove
327  // URLs that are likely duplicates (http://google.com <->
328  // https://www.google.com/, etc.) will do a good enough job.
329  bool cull_redirects_;
330
331  // Used in PromoteOrCreateShorterSuggestion().  If true, we may create
332  // shorter suggestions even when they haven't been visited before:
333  // if the user visited http://example.com/asdf once, we'll suggest
334  // http://example.com/ even if they've never been to it.
335  bool create_shorter_match_;
336
337  // Whether to query the history URL database to match.  Even if
338  // false, we still use the URL database to decide if the
339  // URL-what-you-typed was visited before or not.  If false, the only
340  // possible result that HistoryURL provider can return is
341  // URL-what-you-typed.  This variable is not part of params_ because
342  // it never changes after the HistoryURLProvider is initialized.
343  // It's used to aid the transition to get all URLs from history to
344  // be scored in the HistoryQuick provider only.
345  bool search_url_database_;
346};
347
348#endif  // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
349