history_url_provider.h revision 7d4cd473f85ac64c3747c96c277f9e506a0d2246
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
6#define CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
7
8#include <string>
9#include <vector>
10
11#include "base/compiler_specific.h"
12#include "base/synchronization/cancellation_flag.h"
13#include "chrome/browser/autocomplete/autocomplete_input.h"
14#include "chrome/browser/autocomplete/history_provider.h"
15#include "chrome/browser/autocomplete/history_provider_util.h"
16#include "chrome/browser/autocomplete/url_prefix.h"
17#include "chrome/browser/search_engines/search_terms_data.h"
18#include "chrome/browser/search_engines/template_url.h"
19
20class Profile;
21
22namespace base {
23class MessageLoop;
24}
25
26namespace history {
27class HistoryBackend;
28class URLDatabase;
29}
30
31// How history autocomplete works
32// ==============================
33//
34// Read down this diagram for temporal ordering.
35//
36//   Main thread                History thread
37//   -----------                --------------
38//   AutocompleteController::Start
39//     -> HistoryURLProvider::Start
40//       -> RunAutocompletePasses
41//         -> SuggestExactInput
42//         [params_ allocated]
43//         -> DoAutocomplete (for inline autocomplete)
44//           -> URLDatabase::AutocompleteForPrefix (on in-memory DB)
45//         -> HistoryService::ScheduleAutocomplete
46//         (return to controller) ----
47//                                   /
48//                              HistoryBackend::ScheduleAutocomplete
49//                                -> HistoryURLProvider::ExecuteWithDB
50//                                  -> DoAutocomplete
51//                                    -> URLDatabase::AutocompleteForPrefix
52//                                /
53//   HistoryService::QueryComplete
54//     [params_ destroyed]
55//     -> AutocompleteProviderListener::OnProviderUpdate
56//
57// The autocomplete controller calls us, and must be called back, on the main
58// thread.  When called, we run two autocomplete passes.  The first pass runs
59// synchronously on the main thread and queries the in-memory URL database.
60// This pass promotes matches for inline autocomplete if applicable.  We do
61// this synchronously so that users get consistent behavior when they type
62// quickly and hit enter, no matter how loaded the main history database is.
63// Doing this synchronously also prevents inline autocomplete from being
64// "flickery" in the AutocompleteEdit.  Because the in-memory DB does not have
65// redirect data, results other than the top match might change between the
66// two passes, so we can't just decide to use this pass' matches as the final
67// results.
68//
69// The second autocomplete pass uses the full history database, which must be
70// queried on the history thread.  Start() asks the history service schedule to
71// callback on the history thread with a pointer to the main database.  When we
72// are done doing queries, we schedule a task on the main thread that notifies
73// the AutocompleteController that we're done.
74//
75// The communication between these threads is done using a
76// HistoryURLProviderParams object.  This is allocated in the main thread, and
77// normally deleted in QueryComplete().  So that both autocomplete passes can
78// use the same code, we also use this to hold results during the first
79// autocomplete pass.
80//
81// While the second pass is running, the AutocompleteController may cancel the
82// request.  This can happen frequently when the user is typing quickly.  In
83// this case, the main thread sets params_->cancel, which the background thread
84// checks periodically.  If it finds the flag set, it stops what it's doing
85// immediately and calls back to the main thread.  (We don't delete the params
86// on the history thread, because we should only do that when we can safely
87// NULL out params_, and that must be done on the main thread.)
88
89// Used to communicate autocomplete parameters between threads via the history
90// service.
91struct HistoryURLProviderParams {
92  HistoryURLProviderParams(const AutocompleteInput& input,
93                           bool trim_http,
94                           const std::string& languages,
95                           TemplateURL* default_search_provider,
96                           const SearchTermsData& search_terms_data);
97  ~HistoryURLProviderParams();
98
99  base::MessageLoop* message_loop;
100
101  // A copy of the autocomplete input. We need the copy since this object will
102  // live beyond the original query while it runs on the history thread.
103  AutocompleteInput input;
104
105  // Should inline autocompletion be disabled? This is initalized from
106  // |input.prevent_inline_autocomplete()|, but set to false is the input
107  // contains trailing white space.
108  bool prevent_inline_autocomplete;
109
110  // Set when "http://" should be trimmed from the beginning of the URLs.
111  bool trim_http;
112
113  // Set by the main thread to cancel this request.  If this flag is set when
114  // the query runs, the query will be abandoned.  This allows us to avoid
115  // running queries that are no longer needed.  Since we don't care if we run
116  // the extra queries, the lack of signaling is not a problem.
117  base::CancellationFlag cancel_flag;
118
119  // Set by ExecuteWithDB() on the history thread when the query could not be
120  // performed because the history system failed to properly init the database.
121  // If this is set when the main thread is called back, it avoids changing
122  // |matches_| at all, so it won't delete the default match
123  // RunAutocompletePasses() creates.
124  bool failed;
125
126  // List of matches written by the history thread.  We keep this separate list
127  // to avoid having the main thread read the provider's matches while the
128  // history thread is manipulating them.  The provider copies this list back
129  // to matches_ on the main thread in QueryComplete().
130  ACMatches matches;
131
132  // Languages we should pass to gfx::GetCleanStringFromUrl.
133  std::string languages;
134
135  // When true, we should avoid calling SuggestExactInput().
136  bool dont_suggest_exact_input;
137
138  // The default search provider and search terms data necessary to cull results
139  // that correspond to searches (on the default engine).  These can only be
140  // obtained on the UI thread, so we have to copy them into here to pass them
141  // to the history thread.  We use a scoped_ptr<TemplateURL> for the DSP since
142  // TemplateURLs can't be copied by value. We use a scoped_ptr<SearchTermsData>
143  // so that we can store a snapshot of the SearchTermsData accessible from the
144  // history thread.
145  scoped_ptr<TemplateURL> default_search_provider;
146  scoped_ptr<SearchTermsData> search_terms_data;
147
148 private:
149  DISALLOW_COPY_AND_ASSIGN(HistoryURLProviderParams);
150};
151
152// This class is an autocomplete provider and is also a pseudo-internal
153// component of the history system.  See comments above.
154class HistoryURLProvider : public HistoryProvider {
155 public:
156  // Various values used in scoring, made public so other providers
157  // can insert results in appropriate ranges relative to these.
158  static const int kScoreForBestInlineableResult;
159  static const int kScoreForUnvisitedIntranetResult;
160  static const int kScoreForWhatYouTypedResult;
161  static const int kBaseScoreForNonInlineableResult;
162
163  HistoryURLProvider(AutocompleteProviderListener* listener, Profile* profile);
164
165#ifdef UNIT_TEST
166  HistoryURLProvider(AutocompleteProviderListener* listener,
167                     Profile* profile,
168                     const std::string& languages)
169    : HistoryProvider(listener, profile,
170          AutocompleteProvider::TYPE_HISTORY_URL),
171      params_(NULL),
172      cull_redirects_(true),
173      create_shorter_match_(true),
174      search_url_database_(true),
175      languages_(languages) {}
176#endif
177
178  // Returns a match corresponding to exactly what the user has typed.
179  // |trim_http| should not be set to true if |input| contains an http
180  // prefix.
181  // NOTE: This does not set the relevance of the returned match, as different
182  //       callers want different behavior. Callers must set this manually.
183  // This function is static so SearchProvider may construct similar matches.
184  static AutocompleteMatch SuggestExactInput(AutocompleteProvider* provider,
185                                             const AutocompleteInput& input,
186                                             bool trim_http);
187
188  // AutocompleteProvider
189  virtual void Start(const AutocompleteInput& input,
190                     bool minimal_changes) OVERRIDE;
191  virtual void Stop(bool clear_cached_results) OVERRIDE;
192
193  // Runs the history query on the history thread, called by the history
194  // system. The history database MAY BE NULL in which case it is not
195  // available and we should return no data. Also schedules returning the
196  // results to the main thread
197  void ExecuteWithDB(history::HistoryBackend* backend,
198                     history::URLDatabase* db,
199                     HistoryURLProviderParams* params);
200
201  // Actually runs the autocomplete job on the given database, which is
202  // guaranteed not to be NULL.
203  void DoAutocomplete(history::HistoryBackend* backend,
204                      history::URLDatabase* db,
205                      HistoryURLProviderParams* params);
206
207  // Dispatches the results to the autocomplete controller. Called on the
208  // main thread by ExecuteWithDB when the results are available.
209  // Frees params_gets_deleted on exit.
210  void QueryComplete(HistoryURLProviderParams* params_gets_deleted);
211
212 private:
213  enum MatchType {
214    NORMAL,
215    WHAT_YOU_TYPED,
216    INLINE_AUTOCOMPLETE,
217    UNVISITED_INTRANET,  // An intranet site that has never been visited.
218  };
219  class VisitClassifier;
220
221  ~HistoryURLProvider();
222
223  // Determines the relevance for a match, given its type.  If
224  // |match_type| is NORMAL, |match_number| is a number [0,
225  // kMaxSuggestions) indicating the relevance of the match (higher ==
226  // more relevant).  For other values of |match_type|, |match_number|
227  // is ignored.  Only called some of the time; for some matches,
228  // relevancy scores are assigned consecutively decreasing (1416,
229  // 1415, 1414, ...).
230  int CalculateRelevance(MatchType match_type, size_t match_number) const;
231
232  // Helper function that actually launches the two autocomplete passes.
233  void RunAutocompletePasses(const AutocompleteInput& input,
234                             bool fixup_input_and_run_pass_1);
235
236  // Given a |match| containing the "what you typed" suggestion created by
237  // SuggestExactInput(), looks up its info in the DB.  If found, fills in the
238  // title from the DB, promotes the match's priority to that of an inline
239  // autocomplete match (maybe it should be slightly better?), and places it on
240  // the front of |matches| (so we pick the right matches to throw away
241  // when culling redirects to/from it).  Returns whether a match was promoted.
242  bool FixupExactSuggestion(history::URLDatabase* db,
243                            const AutocompleteInput& input,
244                            const VisitClassifier& classifier,
245                            AutocompleteMatch* match,
246                            history::HistoryMatches* matches) const;
247
248  // Helper function for FixupExactSuggestion, this returns true if the input
249  // corresponds to some intranet URL where the user has previously visited the
250  // host in question.  In this case the input should be treated as a URL.
251  bool CanFindIntranetURL(history::URLDatabase* db,
252                          const AutocompleteInput& input) const;
253
254  // Determines if |match| is suitable for inline autocomplete.  If so, and if
255  // |params| is non-NULL, promotes the match.  Returns whether |match| is
256  // suitable for inline autocomplete.
257  bool PromoteMatchForInlineAutocomplete(HistoryURLProviderParams* params,
258                                         const history::HistoryMatch& match);
259
260  // Sees if a shorter version of the best match should be created, and if so
261  // places it at the front of |matches|.  This can suggest history URLs that
262  // are prefixes of the best match (if they've been visited enough, compared to
263  // the best match), or create host-only suggestions even when they haven't
264  // been visited before: if the user visited http://example.com/asdf once,
265  // we'll suggest http://example.com/ even if they've never been to it.
266  void PromoteOrCreateShorterSuggestion(
267      history::URLDatabase* db,
268      const HistoryURLProviderParams& params,
269      bool have_what_you_typed_match,
270      const AutocompleteMatch& what_you_typed_match,
271      history::HistoryMatches* matches);
272
273  // Sorts the given list of matches.
274  void SortMatches(history::HistoryMatches* matches) const;
275
276  // Removes results that have been rarely typed or visited, and not any time
277  // recently.  The exact parameters for this heuristic can be found in the
278  // function body. Also culls results corresponding to queries from the default
279  // search engine. These are low-quality, difficult-to-understand matches for
280  // users, and the SearchProvider should surface past queries in a better way
281  // anyway.
282  void CullPoorMatches(history::HistoryMatches* matches,
283                       HistoryURLProviderParams* params) const;
284
285  // Removes results that redirect to each other, leaving at most |max_results|
286  // results.
287  void CullRedirects(history::HistoryBackend* backend,
288                     history::HistoryMatches* matches,
289                     size_t max_results) const;
290
291  // Helper function for CullRedirects, this removes all but the first
292  // occurance of [any of the set of strings in |remove|] from the |matches|
293  // list.
294  //
295  // The return value is the index of the item that is after the item in the
296  // input identified by |source_index|. If |source_index| or an item before
297  // is removed, the next item will be shifted, and this allows the caller to
298  // pick up on the next one when this happens.
299  size_t RemoveSubsequentMatchesOf(history::HistoryMatches* matches,
300                                   size_t source_index,
301                                   const std::vector<GURL>& remove) const;
302
303  // Converts a line from the database into an autocomplete match for display.
304  AutocompleteMatch HistoryMatchToACMatch(
305      HistoryURLProviderParams* params,
306      const history::HistoryMatch& history_match,
307      MatchType match_type,
308      int relevance);
309
310  // Params for the current query.  The provider should not free this directly;
311  // instead, it is passed as a parameter through the history backend, and the
312  // parameter itself is freed once it's no longer needed.  The only reason we
313  // keep this member is so we can set the cancel bit on it.
314  HistoryURLProviderParams* params_;
315
316  // If true, HistoryURL provider should lookup and cull redirects.  If
317  // false, it returns matches that may be redirects to each other and
318  // simply hopes the default AutoCompleteController behavior to remove
319  // URLs that are likely duplicates (http://google.com <->
320  // https://www.google.com/, etc.) will do a good enough job.
321  bool cull_redirects_;
322
323  // Used in PromoteOrCreateShorterSuggestion().  If true, we may create
324  // shorter suggestions even when they haven't been visited before:
325  // if the user visited http://example.com/asdf once, we'll suggest
326  // http://example.com/ even if they've never been to it.
327  bool create_shorter_match_;
328
329  // Whether to query the history URL database to match.  Even if
330  // false, we still use the URL database to decide if the
331  // URL-what-you-typed was visited before or not.  If false, the only
332  // possible result that HistoryURL provider can return is
333  // URL-what-you-typed.  This variable is not part of params_ because
334  // it never changes after the HistoryURLProvider is initialized.
335  // It's used to aid the transition to get all URLs from history to
336  // be scored in the HistoryQuick provider only.
337  bool search_url_database_;
338
339  // Only used by unittests; if non-empty, overrides accept-languages in the
340  // profile's pref system.
341  std::string languages_;
342};
343
344#endif  // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
345