history_url_provider.h revision f8ee788a64d60abd8f2d742a5fdedde054ecd910
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
6#define CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
7
8#include <string>
9#include <vector>
10
11#include "base/compiler_specific.h"
12#include "base/synchronization/cancellation_flag.h"
13#include "chrome/browser/autocomplete/autocomplete_input.h"
14#include "chrome/browser/autocomplete/history_provider.h"
15#include "chrome/browser/autocomplete/history_provider_util.h"
16#include "chrome/browser/omnibox/omnibox_field_trial.h"
17#include "chrome/browser/search_engines/template_url.h"
18
// Forward declarations for types this header only refers to by pointer or
// reference, so their full definitions need not be included here.
class Profile;
class SearchTermsData;

namespace base {
class MessageLoop;
}  // namespace base

namespace history {
class HistoryBackend;
class URLDatabase;
}  // namespace history
30
// How history autocomplete works
// ==============================
//
// Read down this diagram for temporal ordering.
//
//   Main thread                History thread
//   -----------                --------------
//   AutocompleteController::Start
//     -> HistoryURLProvider::Start
//       -> SuggestExactInput
//       [params_ allocated]
//       -> DoAutocomplete (for inline autocomplete)
//         -> URLDatabase::AutocompleteForPrefix (on in-memory DB)
//       -> HistoryService::ScheduleAutocomplete
//       (return to controller) ----
//                                 /
//                            HistoryBackend::ScheduleAutocomplete
//                              -> HistoryURLProvider::ExecuteWithDB
//                                -> DoAutocomplete
//                                  -> URLDatabase::AutocompleteForPrefix
//                              /
//   HistoryService::QueryComplete
//     [params_ destroyed]
//     -> AutocompleteProviderListener::OnProviderUpdate
//
// The autocomplete controller calls us, and must be called back, on the main
// thread.  When called, we run two autocomplete passes.  The first pass runs
// synchronously on the main thread and queries the in-memory URL database.
// This pass promotes matches for inline autocomplete if applicable.  We do
// this synchronously so that users get consistent behavior when they type
// quickly and hit enter, no matter how loaded the main history database is.
// Doing this synchronously also prevents inline autocomplete from being
// "flickery" in the AutocompleteEdit.  Because the in-memory DB does not have
// redirect data, results other than the top match might change between the
// two passes, so we can't just decide to use this pass' matches as the final
// results.
//
// The second autocomplete pass uses the full history database, which must be
// queried on the history thread.  Start() asks the history service to schedule
// a callback on the history thread with a pointer to the main database.  When
// we are done doing queries, we schedule a task on the main thread that
// notifies the AutocompleteController that we're done.
//
// The communication between these threads is done using a
// HistoryURLProviderParams object.  This is allocated in the main thread, and
// normally deleted in QueryComplete().  So that both autocomplete passes can
// use the same code, we also use this to hold results during the first
// autocomplete pass.
//
// While the second pass is running, the AutocompleteController may cancel the
// request.  This can happen frequently when the user is typing quickly.  In
// this case, the main thread sets params_->cancel_flag, which the background
// thread checks periodically.  If it finds the flag set, it stops what it's
// doing immediately and calls back to the main thread.  (We don't delete the
// params on the history thread, because we should only do that when we can
// safely NULL out params_, and that must be done on the main thread.)
87
88// Used to communicate autocomplete parameters between threads via the history
89// service.
90struct HistoryURLProviderParams {
91  HistoryURLProviderParams(const AutocompleteInput& input,
92                           bool trim_http,
93                           const AutocompleteMatch& what_you_typed_match,
94                           const std::string& languages,
95                           TemplateURL* default_search_provider,
96                           const SearchTermsData& search_terms_data);
97  ~HistoryURLProviderParams();
98
99  base::MessageLoop* message_loop;
100
101  // A copy of the autocomplete input. We need the copy since this object will
102  // live beyond the original query while it runs on the history thread.
103  AutocompleteInput input;
104
105  // Should inline autocompletion be disabled? This is initalized from
106  // |input.prevent_inline_autocomplete()|, but set to false is the input
107  // contains trailing white space.
108  bool prevent_inline_autocomplete;
109
110  // Set when "http://" should be trimmed from the beginning of the URLs.
111  bool trim_http;
112
113  // A match corresponding to what the user typed.
114  AutocompleteMatch what_you_typed_match;
115
116  // Set by the main thread to cancel this request.  If this flag is set when
117  // the query runs, the query will be abandoned.  This allows us to avoid
118  // running queries that are no longer needed.  Since we don't care if we run
119  // the extra queries, the lack of signaling is not a problem.
120  base::CancellationFlag cancel_flag;
121
122  // Set by ExecuteWithDB() on the history thread when the query could not be
123  // performed because the history system failed to properly init the database.
124  // If this is set when the main thread is called back, it avoids changing
125  // |matches_| at all, so it won't delete the default match Start() creates.
126  bool failed;
127
128  // List of matches written by the history thread.  We keep this separate list
129  // to avoid having the main thread read the provider's matches while the
130  // history thread is manipulating them.  The provider copies this list back
131  // to matches_ on the main thread in QueryComplete().
132  ACMatches matches;
133
134  // Languages we should pass to gfx::GetCleanStringFromUrl.
135  std::string languages;
136
137  // When true, we should avoid calling SuggestExactInput().
138  bool dont_suggest_exact_input;
139
140  // The default search provider and search terms data necessary to cull results
141  // that correspond to searches (on the default engine).  These can only be
142  // obtained on the UI thread, so we have to copy them into here to pass them
143  // to the history thread.  We use a scoped_ptr<TemplateURL> for the DSP since
144  // TemplateURLs can't be copied by value. We use a scoped_ptr<SearchTermsData>
145  // so that we can store a snapshot of the SearchTermsData accessible from the
146  // history thread.
147  scoped_ptr<TemplateURL> default_search_provider;
148  scoped_ptr<SearchTermsData> search_terms_data;
149
150 private:
151  DISALLOW_COPY_AND_ASSIGN(HistoryURLProviderParams);
152};
153
154// This class is an autocomplete provider and is also a pseudo-internal
155// component of the history system.  See comments above.
156class HistoryURLProvider : public HistoryProvider {
157 public:
158  // Various values used in scoring, made public so other providers
159  // can insert results in appropriate ranges relative to these.
160  static const int kScoreForBestInlineableResult;
161  static const int kScoreForUnvisitedIntranetResult;
162  static const int kScoreForWhatYouTypedResult;
163  static const int kBaseScoreForNonInlineableResult;
164
165  HistoryURLProvider(AutocompleteProviderListener* listener, Profile* profile);
166
167  // HistoryProvider:
168  virtual void Start(const AutocompleteInput& input,
169                     bool minimal_changes) OVERRIDE;
170  virtual void Stop(bool clear_cached_results) OVERRIDE;
171
172  // Returns a match representing a navigation to |destination_url| given user
173  // input of |text|.  |trim_http| controls whether the match's |fill_into_edit|
174  // and |contents| should have any HTTP scheme stripped off, and should not be
175  // set to true if |text| contains an http prefix.
176  // NOTE: This does not set the relevance of the returned match, as different
177  //       callers want different behavior. Callers must set this manually.
178  AutocompleteMatch SuggestExactInput(const base::string16& text,
179                                      const GURL& destination_url,
180                                      bool trim_http);
181
182  // Runs the history query on the history thread, called by the history
183  // system. The history database MAY BE NULL in which case it is not
184  // available and we should return no data. Also schedules returning the
185  // results to the main thread
186  void ExecuteWithDB(history::HistoryBackend* backend,
187                     history::URLDatabase* db,
188                     HistoryURLProviderParams* params);
189
190 private:
191  FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest, HUPScoringExperiment);
192
193  enum MatchType {
194    NORMAL,
195    WHAT_YOU_TYPED,
196    INLINE_AUTOCOMPLETE,
197    UNVISITED_INTRANET,  // An intranet site that has never been visited.
198  };
199  class VisitClassifier;
200
201  ~HistoryURLProvider();
202
203  // Determines the relevance for a match, given its type.  If |match_type| is
204  // NORMAL, |match_number| is a number indicating the relevance of the match
205  // (higher == more relevant).  For other values of |match_type|,
206  // |match_number| is ignored.  Only called some of the time; for some matches,
207  // relevancy scores are assigned consecutively decreasing (1416, 1415, ...).
208  static int CalculateRelevance(MatchType match_type, int match_number);
209
210  // Returns a set of classifications that highlight all the occurrences of
211  // |input_text| at word breaks in |description|.
212  static ACMatchClassifications ClassifyDescription(
213      const base::string16& input_text,
214      const base::string16& description);
215
216  // Actually runs the autocomplete job on the given database, which is
217  // guaranteed not to be NULL.  Used by both autocomplete passes, and therefore
218  // called on multiple different threads (though not simultaneously).
219  void DoAutocomplete(history::HistoryBackend* backend,
220                      history::URLDatabase* db,
221                      HistoryURLProviderParams* params);
222
223  // Dispatches the results to the autocomplete controller. Called on the
224  // main thread by ExecuteWithDB when the results are available.
225  // Frees params_gets_deleted on exit.
226  void QueryComplete(HistoryURLProviderParams* params_gets_deleted);
227
228  // Looks up the info for params->what_you_typed_match in the DB.  If found,
229  // fills in the title, promotes the match's priority to that of an inline
230  // autocomplete match (maybe it should be slightly better?), and places it on
231  // the front of |matches| (so we pick the right matches to throw away when
232  // culling redirects to/from it).  Returns whether a match was promoted.
233  bool FixupExactSuggestion(history::URLDatabase* db,
234                            const VisitClassifier& classifier,
235                            HistoryURLProviderParams* params,
236                            history::HistoryMatches* matches) const;
237
238  // Helper function for FixupExactSuggestion, this returns true if the input
239  // corresponds to some intranet URL where the user has previously visited the
240  // host in question.  In this case the input should be treated as a URL.
241  bool CanFindIntranetURL(history::URLDatabase* db,
242                          const AutocompleteInput& input) const;
243
244  // Determines if |match| is suitable for inline autocomplete.  If so, promotes
245  // the match.  Returns whether |match| was promoted.
246  bool PromoteMatchForInlineAutocomplete(const history::HistoryMatch& match,
247                                         HistoryURLProviderParams* params);
248
249  // Sees if a shorter version of the best match should be created, and if so
250  // places it at the front of |matches|.  This can suggest history URLs that
251  // are prefixes of the best match (if they've been visited enough, compared to
252  // the best match), or create host-only suggestions even when they haven't
253  // been visited before: if the user visited http://example.com/asdf once,
254  // we'll suggest http://example.com/ even if they've never been to it.
255  void PromoteOrCreateShorterSuggestion(
256      history::URLDatabase* db,
257      const HistoryURLProviderParams& params,
258      bool have_what_you_typed_match,
259      history::HistoryMatches* matches);
260
261  // Removes results that have been rarely typed or visited, and not any time
262  // recently.  The exact parameters for this heuristic can be found in the
263  // function body. Also culls results corresponding to queries from the default
264  // search engine. These are low-quality, difficult-to-understand matches for
265  // users, and the SearchProvider should surface past queries in a better way
266  // anyway.
267  void CullPoorMatches(const HistoryURLProviderParams& params,
268                       history::HistoryMatches* matches) const;
269
270  // Removes results that redirect to each other, leaving at most |max_results|
271  // results.
272  void CullRedirects(history::HistoryBackend* backend,
273                     history::HistoryMatches* matches,
274                     size_t max_results) const;
275
276  // Helper function for CullRedirects, this removes all but the first
277  // occurance of [any of the set of strings in |remove|] from the |matches|
278  // list.
279  //
280  // The return value is the index of the item that is after the item in the
281  // input identified by |source_index|. If |source_index| or an item before
282  // is removed, the next item will be shifted, and this allows the caller to
283  // pick up on the next one when this happens.
284  size_t RemoveSubsequentMatchesOf(history::HistoryMatches* matches,
285                                   size_t source_index,
286                                   const std::vector<GURL>& remove) const;
287
288  // Converts a line from the database into an autocomplete match for display.
289  // If experimental scoring is enabled, the final relevance score might be
290  // different from the given |relevance|.
291  AutocompleteMatch HistoryMatchToACMatch(
292      const HistoryURLProviderParams& params,
293      const history::HistoryMatch& history_match,
294      MatchType match_type,
295      int relevance);
296
297  // Params for the current query.  The provider should not free this directly;
298  // instead, it is passed as a parameter through the history backend, and the
299  // parameter itself is freed once it's no longer needed.  The only reason we
300  // keep this member is so we can set the cancel bit on it.
301  HistoryURLProviderParams* params_;
302
303  // Params controlling experimental behavior of this provider.
304  HUPScoringParams scoring_params_;
305
306  // If true, HistoryURL provider should lookup and cull redirects.  If
307  // false, it returns matches that may be redirects to each other and
308  // simply hopes the default AutoCompleteController behavior to remove
309  // URLs that are likely duplicates (http://google.com <->
310  // https://www.google.com/, etc.) will do a good enough job.
311  bool cull_redirects_;
312
313  // Used in PromoteOrCreateShorterSuggestion().  If true, we may create
314  // shorter suggestions even when they haven't been visited before:
315  // if the user visited http://example.com/asdf once, we'll suggest
316  // http://example.com/ even if they've never been to it.
317  bool create_shorter_match_;
318
319  DISALLOW_COPY_AND_ASSIGN(HistoryURLProvider);
320};
321
322#endif  // CHROME_BROWSER_AUTOCOMPLETE_HISTORY_URL_PROVIDER_H_
323