bookmark_provider.cc revision eb525c5499e34cc9c4b825d6d9e75bb07cc06ace
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/bookmark_provider.h"
6
7#include <algorithm>
8#include <functional>
9#include <vector>
10
11#include "base/metrics/histogram.h"
12#include "base/prefs/pref_service.h"
13#include "base/time/time.h"
14#include "chrome/browser/autocomplete/autocomplete_result.h"
15#include "chrome/browser/bookmarks/bookmark_model.h"
16#include "chrome/browser/bookmarks/bookmark_model_factory.h"
17#include "chrome/browser/bookmarks/bookmark_title_match.h"
18#include "chrome/browser/profiles/profile.h"
19#include "chrome/common/pref_names.h"
20#include "net/base/net_util.h"
21
22typedef std::vector<BookmarkTitleMatch> TitleMatches;
23
24// BookmarkProvider ------------------------------------------------------------
25
26BookmarkProvider::BookmarkProvider(
27    AutocompleteProviderListener* listener,
28    Profile* profile)
29    : AutocompleteProvider(listener, profile,
30                           AutocompleteProvider::TYPE_BOOKMARK),
31      bookmark_model_(NULL) {
32  if (profile) {
33    bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
34    languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
35  }
36}
37
38void BookmarkProvider::Start(const AutocompleteInput& input,
39                             bool minimal_changes) {
40  if (minimal_changes)
41    return;
42  matches_.clear();
43
44  // Short-circuit any matching when inline autocompletion is disabled and
45  // we're looking for BEST_MATCH because none of the BookmarkProvider's
46  // matches can score high enough to qualify.
47  if (input.text().empty() ||
48      ((input.type() != AutocompleteInput::UNKNOWN) &&
49       (input.type() != AutocompleteInput::QUERY)) ||
50      ((input.matches_requested() == AutocompleteInput::BEST_MATCH) &&
51       input.prevent_inline_autocomplete()))
52    return;
53
54  base::TimeTicks start_time = base::TimeTicks::Now();
55  DoAutocomplete(input,
56                 input.matches_requested() == AutocompleteInput::BEST_MATCH);
57  UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime",
58                      base::TimeTicks::Now() - start_time);
59}
60
61BookmarkProvider::~BookmarkProvider() {}
62
63void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input,
64                                      bool best_match) {
65  // We may not have a bookmark model for some unit tests.
66  if (!bookmark_model_)
67    return;
68
69  TitleMatches matches;
70  // Retrieve enough bookmarks so that we have a reasonable probability of
71  // suggesting the one that the user desires.
72  const size_t kMaxBookmarkMatches = 50;
73
74  // GetBookmarksWithTitlesMatching returns bookmarks matching the user's
75  // search terms using the following rules:
76  //  - The search text is broken up into search terms. Each term is searched
77  //    for separately.
78  //  - Term matches are always performed against the start of a word. 'def'
79  //    will match against 'define' but not against 'indefinite'.
80  //  - Terms must be at least three characters in length in order to perform
81  //    partial word matches. Any term of lesser length will only be used as an
82  //    exact match. 'def' will match against 'define' but 'de' will not match.
83  //  - A search containing multiple terms will return results with those words
84  //    occuring in any order.
85  //  - Terms enclosed in quotes comprises a phrase that must match exactly.
86  //  - Multiple terms enclosed in quotes will require those exact words in that
87  //    exact order to match.
88  //
89  // Note: GetBookmarksWithTitlesMatching() will never return a match span
90  // greater than the length of the title against which it is being matched,
91  // nor can those spans ever overlap because the match spans are coalesced
92  // for all matched terms.
93  //
94  // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching
95  // for complete details of how title searches are performed against the user's
96  // bookmarks.
97  bookmark_model_->GetBookmarksWithTitlesMatching(input.text(),
98                                                  kMaxBookmarkMatches,
99                                                  &matches);
100  if (matches.empty())
101    return;  // There were no matches.
102  for (TitleMatches::const_iterator i = matches.begin(); i != matches.end();
103       ++i) {
104    // Create and score the AutocompleteMatch. If its score is 0 then the
105    // match is discarded.
106    AutocompleteMatch match(TitleMatchToACMatch(*i));
107    if (match.relevance > 0)
108      matches_.push_back(match);
109  }
110
111  // Sort and clip the resulting matches.
112  size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches;
113  if (matches_.size() > max_matches) {
114    std::partial_sort(matches_.begin(), matches_.end(),
115                      matches_.begin() + max_matches,
116                      AutocompleteMatch::MoreRelevant);
117    matches_.resize(max_matches);
118  } else {
119    std::sort(matches_.begin(), matches_.end(),
120              AutocompleteMatch::MoreRelevant);
121  }
122}
123
124namespace {
125
126// for_each helper functor that calculates a match factor for each query term
127// when calculating the final score.
128//
129// Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's
130// title the term matches, and 2) where the match is positioned within the
131// bookmark's title. A full length match earns a 1.0. A half-length match earns
132// at most a 0.5 and at least a 0.25. A single character match against a title
133// that is 100 characters long where the match is at the first character will
134// earn a 0.01 and at the last character will earn a 0.0001.
135class ScoringFunctor {
136 public:
137  // |title_length| is the length of the bookmark title against which this
138  // match will be scored.
139  explicit ScoringFunctor(size_t title_length)
140      : title_length_(static_cast<double>(title_length)),
141        scoring_factor_(0.0) {
142  }
143
144  void operator()(const Snippet::MatchPosition& match) {
145    double term_length = static_cast<double>(match.second - match.first);
146    scoring_factor_ += term_length / title_length_ *
147        (title_length_ - match.first) / title_length_;
148  }
149
150  double ScoringFactor() { return scoring_factor_; }
151
152 private:
153  double title_length_;
154  double scoring_factor_;
155};
156
157}  // namespace
158
159AutocompleteMatch BookmarkProvider::TitleMatchToACMatch(
160    const BookmarkTitleMatch& title_match) {
161  // The AutocompleteMatch we construct is non-deletable because the only
162  // way to support this would be to delete the underlying bookmark, which is
163  // unlikely to be what the user intends.
164  AutocompleteMatch match(this, 0, false,
165                          AutocompleteMatchType::BOOKMARK_TITLE);
166  const string16& title(title_match.node->GetTitle());
167  DCHECK(!title.empty());
168  const GURL& url(title_match.node->url());
169  match.destination_url = url;
170  match.contents = net::FormatUrl(url, languages_,
171      net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP,
172      net::UnescapeRule::SPACES, NULL, NULL, NULL);
173  match.contents_class.push_back(
174      ACMatchClassification(0, ACMatchClassification::NONE));
175  match.fill_into_edit =
176      AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
177                                                              match.contents);
178  match.description = title;
179  match.description_class =
180      ClassificationsFromMatch(title_match.match_positions,
181                               match.description.size());
182  match.starred = true;
183
184  // Summary on how a relevance score is determined for the match:
185  //
186  // For each term matching within the bookmark's title (as given by the set of
187  // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then
188  // use the sum to figure out a value between the base score and the maximum
189  // score.
190  //
191  // The factor for each term is the product of:
192  //
193  //  1) how much of the bookmark's title has been matched by the term:
194  //       (term length / title length).
195  //
196  //  Example: Given a bookmark title 'abcde fghijklm', with a title length
197  //     of 14, and two different search terms, 'abcde' and 'fghijklm', with
198  //     term lengths of 5 and 8, respectively, 'fghijklm' will score higher
199  //     (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357).
200  //
201  //  2) where the term match occurs within the bookmark's title, giving more
202  //     points for matches that appear earlier in the title:
203  //       ((title length - position of match start) / title_length).
204  //
205  //  Example: Given a bookmark title of 'abcde fghijklm', with a title length
206  //     of 14, and two different search terms, 'abcde' and 'fghij', with
207  //     start positions of 0 and 6, respectively, 'abcde' will score higher
208  //     (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
209  //     a partial factor of (14-6)/14 = 0.571 ).
210  //
211  // Once all term factors have been calculated they are summed. The resulting
212  // sum will never be greater than 1.0 because of the way the bookmark model
213  // matches and removes overlaps. (In particular, the bookmark model only
214  // matches terms to the beginning of words and it removes all overlapping
215  // matches, keeping only the longest. Together these mean that each
216  // character is included in at most one match. This property ensures the
217  // sum of factors is at most 1.) This sum is then multiplied against the
218  // scoring range available, which is 299. The 299 is calculated by
219  // subtracting the minimum possible score, 900, from the maximum possible
220  // score, 1199. This product, ranging from 0 to 299, is added to the minimum
221  // possible score, 900, giving the preliminary score.
222  //
223  // If the preliminary score is less than the maximum possible score, 1199,
224  // it can be boosted up to that maximum possible score if the URL referenced
225  // by the bookmark is also referenced by any of the user's other bookmarks.
226  // A count of how many times the bookmark's URL is referenced is determined
227  // and, for each additional reference beyond the one for the bookmark being
228  // scored up to a maximum of three, the score is boosted by a fixed amount
229  // given by |kURLCountBoost|, below.
230  //
231  ScoringFunctor position_functor =
232      for_each(title_match.match_positions.begin(),
233               title_match.match_positions.end(), ScoringFunctor(title.size()));
234  const int kBaseBookmarkScore = 900;
235  const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1;
236  const double kBookmarkScoreRange =
237      static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
238  // It's not likely that GetBookmarksWithTitlesMatching will return overlapping
239  // matches but let's play it safe.
240  match.relevance = std::min(kMaxBookmarkScore,
241      static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) +
242      kBaseBookmarkScore);
243  // Don't waste any time searching for additional referenced URLs if we
244  // already have a perfect title match.
245  if (match.relevance >= kMaxBookmarkScore)
246    return match;
247  // Boost the score if the bookmark's URL is referenced by other bookmarks.
248  const int kURLCountBoost[4] = { 0, 75, 125, 150 };
249  std::vector<const BookmarkNode*> nodes;
250  bookmark_model_->GetNodesByURL(url, &nodes);
251  DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
252  match.relevance +=
253      kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
254  match.relevance = std::min(kMaxBookmarkScore, match.relevance);
255  return match;
256}
257
258// static
259ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
260    const Snippet::MatchPositions& positions,
261    size_t text_length) {
262  ACMatchClassifications classifications;
263  if (positions.empty()) {
264    classifications.push_back(
265        ACMatchClassification(0, ACMatchClassification::NONE));
266    return classifications;
267  }
268
269  for (Snippet::MatchPositions::const_iterator i = positions.begin();
270       i != positions.end(); ++i) {
271    AutocompleteMatch::ACMatchClassifications new_class;
272    AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
273        text_length, 0, &new_class);
274    classifications = AutocompleteMatch::MergeClassifications(
275        classifications, new_class);
276  }
277  return classifications;
278}
279