bookmark_provider.cc revision a02191e04bc25c4935f804f2c080ae28663d096d
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/bookmark_provider.h"
6
7#include <algorithm>
8#include <functional>
9#include <vector>
10
11#include "base/prefs/pref_service.h"
12#include "base/strings/utf_string_conversions.h"
13#include "chrome/browser/autocomplete/autocomplete_result.h"
14#include "chrome/browser/autocomplete/history_provider.h"
15#include "chrome/browser/autocomplete/url_prefix.h"
16#include "chrome/browser/bookmarks/bookmark_model.h"
17#include "chrome/browser/bookmarks/bookmark_model_factory.h"
18#include "chrome/browser/bookmarks/bookmark_title_match.h"
19#include "chrome/browser/profiles/profile.h"
20#include "chrome/common/pref_names.h"
21#include "net/base/net_util.h"
22
23typedef std::vector<BookmarkTitleMatch> TitleMatches;
24
25// BookmarkProvider ------------------------------------------------------------
26
27BookmarkProvider::BookmarkProvider(
28    AutocompleteProviderListener* listener,
29    Profile* profile)
30    : AutocompleteProvider(listener, profile,
31                           AutocompleteProvider::TYPE_BOOKMARK),
32      bookmark_model_(NULL) {
33  if (profile) {
34    bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
35    languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
36  }
37}
38
39void BookmarkProvider::Start(const AutocompleteInput& input,
40                             bool minimal_changes) {
41  if (minimal_changes)
42    return;
43  matches_.clear();
44
45  if (input.text().empty() ||
46      ((input.type() != AutocompleteInput::UNKNOWN) &&
47       (input.type() != AutocompleteInput::QUERY)))
48    return;
49
50  DoAutocomplete(input,
51                 input.matches_requested() == AutocompleteInput::BEST_MATCH);
52}
53
54BookmarkProvider::~BookmarkProvider() {}
55
56void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input,
57                                      bool best_match) {
58  // We may not have a bookmark model for some unit tests.
59  if (!bookmark_model_)
60    return;
61
62  TitleMatches matches;
63  // Retrieve enough bookmarks so that we have a reasonable probability of
64  // suggesting the one that the user desires.
65  const size_t kMaxBookmarkMatches = 50;
66
67  // GetBookmarksWithTitlesMatching returns bookmarks matching the user's
68  // search terms using the following rules:
69  //  - The search text is broken up into search terms. Each term is searched
70  //    for separately.
71  //  - Term matches are always performed against the start of a word. 'def'
72  //    will match against 'define' but not against 'indefinite'.
73  //  - Terms must be at least three characters in length in order to perform
74  //    partial word matches. Any term of lesser length will only be used as an
75  //    exact match. 'def' will match against 'define' but 'de' will not match.
76  //  - A search containing multiple terms will return results with those words
77  //    occuring in any order.
78  //  - Terms enclosed in quotes comprises a phrase that must match exactly.
79  //  - Multiple terms enclosed in quotes will require those exact words in that
80  //    exact order to match.
81  //
82  // Note: GetBookmarksWithTitlesMatching() will never return a match span
83  // greater than the length of the title against which it is being matched,
84  // nor can those spans ever overlap because the match spans are coalesced
85  // for all matched terms.
86  //
87  // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching
88  // for complete details of how title searches are performed against the user's
89  // bookmarks.
90  bookmark_model_->GetBookmarksWithTitlesMatching(input.text(),
91                                                  kMaxBookmarkMatches,
92                                                  &matches);
93  if (matches.empty())
94    return;  // There were no matches.
95  AutocompleteInput fixed_up_input(input);
96  FixupUserInput(&fixed_up_input);
97  for (TitleMatches::const_iterator i = matches.begin(); i != matches.end();
98       ++i) {
99    // Create and score the AutocompleteMatch. If its score is 0 then the
100    // match is discarded.
101    AutocompleteMatch match(TitleMatchToACMatch(input, fixed_up_input, *i));
102    if (match.relevance > 0)
103      matches_.push_back(match);
104  }
105
106  // Sort and clip the resulting matches.
107  size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches;
108  if (matches_.size() > max_matches) {
109    std::partial_sort(matches_.begin(),
110                      matches_.begin() + max_matches,
111                      matches_.end(),
112                      AutocompleteMatch::MoreRelevant);
113    matches_.resize(max_matches);
114  } else {
115    std::sort(matches_.begin(), matches_.end(),
116              AutocompleteMatch::MoreRelevant);
117  }
118}
119
120namespace {
121
122// for_each helper functor that calculates a match factor for each query term
123// when calculating the final score.
124//
125// Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's
126// title the term matches, and 2) where the match is positioned within the
127// bookmark's title. A full length match earns a 1.0. A half-length match earns
128// at most a 0.5 and at least a 0.25. A single character match against a title
129// that is 100 characters long where the match is at the first character will
130// earn a 0.01 and at the last character will earn a 0.0001.
131class ScoringFunctor {
132 public:
133  // |title_length| is the length of the bookmark title against which this
134  // match will be scored.
135  explicit ScoringFunctor(size_t title_length)
136      : title_length_(static_cast<double>(title_length)),
137        scoring_factor_(0.0) {
138  }
139
140  void operator()(const Snippet::MatchPosition& match) {
141    double term_length = static_cast<double>(match.second - match.first);
142    scoring_factor_ += term_length / title_length_ *
143        (title_length_ - match.first) / title_length_;
144  }
145
146  double ScoringFactor() { return scoring_factor_; }
147
148 private:
149  double title_length_;
150  double scoring_factor_;
151};
152
153}  // namespace
154
155AutocompleteMatch BookmarkProvider::TitleMatchToACMatch(
156    const AutocompleteInput& input,
157    const AutocompleteInput& fixed_up_input,
158    const BookmarkTitleMatch& title_match) {
159  // The AutocompleteMatch we construct is non-deletable because the only
160  // way to support this would be to delete the underlying bookmark, which is
161  // unlikely to be what the user intends.
162  AutocompleteMatch match(this, 0, false,
163                          AutocompleteMatchType::BOOKMARK_TITLE);
164  const base::string16& title(title_match.node->GetTitle());
165  DCHECK(!title.empty());
166
167  const GURL& url(title_match.node->url());
168  const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec());
169  size_t match_start, inline_autocomplete_offset;
170  URLPrefix::ComputeMatchStartAndInlineAutocompleteOffset(
171      input, fixed_up_input, false, url_utf16, &match_start,
172      &inline_autocomplete_offset);
173  match.destination_url = url;
174  const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) &&
175      ((match_start == base::string16::npos) || (match_start != 0));
176  match.contents = net::FormatUrl(url, languages_,
177      net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
178      net::UnescapeRule::SPACES, NULL, NULL, &inline_autocomplete_offset);
179  match.contents_class.push_back(
180      ACMatchClassification(0, ACMatchClassification::URL));
181  match.fill_into_edit =
182      AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
183                                                              match.contents);
184  if (inline_autocomplete_offset != base::string16::npos) {
185    // |inline_autocomplete_offset| may be beyond the end of the
186    // |fill_into_edit| if the user has typed an URL with a scheme and the
187    // last character typed is a slash.  That slash is removed by the
188    // FormatURLWithOffsets call above.
189    if (inline_autocomplete_offset < match.fill_into_edit.length()) {
190      match.inline_autocompletion =
191          match.fill_into_edit.substr(inline_autocomplete_offset);
192    }
193    match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
194        !HistoryProvider::PreventInlineAutocomplete(input);
195  }
196  match.description = title;
197  match.description_class =
198      ClassificationsFromMatch(title_match.match_positions,
199                               match.description.size());
200  match.starred = true;
201
202  // Summary on how a relevance score is determined for the match:
203  //
204  // For each term matching within the bookmark's title (as given by the set of
205  // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then
206  // use the sum to figure out a value between the base score and the maximum
207  // score.
208  //
209  // The factor for each term is the product of:
210  //
211  //  1) how much of the bookmark's title has been matched by the term:
212  //       (term length / title length).
213  //
214  //  Example: Given a bookmark title 'abcde fghijklm', with a title length
215  //     of 14, and two different search terms, 'abcde' and 'fghijklm', with
216  //     term lengths of 5 and 8, respectively, 'fghijklm' will score higher
217  //     (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357).
218  //
219  //  2) where the term match occurs within the bookmark's title, giving more
220  //     points for matches that appear earlier in the title:
221  //       ((title length - position of match start) / title_length).
222  //
223  //  Example: Given a bookmark title of 'abcde fghijklm', with a title length
224  //     of 14, and two different search terms, 'abcde' and 'fghij', with
225  //     start positions of 0 and 6, respectively, 'abcde' will score higher
226  //     (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
227  //     a partial factor of (14-6)/14 = 0.571 ).
228  //
229  // Once all term factors have been calculated they are summed. The resulting
230  // sum will never be greater than 1.0 because of the way the bookmark model
231  // matches and removes overlaps. (In particular, the bookmark model only
232  // matches terms to the beginning of words and it removes all overlapping
233  // matches, keeping only the longest. Together these mean that each
234  // character is included in at most one match. This property ensures the
235  // sum of factors is at most 1.) This sum is then multiplied against the
236  // scoring range available, which is 299. The 299 is calculated by
237  // subtracting the minimum possible score, 900, from the maximum possible
238  // score, 1199. This product, ranging from 0 to 299, is added to the minimum
239  // possible score, 900, giving the preliminary score.
240  //
241  // If the preliminary score is less than the maximum possible score, 1199,
242  // it can be boosted up to that maximum possible score if the URL referenced
243  // by the bookmark is also referenced by any of the user's other bookmarks.
244  // A count of how many times the bookmark's URL is referenced is determined
245  // and, for each additional reference beyond the one for the bookmark being
246  // scored up to a maximum of three, the score is boosted by a fixed amount
247  // given by |kURLCountBoost|, below.
248  //
249  ScoringFunctor position_functor =
250      for_each(title_match.match_positions.begin(),
251               title_match.match_positions.end(), ScoringFunctor(title.size()));
252  const int kBaseBookmarkScore = 900;
253  const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1;
254  const double kBookmarkScoreRange =
255      static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
256  // It's not likely that GetBookmarksWithTitlesMatching will return overlapping
257  // matches but let's play it safe.
258  match.relevance = std::min(kMaxBookmarkScore,
259      static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) +
260      kBaseBookmarkScore);
261  // Don't waste any time searching for additional referenced URLs if we
262  // already have a perfect title match.
263  if (match.relevance >= kMaxBookmarkScore)
264    return match;
265  // Boost the score if the bookmark's URL is referenced by other bookmarks.
266  const int kURLCountBoost[4] = { 0, 75, 125, 150 };
267  std::vector<const BookmarkNode*> nodes;
268  bookmark_model_->GetNodesByURL(url, &nodes);
269  DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
270  match.relevance +=
271      kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
272  match.relevance = std::min(kMaxBookmarkScore, match.relevance);
273  return match;
274}
275
276// static
277ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
278    const Snippet::MatchPositions& positions,
279    size_t text_length) {
280  ACMatchClassifications classifications;
281  if (positions.empty()) {
282    classifications.push_back(
283        ACMatchClassification(0, ACMatchClassification::NONE));
284    return classifications;
285  }
286
287  for (Snippet::MatchPositions::const_iterator i = positions.begin();
288       i != positions.end(); ++i) {
289    AutocompleteMatch::ACMatchClassifications new_class;
290    AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
291        text_length, 0, &new_class);
292    classifications = AutocompleteMatch::MergeClassifications(
293        classifications, new_class);
294  }
295  return classifications;
296}
297