bookmark_provider.cc revision 116680a4aac90f2aa7413d9095a592090648e557
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/autocomplete/bookmark_provider.h"
6
7#include <algorithm>
8#include <functional>
9#include <vector>
10
11#include "base/prefs/pref_service.h"
12#include "base/strings/utf_string_conversions.h"
13#include "chrome/browser/autocomplete/autocomplete_result.h"
14#include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h"
15#include "chrome/browser/autocomplete/history_provider.h"
16#include "chrome/browser/bookmarks/bookmark_model_factory.h"
17#include "chrome/browser/omnibox/omnibox_field_trial.h"
18#include "chrome/browser/profiles/profile.h"
19#include "chrome/common/pref_names.h"
20#include "components/autocomplete/url_prefix.h"
21#include "components/bookmarks/browser/bookmark_match.h"
22#include "components/bookmarks/browser/bookmark_model.h"
23#include "components/metrics/proto/omnibox_input_type.pb.h"
24#include "net/base/net_util.h"
25
26using bookmarks::BookmarkMatch;
27
28typedef std::vector<BookmarkMatch> BookmarkMatches;
29
30// BookmarkProvider ------------------------------------------------------------
31
32BookmarkProvider::BookmarkProvider(Profile* profile)
33    : AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK),
34      profile_(profile),
35      bookmark_model_(NULL),
36      score_using_url_matches_(OmniboxFieldTrial::BookmarksIndexURLsValue()) {
37  if (profile) {
38    bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
39    languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
40  }
41}
42
43void BookmarkProvider::Start(const AutocompleteInput& input,
44                             bool minimal_changes) {
45  if (minimal_changes)
46    return;
47  matches_.clear();
48
49  if (input.text().empty() ||
50      (input.type() == metrics::OmniboxInputType::FORCED_QUERY))
51    return;
52
53  DoAutocomplete(input);
54}
55
56BookmarkProvider::~BookmarkProvider() {}
57
58void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) {
59  // We may not have a bookmark model for some unit tests.
60  if (!bookmark_model_)
61    return;
62
63  BookmarkMatches matches;
64  // Retrieve enough bookmarks so that we have a reasonable probability of
65  // suggesting the one that the user desires.
66  const size_t kMaxBookmarkMatches = 50;
67
68  // GetBookmarksMatching returns bookmarks matching the user's
69  // search terms using the following rules:
70  //  - The search text is broken up into search terms. Each term is searched
71  //    for separately.
72  //  - Term matches are always performed against the start of a word. 'def'
73  //    will match against 'define' but not against 'indefinite'.
74  //  - Terms must be at least three characters in length in order to perform
75  //    partial word matches. Any term of lesser length will only be used as an
76  //    exact match. 'def' will match against 'define' but 'de' will not match.
77  //  - A search containing multiple terms will return results with those words
78  //    occuring in any order.
79  //  - Terms enclosed in quotes comprises a phrase that must match exactly.
80  //  - Multiple terms enclosed in quotes will require those exact words in that
81  //    exact order to match.
82  //
83  // Please refer to the code for BookmarkIndex::GetBookmarksMatching for
84  // complete details of how searches are performed against the user's
85  // bookmarks.
86  bookmark_model_->GetBookmarksMatching(input.text(),
87                                        kMaxBookmarkMatches,
88                                        &matches);
89  if (matches.empty())
90    return;  // There were no matches.
91  const base::string16 fixed_up_input(FixupUserInput(input).second);
92  for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end();
93       ++i) {
94    // Create and score the AutocompleteMatch. If its score is 0 then the
95    // match is discarded.
96    AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i));
97    if (match.relevance > 0)
98      matches_.push_back(match);
99  }
100
101  // Sort and clip the resulting matches.
102  size_t num_matches =
103      std::min(matches_.size(), AutocompleteProvider::kMaxMatches);
104  std::partial_sort(matches_.begin(), matches_.begin() + num_matches,
105                    matches_.end(), AutocompleteMatch::MoreRelevant);
106  matches_.resize(num_matches);
107}
108
109namespace {
110
111// for_each helper functor that calculates a match factor for each query term
112// when calculating the final score.
113//
114// Calculate a 'factor' from 0 to the bookmark's title length for a match
115// based on 1) how many characters match and 2) where the match is positioned.
116class ScoringFunctor {
117 public:
118  // |title_length| is the length of the bookmark title against which this
119  // match will be scored.
120  explicit ScoringFunctor(size_t title_length)
121      : title_length_(static_cast<double>(title_length)),
122        scoring_factor_(0.0) {
123  }
124
125  void operator()(const query_parser::Snippet::MatchPosition& match) {
126    double term_length = static_cast<double>(match.second - match.first);
127    scoring_factor_ += term_length *
128        (title_length_ - match.first) / title_length_;
129  }
130
131  double ScoringFactor() { return scoring_factor_; }
132
133 private:
134  double title_length_;
135  double scoring_factor_;
136};
137
138}  // namespace
139
140AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch(
141    const AutocompleteInput& input,
142    const base::string16& fixed_up_input_text,
143    const BookmarkMatch& bookmark_match) {
144  // The AutocompleteMatch we construct is non-deletable because the only
145  // way to support this would be to delete the underlying bookmark, which is
146  // unlikely to be what the user intends.
147  AutocompleteMatch match(this, 0, false,
148                          AutocompleteMatchType::BOOKMARK_TITLE);
149  base::string16 title(bookmark_match.node->GetTitle());
150  const GURL& url(bookmark_match.node->url());
151  const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec());
152  size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset(
153      input.text(), fixed_up_input_text, false, url_utf16);
154  match.destination_url = url;
155  const size_t match_start = bookmark_match.url_match_positions.empty() ?
156      0 : bookmark_match.url_match_positions[0].first;
157  const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) &&
158      ((match_start == base::string16::npos) || (match_start != 0));
159  std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions(
160      bookmark_match.url_match_positions);
161  // In addition to knowing how |offsets| is transformed, we need to know how
162  // |inline_autocomplete_offset| is transformed.  We add it to the end of
163  // |offsets|, compute how everything is transformed, then remove it from the
164  // end.
165  offsets.push_back(inline_autocomplete_offset);
166  match.contents = net::FormatUrlWithOffsets(url, languages_,
167      net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP),
168      net::UnescapeRule::SPACES, NULL, NULL, &offsets);
169  inline_autocomplete_offset = offsets.back();
170  offsets.pop_back();
171  BookmarkMatch::MatchPositions new_url_match_positions =
172      BookmarkMatch::ReplaceOffsetsInMatchPositions(
173          bookmark_match.url_match_positions, offsets);
174  match.contents_class =
175      ClassificationsFromMatch(new_url_match_positions,
176                               match.contents.size(),
177                               true);
178  match.fill_into_edit =
179      AutocompleteInput::FormattedStringWithEquivalentMeaning(
180          url, match.contents, ChromeAutocompleteSchemeClassifier(profile_));
181  if (inline_autocomplete_offset != base::string16::npos) {
182    // |inline_autocomplete_offset| may be beyond the end of the
183    // |fill_into_edit| if the user has typed an URL with a scheme and the
184    // last character typed is a slash.  That slash is removed by the
185    // FormatURLWithOffsets call above.
186    if (inline_autocomplete_offset < match.fill_into_edit.length()) {
187      match.inline_autocompletion =
188          match.fill_into_edit.substr(inline_autocomplete_offset);
189    }
190    match.allowed_to_be_default_match = match.inline_autocompletion.empty() ||
191        !HistoryProvider::PreventInlineAutocomplete(input);
192  }
193  match.description = title;
194  match.description_class =
195      ClassificationsFromMatch(bookmark_match.title_match_positions,
196                               match.description.size(),
197                               false);
198  match.starred = true;
199
200  // Summary on how a relevance score is determined for the match:
201  //
202  // For each match within the bookmark's title or URL (or both), calculate a
203  // 'factor', sum up those factors, then use the sum to figure out a value
204  // between the base score and the maximum score.
205  //
206  // The factor for each match is the product of:
207  //
208  //  1) how many characters in the bookmark's title/URL are part of this match.
209  //     This is capped at the length of the bookmark's title
210  //     to prevent terms that match in both the title and the URL from
211  //     scoring too strongly.
212  //
213  //  2) where the match occurs within the bookmark's title or URL,
214  //     giving more points for matches that appear earlier in the string:
215  //       ((string_length - position of match start) / string_length).
216  //
217  //  Example: Given a bookmark title of 'abcde fghijklm', with a title length
218  //     of 14, and two different search terms, 'abcde' and 'fghij', with
219  //     start positions of 0 and 6, respectively, 'abcde' will score higher
220  //     (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
221  //     a partial factor of (14-6)/14 = 0.571 ).  (In this example neither
222  //     term matches in the URL.)
223  //
224  // Once all match factors have been calculated they are summed.  If URL
225  // matches are not considered, the resulting sum will never be greater than
226  // the length of the bookmark title because of the way the bookmark model
227  // matches and removes overlaps.  (In particular, the bookmark model only
228  // matches terms to the beginning of words and it removes all overlapping
229  // matches, keeping only the longest.  Together these mean that each
230  // character is included in at most one match.)  If URL matches are
231  // considered, the sum can be greater.
232  //
233  // This sum is then normalized by the length of the bookmark title (if URL
234  // matches are not considered) or by the length of the bookmark title + 10
235  // (if URL matches are considered) and capped at 1.0.  (If URL matches
236  // are considered, we want to expand the scoring range so fewer bookmarks
237  // will hit the 1.0 cap and hence lose all ability to distinguish between
238  // these high-quality bookmarks.)
239  //
240  // The normalized value is multiplied against the scoring range available,
241  // which is 299.  The 299 is calculated by subtracting the minimum possible
242  // score, 900, from the maximum possible score, 1199.  This product, ranging
243  // from 0 to 299, is added to the minimum possible score, 900, giving the
244  // preliminary score.
245  //
246  // If the preliminary score is less than the maximum possible score, 1199,
247  // it can be boosted up to that maximum possible score if the URL referenced
248  // by the bookmark is also referenced by any of the user's other bookmarks.
249  // A count of how many times the bookmark's URL is referenced is determined
250  // and, for each additional reference beyond the one for the bookmark being
251  // scored up to a maximum of three, the score is boosted by a fixed amount
252  // given by |kURLCountBoost|, below.
253  //
254  if (score_using_url_matches_) {
255    // Pretend empty titles are identical to the URL.
256    if (title.empty())
257      title = base::ASCIIToUTF16(url.spec());
258  } else {
259    DCHECK(!title.empty());
260  }
261  ScoringFunctor title_position_functor =
262      for_each(bookmark_match.title_match_positions.begin(),
263               bookmark_match.title_match_positions.end(),
264               ScoringFunctor(title.size()));
265  ScoringFunctor url_position_functor =
266      for_each(bookmark_match.url_match_positions.begin(),
267               bookmark_match.url_match_positions.end(),
268               ScoringFunctor(bookmark_match.node->url().spec().length()));
269  const double summed_factors = title_position_functor.ScoringFactor() +
270      (score_using_url_matches_ ? url_position_functor.ScoringFactor() : 0);
271  const double normalized_sum = std::min(
272      summed_factors / (title.size() + (score_using_url_matches_ ? 10 : 0)),
273      1.0);
274  const int kBaseBookmarkScore = 900;
275  const int kMaxBookmarkScore = 1199;
276  const double kBookmarkScoreRange =
277      static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
278  match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) +
279      kBaseBookmarkScore;
280  // Don't waste any time searching for additional referenced URLs if we
281  // already have a perfect title match.
282  if (match.relevance >= kMaxBookmarkScore)
283    return match;
284  // Boost the score if the bookmark's URL is referenced by other bookmarks.
285  const int kURLCountBoost[4] = { 0, 75, 125, 150 };
286  std::vector<const BookmarkNode*> nodes;
287  bookmark_model_->GetNodesByURL(url, &nodes);
288  DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
289  match.relevance +=
290      kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
291  match.relevance = std::min(kMaxBookmarkScore, match.relevance);
292  return match;
293}
294
295// static
296ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
297    const query_parser::Snippet::MatchPositions& positions,
298    size_t text_length,
299    bool is_url) {
300  ACMatchClassification::Style url_style =
301      is_url ? ACMatchClassification::URL : ACMatchClassification::NONE;
302  ACMatchClassifications classifications;
303  if (positions.empty()) {
304    classifications.push_back(
305        ACMatchClassification(0, url_style));
306    return classifications;
307  }
308
309  for (query_parser::Snippet::MatchPositions::const_iterator i =
310           positions.begin();
311       i != positions.end();
312       ++i) {
313    AutocompleteMatch::ACMatchClassifications new_class;
314    AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
315        text_length, url_style, &new_class);
316    classifications = AutocompleteMatch::MergeClassifications(
317        classifications, new_class);
318  }
319  return classifications;
320}
321