bookmark_provider.cc revision 5f1c94371a64b3196d4be9466099bb892df9b88e
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/bookmark_provider.h" 6 7#include <algorithm> 8#include <functional> 9#include <vector> 10 11#include "base/prefs/pref_service.h" 12#include "base/strings/utf_string_conversions.h" 13#include "chrome/browser/autocomplete/autocomplete_result.h" 14#include "chrome/browser/autocomplete/chrome_autocomplete_scheme_classifier.h" 15#include "chrome/browser/autocomplete/history_provider.h" 16#include "chrome/browser/bookmarks/bookmark_model_factory.h" 17#include "chrome/browser/omnibox/omnibox_field_trial.h" 18#include "chrome/browser/profiles/profile.h" 19#include "chrome/common/pref_names.h" 20#include "components/bookmarks/browser/bookmark_match.h" 21#include "components/bookmarks/browser/bookmark_model.h" 22#include "components/metrics/proto/omnibox_input_type.pb.h" 23#include "components/omnibox/url_prefix.h" 24#include "net/base/net_util.h" 25 26using bookmarks::BookmarkMatch; 27 28typedef std::vector<BookmarkMatch> BookmarkMatches; 29 30// BookmarkProvider ------------------------------------------------------------ 31 32BookmarkProvider::BookmarkProvider(Profile* profile) 33 : AutocompleteProvider(AutocompleteProvider::TYPE_BOOKMARK), 34 profile_(profile), 35 bookmark_model_(NULL), 36 score_using_url_matches_(OmniboxFieldTrial::BookmarksIndexURLsValue()) { 37 if (profile) { 38 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); 39 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 40 } 41} 42 43void BookmarkProvider::Start(const AutocompleteInput& input, 44 bool minimal_changes) { 45 if (minimal_changes) 46 return; 47 matches_.clear(); 48 49 if (input.text().empty() || 50 (input.type() == metrics::OmniboxInputType::FORCED_QUERY)) 51 return; 52 53 DoAutocomplete(input); 54} 55 56BookmarkProvider::~BookmarkProvider() {} 57 58void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input) { 59 // We may not have a bookmark model for some unit tests. 60 if (!bookmark_model_) 61 return; 62 63 BookmarkMatches matches; 64 // Retrieve enough bookmarks so that we have a reasonable probability of 65 // suggesting the one that the user desires. 66 const size_t kMaxBookmarkMatches = 50; 67 68 // GetBookmarksMatching returns bookmarks matching the user's 69 // search terms using the following rules: 70 // - The search text is broken up into search terms. Each term is searched 71 // for separately. 72 // - Term matches are always performed against the start of a word. 'def' 73 // will match against 'define' but not against 'indefinite'. 74 // - Terms must be at least three characters in length in order to perform 75 // partial word matches. Any term of lesser length will only be used as an 76 // exact match. 'def' will match against 'define' but 'de' will not match. 77 // - A search containing multiple terms will return results with those words 78 // occuring in any order. 79 // - Terms enclosed in quotes comprises a phrase that must match exactly. 80 // - Multiple terms enclosed in quotes will require those exact words in that 81 // exact order to match. 82 // 83 // Please refer to the code for BookmarkIndex::GetBookmarksMatching for 84 // complete details of how searches are performed against the user's 85 // bookmarks. 86 bookmark_model_->GetBookmarksMatching(input.text(), 87 kMaxBookmarkMatches, 88 &matches); 89 if (matches.empty()) 90 return; // There were no matches. 91 const base::string16 fixed_up_input(FixupUserInput(input).second); 92 for (BookmarkMatches::const_iterator i = matches.begin(); i != matches.end(); 93 ++i) { 94 // Create and score the AutocompleteMatch. If its score is 0 then the 95 // match is discarded. 96 AutocompleteMatch match(BookmarkMatchToACMatch(input, fixed_up_input, *i)); 97 if (match.relevance > 0) 98 matches_.push_back(match); 99 } 100 101 // Sort and clip the resulting matches. 102 size_t num_matches = 103 std::min(matches_.size(), AutocompleteProvider::kMaxMatches); 104 std::partial_sort(matches_.begin(), matches_.begin() + num_matches, 105 matches_.end(), AutocompleteMatch::MoreRelevant); 106 matches_.resize(num_matches); 107} 108 109namespace { 110 111// for_each helper functor that calculates a match factor for each query term 112// when calculating the final score. 113// 114// Calculate a 'factor' from 0 to the bookmark's title length for a match 115// based on 1) how many characters match and 2) where the match is positioned. 116class ScoringFunctor { 117 public: 118 // |title_length| is the length of the bookmark title against which this 119 // match will be scored. 120 explicit ScoringFunctor(size_t title_length) 121 : title_length_(static_cast<double>(title_length)), 122 scoring_factor_(0.0) { 123 } 124 125 void operator()(const query_parser::Snippet::MatchPosition& match) { 126 double term_length = static_cast<double>(match.second - match.first); 127 scoring_factor_ += term_length * 128 (title_length_ - match.first) / title_length_; 129 } 130 131 double ScoringFactor() { return scoring_factor_; } 132 133 private: 134 double title_length_; 135 double scoring_factor_; 136}; 137 138} // namespace 139 140AutocompleteMatch BookmarkProvider::BookmarkMatchToACMatch( 141 const AutocompleteInput& input, 142 const base::string16& fixed_up_input_text, 143 const BookmarkMatch& bookmark_match) { 144 // The AutocompleteMatch we construct is non-deletable because the only 145 // way to support this would be to delete the underlying bookmark, which is 146 // unlikely to be what the user intends. 147 AutocompleteMatch match(this, 0, false, 148 AutocompleteMatchType::BOOKMARK_TITLE); 149 base::string16 title(bookmark_match.node->GetTitle()); 150 const GURL& url(bookmark_match.node->url()); 151 const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec()); 152 size_t inline_autocomplete_offset = URLPrefix::GetInlineAutocompleteOffset( 153 input.text(), fixed_up_input_text, false, url_utf16); 154 match.destination_url = url; 155 const size_t match_start = bookmark_match.url_match_positions.empty() ? 156 0 : bookmark_match.url_match_positions[0].first; 157 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) && 158 ((match_start == base::string16::npos) || (match_start != 0)); 159 std::vector<size_t> offsets = BookmarkMatch::OffsetsFromMatchPositions( 160 bookmark_match.url_match_positions); 161 // In addition to knowing how |offsets| is transformed, we need to know how 162 // |inline_autocomplete_offset| is transformed. We add it to the end of 163 // |offsets|, compute how everything is transformed, then remove it from the 164 // end. 165 offsets.push_back(inline_autocomplete_offset); 166 match.contents = net::FormatUrlWithOffsets(url, languages_, 167 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP), 168 net::UnescapeRule::SPACES, NULL, NULL, &offsets); 169 inline_autocomplete_offset = offsets.back(); 170 offsets.pop_back(); 171 BookmarkMatch::MatchPositions new_url_match_positions = 172 BookmarkMatch::ReplaceOffsetsInMatchPositions( 173 bookmark_match.url_match_positions, offsets); 174 match.contents_class = 175 ClassificationsFromMatch(new_url_match_positions, 176 match.contents.size(), 177 true); 178 match.fill_into_edit = 179 AutocompleteInput::FormattedStringWithEquivalentMeaning( 180 url, match.contents, ChromeAutocompleteSchemeClassifier(profile_)); 181 if (inline_autocomplete_offset != base::string16::npos) { 182 // |inline_autocomplete_offset| may be beyond the end of the 183 // |fill_into_edit| if the user has typed an URL with a scheme and the 184 // last character typed is a slash. That slash is removed by the 185 // FormatURLWithOffsets call above. 186 if (inline_autocomplete_offset < match.fill_into_edit.length()) { 187 match.inline_autocompletion = 188 match.fill_into_edit.substr(inline_autocomplete_offset); 189 } 190 match.allowed_to_be_default_match = match.inline_autocompletion.empty() || 191 !HistoryProvider::PreventInlineAutocomplete(input); 192 } 193 match.description = title; 194 match.description_class = 195 ClassificationsFromMatch(bookmark_match.title_match_positions, 196 match.description.size(), 197 false); 198 199 // Summary on how a relevance score is determined for the match: 200 // 201 // For each match within the bookmark's title or URL (or both), calculate a 202 // 'factor', sum up those factors, then use the sum to figure out a value 203 // between the base score and the maximum score. 204 // 205 // The factor for each match is the product of: 206 // 207 // 1) how many characters in the bookmark's title/URL are part of this match. 208 // This is capped at the length of the bookmark's title 209 // to prevent terms that match in both the title and the URL from 210 // scoring too strongly. 211 // 212 // 2) where the match occurs within the bookmark's title or URL, 213 // giving more points for matches that appear earlier in the string: 214 // ((string_length - position of match start) / string_length). 215 // 216 // Example: Given a bookmark title of 'abcde fghijklm', with a title length 217 // of 14, and two different search terms, 'abcde' and 'fghij', with 218 // start positions of 0 and 6, respectively, 'abcde' will score higher 219 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with 220 // a partial factor of (14-6)/14 = 0.571 ). (In this example neither 221 // term matches in the URL.) 222 // 223 // Once all match factors have been calculated they are summed. If URL 224 // matches are not considered, the resulting sum will never be greater than 225 // the length of the bookmark title because of the way the bookmark model 226 // matches and removes overlaps. (In particular, the bookmark model only 227 // matches terms to the beginning of words and it removes all overlapping 228 // matches, keeping only the longest. Together these mean that each 229 // character is included in at most one match.) If URL matches are 230 // considered, the sum can be greater. 231 // 232 // This sum is then normalized by the length of the bookmark title (if URL 233 // matches are not considered) or by the length of the bookmark title + 10 234 // (if URL matches are considered) and capped at 1.0. (If URL matches 235 // are considered, we want to expand the scoring range so fewer bookmarks 236 // will hit the 1.0 cap and hence lose all ability to distinguish between 237 // these high-quality bookmarks.) 238 // 239 // The normalized value is multiplied against the scoring range available, 240 // which is 299. The 299 is calculated by subtracting the minimum possible 241 // score, 900, from the maximum possible score, 1199. This product, ranging 242 // from 0 to 299, is added to the minimum possible score, 900, giving the 243 // preliminary score. 244 // 245 // If the preliminary score is less than the maximum possible score, 1199, 246 // it can be boosted up to that maximum possible score if the URL referenced 247 // by the bookmark is also referenced by any of the user's other bookmarks. 248 // A count of how many times the bookmark's URL is referenced is determined 249 // and, for each additional reference beyond the one for the bookmark being 250 // scored up to a maximum of three, the score is boosted by a fixed amount 251 // given by |kURLCountBoost|, below. 252 // 253 if (score_using_url_matches_) { 254 // Pretend empty titles are identical to the URL. 255 if (title.empty()) 256 title = base::ASCIIToUTF16(url.spec()); 257 } else { 258 DCHECK(!title.empty()); 259 } 260 ScoringFunctor title_position_functor = 261 for_each(bookmark_match.title_match_positions.begin(), 262 bookmark_match.title_match_positions.end(), 263 ScoringFunctor(title.size())); 264 ScoringFunctor url_position_functor = 265 for_each(bookmark_match.url_match_positions.begin(), 266 bookmark_match.url_match_positions.end(), 267 ScoringFunctor(bookmark_match.node->url().spec().length())); 268 const double summed_factors = title_position_functor.ScoringFactor() + 269 (score_using_url_matches_ ? url_position_functor.ScoringFactor() : 0); 270 const double normalized_sum = std::min( 271 summed_factors / (title.size() + (score_using_url_matches_ ? 10 : 0)), 272 1.0); 273 const int kBaseBookmarkScore = 900; 274 const int kMaxBookmarkScore = 1199; 275 const double kBookmarkScoreRange = 276 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); 277 match.relevance = static_cast<int>(normalized_sum * kBookmarkScoreRange) + 278 kBaseBookmarkScore; 279 // Don't waste any time searching for additional referenced URLs if we 280 // already have a perfect title match. 281 if (match.relevance >= kMaxBookmarkScore) 282 return match; 283 // Boost the score if the bookmark's URL is referenced by other bookmarks. 284 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; 285 std::vector<const BookmarkNode*> nodes; 286 bookmark_model_->GetNodesByURL(url, &nodes); 287 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); 288 match.relevance += 289 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; 290 match.relevance = std::min(kMaxBookmarkScore, match.relevance); 291 return match; 292} 293 294// static 295ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( 296 const query_parser::Snippet::MatchPositions& positions, 297 size_t text_length, 298 bool is_url) { 299 ACMatchClassification::Style url_style = 300 is_url ? ACMatchClassification::URL : ACMatchClassification::NONE; 301 ACMatchClassifications classifications; 302 if (positions.empty()) { 303 classifications.push_back( 304 ACMatchClassification(0, url_style)); 305 return classifications; 306 } 307 308 for (query_parser::Snippet::MatchPositions::const_iterator i = 309 positions.begin(); 310 i != positions.end(); 311 ++i) { 312 AutocompleteMatch::ACMatchClassifications new_class; 313 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, 314 text_length, url_style, &new_class); 315 classifications = AutocompleteMatch::MergeClassifications( 316 classifications, new_class); 317 } 318 return classifications; 319} 320