bookmark_provider.cc revision a02191e04bc25c4935f804f2c080ae28663d096d
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/bookmark_provider.h" 6 7#include <algorithm> 8#include <functional> 9#include <vector> 10 11#include "base/prefs/pref_service.h" 12#include "base/strings/utf_string_conversions.h" 13#include "chrome/browser/autocomplete/autocomplete_result.h" 14#include "chrome/browser/autocomplete/history_provider.h" 15#include "chrome/browser/autocomplete/url_prefix.h" 16#include "chrome/browser/bookmarks/bookmark_model.h" 17#include "chrome/browser/bookmarks/bookmark_model_factory.h" 18#include "chrome/browser/bookmarks/bookmark_title_match.h" 19#include "chrome/browser/profiles/profile.h" 20#include "chrome/common/pref_names.h" 21#include "net/base/net_util.h" 22 23typedef std::vector<BookmarkTitleMatch> TitleMatches; 24 25// BookmarkProvider ------------------------------------------------------------ 26 27BookmarkProvider::BookmarkProvider( 28 AutocompleteProviderListener* listener, 29 Profile* profile) 30 : AutocompleteProvider(listener, profile, 31 AutocompleteProvider::TYPE_BOOKMARK), 32 bookmark_model_(NULL) { 33 if (profile) { 34 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); 35 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 36 } 37} 38 39void BookmarkProvider::Start(const AutocompleteInput& input, 40 bool minimal_changes) { 41 if (minimal_changes) 42 return; 43 matches_.clear(); 44 45 if (input.text().empty() || 46 ((input.type() != AutocompleteInput::UNKNOWN) && 47 (input.type() != AutocompleteInput::QUERY))) 48 return; 49 50 DoAutocomplete(input, 51 input.matches_requested() == AutocompleteInput::BEST_MATCH); 52} 53 54BookmarkProvider::~BookmarkProvider() {} 55 56void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, 57 bool best_match) { 58 // We may not have a bookmark model for some unit tests. 59 if (!bookmark_model_) 60 return; 61 62 TitleMatches matches; 63 // Retrieve enough bookmarks so that we have a reasonable probability of 64 // suggesting the one that the user desires. 65 const size_t kMaxBookmarkMatches = 50; 66 67 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's 68 // search terms using the following rules: 69 // - The search text is broken up into search terms. Each term is searched 70 // for separately. 71 // - Term matches are always performed against the start of a word. 'def' 72 // will match against 'define' but not against 'indefinite'. 73 // - Terms must be at least three characters in length in order to perform 74 // partial word matches. Any term of lesser length will only be used as an 75 // exact match. 'def' will match against 'define' but 'de' will not match. 76 // - A search containing multiple terms will return results with those words 77 // occuring in any order. 78 // - Terms enclosed in quotes comprises a phrase that must match exactly. 79 // - Multiple terms enclosed in quotes will require those exact words in that 80 // exact order to match. 81 // 82 // Note: GetBookmarksWithTitlesMatching() will never return a match span 83 // greater than the length of the title against which it is being matched, 84 // nor can those spans ever overlap because the match spans are coalesced 85 // for all matched terms. 86 // 87 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching 88 // for complete details of how title searches are performed against the user's 89 // bookmarks. 90 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), 91 kMaxBookmarkMatches, 92 &matches); 93 if (matches.empty()) 94 return; // There were no matches. 95 AutocompleteInput fixed_up_input(input); 96 FixupUserInput(&fixed_up_input); 97 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); 98 ++i) { 99 // Create and score the AutocompleteMatch. If its score is 0 then the 100 // match is discarded. 101 AutocompleteMatch match(TitleMatchToACMatch(input, fixed_up_input, *i)); 102 if (match.relevance > 0) 103 matches_.push_back(match); 104 } 105 106 // Sort and clip the resulting matches. 107 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; 108 if (matches_.size() > max_matches) { 109 std::partial_sort(matches_.begin(), 110 matches_.begin() + max_matches, 111 matches_.end(), 112 AutocompleteMatch::MoreRelevant); 113 matches_.resize(max_matches); 114 } else { 115 std::sort(matches_.begin(), matches_.end(), 116 AutocompleteMatch::MoreRelevant); 117 } 118} 119 120namespace { 121 122// for_each helper functor that calculates a match factor for each query term 123// when calculating the final score. 124// 125// Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's 126// title the term matches, and 2) where the match is positioned within the 127// bookmark's title. A full length match earns a 1.0. A half-length match earns 128// at most a 0.5 and at least a 0.25. A single character match against a title 129// that is 100 characters long where the match is at the first character will 130// earn a 0.01 and at the last character will earn a 0.0001. 131class ScoringFunctor { 132 public: 133 // |title_length| is the length of the bookmark title against which this 134 // match will be scored. 135 explicit ScoringFunctor(size_t title_length) 136 : title_length_(static_cast<double>(title_length)), 137 scoring_factor_(0.0) { 138 } 139 140 void operator()(const Snippet::MatchPosition& match) { 141 double term_length = static_cast<double>(match.second - match.first); 142 scoring_factor_ += term_length / title_length_ * 143 (title_length_ - match.first) / title_length_; 144 } 145 146 double ScoringFactor() { return scoring_factor_; } 147 148 private: 149 double title_length_; 150 double scoring_factor_; 151}; 152 153} // namespace 154 155AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( 156 const AutocompleteInput& input, 157 const AutocompleteInput& fixed_up_input, 158 const BookmarkTitleMatch& title_match) { 159 // The AutocompleteMatch we construct is non-deletable because the only 160 // way to support this would be to delete the underlying bookmark, which is 161 // unlikely to be what the user intends. 162 AutocompleteMatch match(this, 0, false, 163 AutocompleteMatchType::BOOKMARK_TITLE); 164 const base::string16& title(title_match.node->GetTitle()); 165 DCHECK(!title.empty()); 166 167 const GURL& url(title_match.node->url()); 168 const base::string16& url_utf16 = base::UTF8ToUTF16(url.spec()); 169 size_t match_start, inline_autocomplete_offset; 170 URLPrefix::ComputeMatchStartAndInlineAutocompleteOffset( 171 input, fixed_up_input, false, url_utf16, &match_start, 172 &inline_autocomplete_offset); 173 match.destination_url = url; 174 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()) && 175 ((match_start == base::string16::npos) || (match_start != 0)); 176 match.contents = net::FormatUrl(url, languages_, 177 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP), 178 net::UnescapeRule::SPACES, NULL, NULL, &inline_autocomplete_offset); 179 match.contents_class.push_back( 180 ACMatchClassification(0, ACMatchClassification::URL)); 181 match.fill_into_edit = 182 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, 183 match.contents); 184 if (inline_autocomplete_offset != base::string16::npos) { 185 // |inline_autocomplete_offset| may be beyond the end of the 186 // |fill_into_edit| if the user has typed an URL with a scheme and the 187 // last character typed is a slash. That slash is removed by the 188 // FormatURLWithOffsets call above. 189 if (inline_autocomplete_offset < match.fill_into_edit.length()) { 190 match.inline_autocompletion = 191 match.fill_into_edit.substr(inline_autocomplete_offset); 192 } 193 match.allowed_to_be_default_match = match.inline_autocompletion.empty() || 194 !HistoryProvider::PreventInlineAutocomplete(input); 195 } 196 match.description = title; 197 match.description_class = 198 ClassificationsFromMatch(title_match.match_positions, 199 match.description.size()); 200 match.starred = true; 201 202 // Summary on how a relevance score is determined for the match: 203 // 204 // For each term matching within the bookmark's title (as given by the set of 205 // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then 206 // use the sum to figure out a value between the base score and the maximum 207 // score. 208 // 209 // The factor for each term is the product of: 210 // 211 // 1) how much of the bookmark's title has been matched by the term: 212 // (term length / title length). 213 // 214 // Example: Given a bookmark title 'abcde fghijklm', with a title length 215 // of 14, and two different search terms, 'abcde' and 'fghijklm', with 216 // term lengths of 5 and 8, respectively, 'fghijklm' will score higher 217 // (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). 218 // 219 // 2) where the term match occurs within the bookmark's title, giving more 220 // points for matches that appear earlier in the title: 221 // ((title length - position of match start) / title_length). 222 // 223 // Example: Given a bookmark title of 'abcde fghijklm', with a title length 224 // of 14, and two different search terms, 'abcde' and 'fghij', with 225 // start positions of 0 and 6, respectively, 'abcde' will score higher 226 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with 227 // a partial factor of (14-6)/14 = 0.571 ). 228 // 229 // Once all term factors have been calculated they are summed. The resulting 230 // sum will never be greater than 1.0 because of the way the bookmark model 231 // matches and removes overlaps. (In particular, the bookmark model only 232 // matches terms to the beginning of words and it removes all overlapping 233 // matches, keeping only the longest. Together these mean that each 234 // character is included in at most one match. This property ensures the 235 // sum of factors is at most 1.) This sum is then multiplied against the 236 // scoring range available, which is 299. The 299 is calculated by 237 // subtracting the minimum possible score, 900, from the maximum possible 238 // score, 1199. This product, ranging from 0 to 299, is added to the minimum 239 // possible score, 900, giving the preliminary score. 240 // 241 // If the preliminary score is less than the maximum possible score, 1199, 242 // it can be boosted up to that maximum possible score if the URL referenced 243 // by the bookmark is also referenced by any of the user's other bookmarks. 244 // A count of how many times the bookmark's URL is referenced is determined 245 // and, for each additional reference beyond the one for the bookmark being 246 // scored up to a maximum of three, the score is boosted by a fixed amount 247 // given by |kURLCountBoost|, below. 248 // 249 ScoringFunctor position_functor = 250 for_each(title_match.match_positions.begin(), 251 title_match.match_positions.end(), ScoringFunctor(title.size())); 252 const int kBaseBookmarkScore = 900; 253 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; 254 const double kBookmarkScoreRange = 255 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); 256 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping 257 // matches but let's play it safe. 258 match.relevance = std::min(kMaxBookmarkScore, 259 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + 260 kBaseBookmarkScore); 261 // Don't waste any time searching for additional referenced URLs if we 262 // already have a perfect title match. 263 if (match.relevance >= kMaxBookmarkScore) 264 return match; 265 // Boost the score if the bookmark's URL is referenced by other bookmarks. 266 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; 267 std::vector<const BookmarkNode*> nodes; 268 bookmark_model_->GetNodesByURL(url, &nodes); 269 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); 270 match.relevance += 271 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; 272 match.relevance = std::min(kMaxBookmarkScore, match.relevance); 273 return match; 274} 275 276// static 277ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( 278 const Snippet::MatchPositions& positions, 279 size_t text_length) { 280 ACMatchClassifications classifications; 281 if (positions.empty()) { 282 classifications.push_back( 283 ACMatchClassification(0, ACMatchClassification::NONE)); 284 return classifications; 285 } 286 287 for (Snippet::MatchPositions::const_iterator i = positions.begin(); 288 i != positions.end(); ++i) { 289 AutocompleteMatch::ACMatchClassifications new_class; 290 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, 291 text_length, 0, &new_class); 292 classifications = AutocompleteMatch::MergeClassifications( 293 classifications, new_class); 294 } 295 return classifications; 296} 297