bookmark_provider.cc revision 90dce4d38c5ff5333bea97d859d4e484e27edf0c
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/autocomplete/bookmark_provider.h" 6 7#include <algorithm> 8#include <functional> 9#include <vector> 10 11#include "base/metrics/histogram.h" 12#include "base/prefs/pref_service.h" 13#include "base/time.h" 14#include "chrome/browser/autocomplete/autocomplete_result.h" 15#include "chrome/browser/bookmarks/bookmark_model.h" 16#include "chrome/browser/bookmarks/bookmark_model_factory.h" 17#include "chrome/browser/profiles/profile.h" 18#include "chrome/common/pref_names.h" 19#include "net/base/net_util.h" 20 21typedef std::vector<bookmark_utils::TitleMatch> TitleMatches; 22 23// BookmarkProvider ------------------------------------------------------------ 24 25BookmarkProvider::BookmarkProvider( 26 AutocompleteProviderListener* listener, 27 Profile* profile) 28 : AutocompleteProvider(listener, profile, 29 AutocompleteProvider::TYPE_BOOKMARK), 30 bookmark_model_(NULL) { 31 if (profile) { 32 bookmark_model_ = BookmarkModelFactory::GetForProfile(profile); 33 languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); 34 } 35} 36 37void BookmarkProvider::Start(const AutocompleteInput& input, 38 bool minimal_changes) { 39 if (minimal_changes) 40 return; 41 matches_.clear(); 42 43 // Short-circuit any matching when inline autocompletion is disabled and 44 // we're looking for BEST_MATCH because none of the BookmarkProvider's 45 // matches can score high enough to qualify. 46 if (input.text().empty() || 47 ((input.type() != AutocompleteInput::UNKNOWN) && 48 (input.type() != AutocompleteInput::QUERY)) || 49 ((input.matches_requested() == AutocompleteInput::BEST_MATCH) && 50 input.prevent_inline_autocomplete())) 51 return; 52 53 base::TimeTicks start_time = base::TimeTicks::Now(); 54 DoAutocomplete(input, 55 input.matches_requested() == AutocompleteInput::BEST_MATCH); 56 UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime", 57 base::TimeTicks::Now() - start_time); 58} 59 60BookmarkProvider::~BookmarkProvider() {} 61 62void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input, 63 bool best_match) { 64 // We may not have a bookmark model for some unit tests. 65 if (!bookmark_model_) 66 return; 67 68 TitleMatches matches; 69 // Retrieve enough bookmarks so that we have a reasonable probability of 70 // suggesting the one that the user desires. 71 const size_t kMaxBookmarkMatches = 50; 72 73 // GetBookmarksWithTitlesMatching returns bookmarks matching the user's 74 // search terms using the following rules: 75 // - The search text is broken up into search terms. Each term is searched 76 // for separately. 77 // - Term matches are always performed against the start of a word. 'def' 78 // will match against 'define' but not against 'indefinite'. 79 // - Terms must be at least three characters in length in order to perform 80 // partial word matches. Any term of lesser length will only be used as an 81 // exact match. 'def' will match against 'define' but 'de' will not match. 82 // - A search containing multiple terms will return results with those words 83 // occuring in any order. 84 // - Terms enclosed in quotes comprises a phrase that must match exactly. 85 // - Multiple terms enclosed in quotes will require those exact words in that 86 // exact order to match. 87 // 88 // Note: GetBookmarksWithTitlesMatching() will never return a match span 89 // greater than the length of the title against which it is being matched, 90 // nor can those spans ever overlap because the match spans are coalesced 91 // for all matched terms. 92 // 93 // Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching 94 // for complete details of how title searches are performed against the user's 95 // bookmarks. 96 bookmark_model_->GetBookmarksWithTitlesMatching(input.text(), 97 kMaxBookmarkMatches, 98 &matches); 99 if (matches.empty()) 100 return; // There were no matches. 101 for (TitleMatches::const_iterator i = matches.begin(); i != matches.end(); 102 ++i) { 103 // Create and score the AutocompleteMatch. If its score is 0 then the 104 // match is discarded. 105 AutocompleteMatch match(TitleMatchToACMatch(*i)); 106 if (match.relevance > 0) 107 matches_.push_back(match); 108 } 109 110 // Sort and clip the resulting matches. 111 size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches; 112 if (matches_.size() > max_matches) { 113 std::partial_sort(matches_.begin(), matches_.end(), 114 matches_.begin() + max_matches, 115 AutocompleteMatch::MoreRelevant); 116 matches_.resize(max_matches); 117 } else { 118 std::sort(matches_.begin(), matches_.end(), 119 AutocompleteMatch::MoreRelevant); 120 } 121} 122 123namespace { 124 125// for_each helper functor that calculates a match factor for each query term 126// when calculating the final score. 127// 128// Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's 129// title the term matches, and 2) where the match is positioned within the 130// bookmark's title. A full length match earns a 1.0. A half-length match earns 131// at most a 0.5 and at least a 0.25. A single character match against a title 132// that is 100 characters long where the match is at the first character will 133// earn a 0.01 and at the last character will earn a 0.0001. 134class ScoringFunctor { 135 public: 136 // |title_length| is the length of the bookmark title against which this 137 // match will be scored. 138 explicit ScoringFunctor(size_t title_length) 139 : title_length_(static_cast<double>(title_length)), 140 scoring_factor_(0.0) { 141 } 142 143 void operator()(const Snippet::MatchPosition& match) { 144 double term_length = static_cast<double>(match.second - match.first); 145 scoring_factor_ += term_length / title_length_ * 146 (title_length_ - match.first) / title_length_; 147 } 148 149 double ScoringFactor() { return scoring_factor_; } 150 151 private: 152 double title_length_; 153 double scoring_factor_; 154}; 155 156} // namespace 157 158AutocompleteMatch BookmarkProvider::TitleMatchToACMatch( 159 const bookmark_utils::TitleMatch& title_match) { 160 // The AutocompleteMatch we construct is non-deletable because the only 161 // way to support this would be to delete the underlying bookmark, which is 162 // unlikely to be what the user intends. 163 AutocompleteMatch match(this, 0, false, 164 AutocompleteMatchType::BOOKMARK_TITLE); 165 const string16& title(title_match.node->GetTitle()); 166 DCHECK(!title.empty()); 167 const GURL& url(title_match.node->url()); 168 match.destination_url = url; 169 match.contents = net::FormatUrl(url, languages_, 170 net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP, 171 net::UnescapeRule::SPACES, NULL, NULL, NULL); 172 match.contents_class.push_back( 173 ACMatchClassification(0, ACMatchClassification::NONE)); 174 match.fill_into_edit = 175 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, 176 match.contents); 177 match.description = title; 178 match.description_class = 179 ClassificationsFromMatch(title_match.match_positions, 180 match.description.size()); 181 match.starred = true; 182 183 // Summary on how a relevance score is determined for the match: 184 // 185 // For each term matching within the bookmark's title (as given by the set of 186 // Snippet::MatchPositions) calculate a 'factor', sum up those factors, then 187 // use the sum to figure out a value between the base score and the maximum 188 // score. 189 // 190 // The factor for each term is calculated based on: 191 // 192 // 1) how much of the bookmark's title has been matched by the term: 193 // (term length / title length). 194 // 195 // Example: Given a bookmark title 'abcde fghijklm', with a title length 196 // of 14, and two different search terms, 'abcde' and 'fghijklm', with 197 // term lengths of 5 and 8, respectively, 'fghijklm' will score higher 198 // (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357). 199 // 200 // 2) where the term match occurs within the bookmark's title, giving more 201 // points for matches that appear earlier in the title: 202 // ((title length - position of match start) / title_length). 203 // 204 // Example: Given a bookmark title of 'abcde fghijklm', with a title length 205 // of 14, and two different search terms, 'abcde' and 'fghij', with 206 // start positions of 0 and 6, respectively, 'abcde' will score higher 207 // (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with 208 // a partial factor of (14-6)/14 = 0.571 ). 209 // 210 // Once all term factors have been calculated they are summed. The resulting 211 // sum will never be greater than 1.0. This sum is then multiplied against 212 // the scoring range available, which is 299. The 299 is calculated by 213 // subtracting the minimum possible score, 900, from the maximum possible 214 // score, 1199. This product, ranging from 0 to 299, is added to the minimum 215 // possible score, 900, giving the preliminary score. 216 // 217 // If the preliminary score is less than the maximum possible score, 1199, 218 // it can be boosted up to that maximum possible score if the URL referenced 219 // by the bookmark is also referenced by any of the user's other bookmarks. 220 // A count of how many times the bookmark's URL is referenced is determined 221 // and, for each additional reference beyond the one for the bookmark being 222 // scored up to a maximum of three, the score is boosted by a fixed amount 223 // given by |kURLCountBoost|, below. 224 // 225 ScoringFunctor position_functor = 226 for_each(title_match.match_positions.begin(), 227 title_match.match_positions.end(), ScoringFunctor(title.size())); 228 const int kBaseBookmarkScore = 900; 229 const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1; 230 const double kBookmarkScoreRange = 231 static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore); 232 // It's not likely that GetBookmarksWithTitlesMatching will return overlapping 233 // matches but let's play it safe. 234 match.relevance = std::min(kMaxBookmarkScore, 235 static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) + 236 kBaseBookmarkScore); 237 // Don't waste any time searching for additional referenced URLs if we 238 // already have a perfect title match. 239 if (match.relevance >= kMaxBookmarkScore) 240 return match; 241 // Boost the score if the bookmark's URL is referenced by other bookmarks. 242 const int kURLCountBoost[4] = { 0, 75, 125, 150 }; 243 std::vector<const BookmarkNode*> nodes; 244 bookmark_model_->GetNodesByURL(url, &nodes); 245 DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U); 246 match.relevance += 247 kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1]; 248 match.relevance = std::min(kMaxBookmarkScore, match.relevance); 249 return match; 250} 251 252// static 253ACMatchClassifications BookmarkProvider::ClassificationsFromMatch( 254 const Snippet::MatchPositions& positions, 255 size_t text_length) { 256 ACMatchClassifications classifications; 257 if (positions.empty()) { 258 classifications.push_back( 259 ACMatchClassification(0, ACMatchClassification::NONE)); 260 return classifications; 261 } 262 263 for (Snippet::MatchPositions::const_iterator i = positions.begin(); 264 i != positions.end(); ++i) { 265 AutocompleteMatch::ACMatchClassifications new_class; 266 AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first, 267 text_length, 0, &new_class); 268 classifications = AutocompleteMatch::MergeClassifications( 269 classifications, new_class); 270 } 271 return classifications; 272} 273