scored_history_match.cc revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/scored_history_match.h" 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm> 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <functional> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <iterator> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <numeric> 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <set> 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <math.h> 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/command_line.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/metrics/histogram.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_util.h" 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/utf_string_conversions.h" 192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/autocomplete/history_url_provider.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/autocomplete/url_prefix.h" 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/bookmarks/bookmark_service.h" 222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/omnibox/omnibox_field_trial.h" 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/chrome_switches.h" 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/browser_thread.h" 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace history { 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// The maximum score any candidate match can achieve. 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int kMaxTotalScore = 1425; 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Score ranges used to get a 'base' score for each of the scoring factors 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (such as recency of last visit, times visited, times the URL was typed, 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and the quality of the string match). There is a matching value range for 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// each of these scores for each factor. Note that the top score is greater 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// than |kMaxTotalScore|. The score for each candidate will be capped in the 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// final calculation. 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int kScoreRank[] = { 1450, 1200, 900, 400 }; 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ScoredHistoryMatch ---------------------------------------------------------- 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::initialized_ = false; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::use_new_scoring = false; 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::also_do_hup_like_scoring = false; 442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches = -1; 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ScoredHistoryMatch::ScoredHistoryMatch() 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : raw_score(0), 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) can_inline(false) { 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!initialized_) { 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InitializeNewScoringField(); 512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) initialized_ = true; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row, 572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const std::string& languages, 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16& lower_string, 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const String16Vector& terms, 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RowWordStarts& word_starts, 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const base::Time now, 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) BookmarkService* bookmark_service) 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : HistoryMatch(row, 0, false, false), 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score(0), 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) can_inline(false) { 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!initialized_) { 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) InitializeNewScoringField(); 682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField(); 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) initialized_ = true; 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GURL gurl = row.url(); 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (!gurl.is_valid()) 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Figure out where each search term appears in the URL and/or page title 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // so that we can score as well as provide autocomplete highlighting. 782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) string16 url = CleanUpUrlForMatching(gurl, languages); 792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) string16 title = CleanUpTitleForMatching(row.title()); 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int term_num = 0; 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++iter, ++term_num) { 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) string16 term = *iter; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TermMatches url_term_matches = MatchTermInString(term, url, term_num); 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) TermMatches title_term_matches = MatchTermInString(term, title, term_num); 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (url_term_matches.empty() && title_term_matches.empty()) 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; // A term was not found in either URL or title - reject. 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matches.insert(url_matches.end(), url_term_matches.begin(), 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_term_matches.end()); 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) title_matches.insert(title_matches.end(), title_term_matches.begin(), 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) title_term_matches.end()); 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Sort matches by offset and eliminate any which overlap. 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(mpearson): Investigate whether this has any meaningful 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // effect on scoring. (It's necessary at some point: removing 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // overlaps and sorting is needed to decide what to highlight in the 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // suggestion string. But this sort and de-overlap doesn't have to 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be done before scoring.) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url_matches = SortAndDeoverlapMatches(url_matches); 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) title_matches = SortAndDeoverlapMatches(title_matches); 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // We can inline autocomplete a match if: 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 1) there is only one search term 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 2) AND the match begins immediately after one of the prefixes in 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLPrefix such as http://www and https:// (note that one of these 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is the empty prefix, for cases where the user has typed the scheme) 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // 3) AND the search string does not end in whitespace (making it look to 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the IMUI as though there is a single search term when actually there 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is a second, empty term). 1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // |best_inlineable_prefix| stores the inlineable prefix computed in 1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // clause (2) or NULL if no such prefix exists. (The URL is not inlineable.) 1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Note that using the best prefix here means that when multiple 1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // prefixes match, we'll choose to inline following the longest one. 1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // For a URL like "http://www.washingtonmutual.com", this means 1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // typing "w" will inline "ashington..." instead of "ww.washington...". 1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const URLPrefix* best_inlineable_prefix = 1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) (!url_matches.empty() && (terms.size() == 1)) ? 1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) URLPrefix::BestURLPrefix(UTF8ToUTF16(gurl.spec()), terms[0]) : 1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) NULL; 1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) can_inline = (best_inlineable_prefix != NULL) && 1222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) !IsWhitespace(*(lower_string.rbegin())); 1232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) match_in_scheme = can_inline && best_inlineable_prefix->prefix.empty(); 1242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (can_inline) { 1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Initialize innermost_match. 1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The idea here is that matches that occur in the scheme or 1272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // "www." are worse than matches which don't. For the URLs 1282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // "http://www.google.com" and "http://wellsfargo.com", we want 1292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // the omnibox input "w" to cause the latter URL to rank higher 1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // than the former. Note that this is not the same as checking 1312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // whether one match's inlinable prefix has more components than 1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // the other match's, since in this example, both matches would 1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // have an inlinable prefix of "http://", which is one component. 1342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Instead, we look for the overall best (i.e., most components) 1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // prefix of the current URL, and then check whether the inlinable 1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // prefix has that many components. If it does, this is an 1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // "innermost" match, and should be boosted. In the example 1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // above, the best prefixes for the two URLs have two and one 1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // components respectively, while the inlinable prefixes each 1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // have one component; this means the first match is not innermost 1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // and the second match is innermost, resulting in us boosting the 1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // second match. 1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Now, the code that implements this. 1462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The deepest prefix for this URL regardless of where the match is. 1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const URLPrefix* best_prefix = 1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) URLPrefix::BestURLPrefix(UTF8ToUTF16(gurl.spec()), string16()); 1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DCHECK(best_prefix != NULL); 1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const int num_components_in_best_prefix = best_prefix->num_components; 1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // If the URL is inlineable, we must have a match. Note the prefix that 1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // makes it inlineable may be empty. 1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DCHECK(best_inlineable_prefix != NULL); 1542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const int num_components_in_best_inlineable_prefix = 1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) best_inlineable_prefix->num_components; 1562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) innermost_match = (num_components_in_best_inlineable_prefix == 1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) num_components_in_best_prefix); 1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Determine if the associated URLs is referenced by any bookmarks. 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float bookmark_boost = 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0; 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (use_new_scoring) { 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const float topicality_score = GetTopicalityScore( 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) terms.size(), url, url_matches, title_matches, word_starts); 1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const float recency_score = GetRecencyScore( 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (now - row.last_visit()).InDays()); 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const float popularity_score = GetPopularityScore( 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) row.typed_count() + bookmark_boost, row.visit_count()); 1712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) raw_score = GetFinalRelevancyScore( 1722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) topicality_score, recency_score, popularity_score); 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score = 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max; 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { // "old" scoring 1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Get partial scores based on term matching. Note that the score for 1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // each of the URL and title are adjusted by the fraction of the 1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // terms appearing in each. 1792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int url_score = 1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ScoreComponentForMatches(url_matches, word_starts.url_word_starts_, 1812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) url.length()) * 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::min(url_matches.size(), terms.size()) / terms.size(); 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int title_score = 1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ScoreComponentForMatches(title_matches, word_starts.title_word_starts_, 1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) title.length()) * 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::min(title_matches.size(), terms.size()) / terms.size(); 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Arbitrarily pick the best. 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(mrossetti): It might make sense that a term which appears in both 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the URL and the Title should boost the score a bit. 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int term_score = std::max(url_score, title_score); 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (term_score == 0) 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return; 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Determine scoring factors for the recency of visit, visit count and typed 1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // count attributes of the URLRow. 1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kDaysAgoLevel[] = { 1, 10, 20, 30 }; 1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int days_ago_value = ScoreForValue((base::Time::Now() - 1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) row.last_visit()).InDays(), kDaysAgoLevel); 1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kVisitCountLevel[] = { 50, 30, 10, 5 }; 2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); 2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kTypedCountLevel[] = { 50, 30, 10, 5 }; 2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int typed_count_value = ScoreForValue(row.typed_count() + bookmark_boost, 2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kTypedCountLevel); 2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The final raw score is calculated by: 2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // - multiplying each factor by a 'relevance' 2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // - calculating the average. 2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Note that visit_count is reduced by typed_count because both are bumped 2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // when a typed URL is recorded thus giving visit_count too much weight. 2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kTermScoreRelevance = 4; 2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kDaysAgoRelevance = 2; 2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kVisitCountRelevance = 2; 2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kTypedCountRelevance = 5; 2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int effective_visit_count_value = 2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::max(0, visit_count_value - typed_count_value); 2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score = term_score * kTermScoreRelevance + 2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) days_ago_value * kDaysAgoRelevance + 2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) effective_visit_count_value * kVisitCountRelevance + 2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) typed_count_value * kTypedCountRelevance; 2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score /= (kTermScoreRelevance + kDaysAgoRelevance + 2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kVisitCountRelevance + kTypedCountRelevance); 2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score = std::min(kMaxTotalScore, raw_score); 2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (also_do_hup_like_scoring && can_inline) { 2262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // HistoryURL-provider-like scoring gives any match that is 2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // capable of being inlined a certain minimum score. Some of these 2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // are given a higher score that lets them be shown in inline. 2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // This test here derives from the test in 2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // HistoryURLProvider::PromoteMatchForInlineAutocomplete(). 2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const bool promote_to_inline = (row.typed_count() > 1) || 2322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) (IsHostOnly() && (row.typed_count() == 1)); 2332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int hup_like_score = promote_to_inline ? 2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) HistoryURLProvider::kScoreForBestInlineableResult : 2352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) HistoryURLProvider::kBaseScoreForNonInlineableResult; 2362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Also, if the user types the hostname of a host with a typed 2382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // visit, then everything from that host get given inlineable scores 2392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // (because the URL-that-you-typed will go first and everything 2402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // else will be assigned one minus the previous score, as coded 2412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // at the end of HistoryURLProvider::DoAutocomplete(). 2422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (UTF8ToUTF16(gurl.host()) == terms[0]) 2432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) hup_like_score = HistoryURLProvider::kScoreForBestInlineableResult; 2442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // HistoryURLProvider has the function PromoteOrCreateShorterSuggestion() 2462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // that's meant to promote prefixes of the best match (if they've 2472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // been visited enough related to the best match) or 2482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // create/promote host-only suggestions (even if they've never 2492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // been typed). The code is complicated and we don't try to 2502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // duplicate the logic here. Instead, we handle a simple case: in 2512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // low-typed-count ranges, give host-only matches (i.e., 2522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // http://www.foo.com/ vs. http://www.foo.com/bar.html) a boost so 2532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // that the host-only match outscores all the other matches that 2542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // would normally have the same base score. This behavior is not 2552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // identical to what happens in HistoryURLProvider even in these 2562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // low typed count ranges--sometimes it will create/promote when 2572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // this test does not (indeed, we cannot create matches like HUP 2582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // can) and vice versa--but the underlying philosophy is similar. 2592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!promote_to_inline && IsHostOnly()) 2602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) hup_like_score++; 2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // All the other logic to goes into hup-like-scoring happens in 2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the tie-breaker case of MatchScoreGreater(). 2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Incorporate hup_like_score into raw_score. 2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score = std::max(raw_score, hup_like_score); 2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 2682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // If this match is not inlineable and there's a cap on the maximum 2702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // score that can be given to non-inlineable matches, apply the cap. 2712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!can_inline && (max_assigned_score_for_non_inlineable_matches != -1)) { 2722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) raw_score = std::min(max_assigned_score_for_non_inlineable_matches, 2732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) raw_score); 2742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ScoredHistoryMatch::~ScoredHistoryMatch() {} 2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// std::accumulate helper function to add up TermMatches' lengths as used in 2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ScoreComponentForMatches 2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int AccumulateMatchLength(int total, const TermMatch& match) { 2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return total + match.length; 2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 2862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)int ScoredHistoryMatch::ScoreComponentForMatches( 2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const TermMatches& provided_matches, 2882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const WordStarts& word_starts, 2892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) size_t max_length) { 2902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (provided_matches.empty()) 2912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return 0; 2922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The actual matches we'll use for matching. This is |provided_matches| 2942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // with all the matches not at a word boundary removed. 2952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) TermMatches matches; 2962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) MakeTermMatchesOnlyAtWordBoundaries(provided_matches, word_starts, 2972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) &matches); 2982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (matches.empty()) 3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Score component for whether the input terms (if more than one) were found 3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in the same order in the match. Start with kOrderMaxValue points divided 3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // equally among (number of terms - 1); then discount each of those terms that 3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // is out-of-order in the match. 3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kOrderMaxValue = 1000; 3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int order_value = kOrderMaxValue; 3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (matches.size() > 1) { 3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int max_possible_out_of_order = matches.size() - 1; 3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int out_of_order = 0; 3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 1; i < matches.size(); ++i) { 3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (matches[i - 1].term_num > matches[i].term_num) 3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++out_of_order; 3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / 3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) max_possible_out_of_order; 3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Score component for how early in the match string the first search term 3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // appears. Start with kStartMaxValue points and discount by 3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // kStartMaxValue/kMaxSignificantChars points for each character later than 3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the first at which the term begins. No points are earned if the start of 3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the match occurs at or after kMaxSignificantChars. 3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kStartMaxValue = 1000; 3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int start_value = (kMaxSignificantChars - 3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue / 3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) kMaxSignificantChars; 3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Score component for how much of the matched string the input terms cover. 3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // kCompleteMaxValue points times the fraction of the URL/page title string 3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // that was matched. 3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t term_length_total = std::accumulate(matches.begin(), matches.end(), 3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 0, AccumulateMatchLength); 3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const size_t kMaxSignificantLength = 50; 3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t max_significant_length = 3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::min(max_length, std::max(term_length_total, kMaxSignificantLength)); 3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kCompleteMaxValue = 1000; 3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int complete_value = 3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) term_length_total * kCompleteMaxValue / max_significant_length; 3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kOrderRelevance = 1; 3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kStartRelevance = 6; 3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kCompleteRelevance = 3; 3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int raw_score = order_value * kOrderRelevance + 3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) start_value * kStartRelevance + 3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) complete_value * kCompleteRelevance; 3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance); 3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Scale the raw score into a single score component in the same manner as 3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // used in ScoredMatchForURL(). 3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int kTermScoreLevel[] = { 1000, 750, 500, 200 }; 3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return ScoreForValue(raw_score, kTermScoreLevel); 3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 3562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries( 3572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const TermMatches& provided_matches, 3582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const WordStarts& word_starts, 3592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) TermMatches* matches_at_word_boundaries) { 3602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) matches_at_word_boundaries->clear(); 3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Resize it to an upper-bound estimate of the correct size. 3622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) matches_at_word_boundaries->reserve(provided_matches.size()); 3632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) WordStarts::const_iterator next_word_starts = word_starts.begin(); 3642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) for (TermMatches::const_iterator iter = provided_matches.begin(); 3652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) iter != provided_matches.end(); ++iter) { 3662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Advance next_word_starts until it's >= the position of the term 3672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // we're considering. 3682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) while ((next_word_starts != word_starts.end()) && 3692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) (*next_word_starts < iter->offset)) { 3702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ++next_word_starts; 3712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if ((next_word_starts != word_starts.end()) && 3732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) (*next_word_starts == iter->offset)) { 3742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // At word boundary: copy this element into |matches_at_word_boundaries|. 3752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) matches_at_word_boundaries->push_back(*iter); 3762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// static 3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) { 3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int i = 0; 3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int rank_count = arraysize(kScoreRank); 3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? 3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (value > value_ranks[i]) : (value < value_ranks[i]))) 3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++i; 3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i >= rank_count) 3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return 0; 3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int score = kScoreRank[i]; 3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (i > 0) { 3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) score += (value - value_ranks[i]) * 3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (kScoreRank[i - 1] - kScoreRank[i]) / 3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (value_ranks[i - 1] - value_ranks[i]); 3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return score; 3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Comparison function for sorting ScoredMatches by their scores with 3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// intelligent tie-breaking. 4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, 4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const ScoredHistoryMatch& m2) { 4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (m1.raw_score != m2.raw_score) 4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return m1.raw_score > m2.raw_score; 4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This tie-breaking logic is inspired by / largely copied from the 4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // ordering logic in history_url_provider.cc CompareHistoryMatch(). 4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // A URL that has been typed at all is better than one that has never been 4092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // typed. (Note "!"s on each side.) 4102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (!m1.url_info.typed_count() != !m2.url_info.typed_count()) 4112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return m1.url_info.typed_count() > m2.url_info.typed_count(); 4122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Innermost matches (matches after any scheme or "www.") are better than 4142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // non-innermost matches. 4152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (m1.innermost_match != m2.innermost_match) 4162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return m1.innermost_match; 4172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLs that have been typed more often are better. 4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (m1.url_info.typed_count() != m2.url_info.typed_count()) 4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return m1.url_info.typed_count() > m2.url_info.typed_count(); 4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For URLs that have each been typed once, a host (alone) is better 4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // than a page inside. 4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (m1.url_info.typed_count() == 1) { 4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (m1.IsHostOnly() != m2.IsHostOnly()) 4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return m1.IsHostOnly(); 4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLs that have been visited more often are better. 4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (m1.url_info.visit_count() != m2.url_info.visit_count()) 4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return m1.url_info.visit_count() > m2.url_info.visit_count(); 4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // URLs that have been visited more recently are better. 4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return m1.url_info.last_visit() > m2.url_info.last_visit(); 4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float ScoredHistoryMatch::GetTopicalityScore( 4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const int num_terms, 4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const string16& url, 4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TermMatches& url_matches, 4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const TermMatches& title_matches, 4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const RowWordStarts& word_starts) { 4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Because the below thread is not thread safe, we check that we're 4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // only calling it from one thread: the UI thread. Specifically, 4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we check "if we've heard of the UI thread then we'd better 4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be on it." The first part is necessary so unit tests pass. (Many 4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // unit tests don't set up the threading naming system; hence 4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // CurrentlyOn(UI thread) will fail.) 4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK( 4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !content::BrowserThread::IsWellKnownThread(content::BrowserThread::UI) || 4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (raw_term_score_to_topicality_score == NULL) { 4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_term_score_to_topicality_score = new float[kMaxRawTermScore]; 4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FillInTermScoreToTopicalityScoreArray(); 4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // A vector that accumulates per-term scores. The strongest match--a 4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match in the hostname at a word boundary--is worth 10 points. 4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Everything else is less. In general, a match that's not at a word 4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // boundary is worth about 1/4th or 1/5th of a match at the word boundary 4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in the same part of the URL/title. 4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<int> term_scores(num_terms, 0); 4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<size_t>::const_iterator next_word_starts = 4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_starts.url_word_starts_.begin(); 4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::vector<size_t>::const_iterator end_word_starts = 4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) word_starts.url_word_starts_.end(); 4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const size_t question_mark_pos = url.find('?'); 4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const size_t colon_pos = url.find(':'); 4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The + 3 skips the // that probably appears in the protocol 4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // after the colon. If the protocol doesn't have two slashes after 4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the colon, that's okay--all this ends up doing is starting our 4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // search for the next / a few characters into the hostname. The 4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // only times this can cause problems is if we have a protocol without 4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a // after the colon and the hostname is only one or two characters. 4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // This isn't worth worrying about. 4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const size_t end_of_hostname_pos = (colon_pos != std::string::npos) ? 4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url.find('/', colon_pos + 3) : url.find('/'); 4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t last_part_of_hostname_pos = 4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (end_of_hostname_pos != std::string::npos) ? 4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url.rfind('.', end_of_hostname_pos) : 4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) url.rfind('.'); 4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Loop through all URL matches and score them appropriately. 4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (TermMatches::const_iterator iter = url_matches.begin(); 4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) iter != url_matches.end(); ++iter) { 4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Advance next_word_starts until it's >= the position of the term 4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we're considering. 4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((next_word_starts != end_word_starts) && 4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*next_word_starts < iter->offset)) { 4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++next_word_starts; 4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const bool at_word_boundary = (next_word_starts != end_word_starts) && 4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*next_word_starts == iter->offset); 4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((question_mark_pos != std::string::npos) && 4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (iter->offset > question_mark_pos)) { 4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match in CGI ?... fragment 4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) term_scores[iter->term_num] += at_word_boundary ? 5 : 0; 4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((end_of_hostname_pos != std::string::npos) && 4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (iter->offset > end_of_hostname_pos)) { 4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match in path 5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) term_scores[iter->term_num] += at_word_boundary ? 8 : 1; 5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if ((colon_pos == std::string::npos) || 5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (iter->offset > colon_pos)) { 5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // match in hostname 5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if ((last_part_of_hostname_pos == std::string::npos) || 5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (iter->offset < last_part_of_hostname_pos)) { 5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Either there are no dots in the hostname or this match isn't 5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // the last dotted component. 5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) term_scores[iter->term_num] += at_word_boundary ? 10 : 2; 5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } // else: match in the last part of a dotted hostname (usually 5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // this is the top-level domain .com, .net, etc.). Do not 5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // count this match for scoring. 5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } // else: match in protocol. Do not count this match for scoring. 5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Now do the analogous loop over all matches in the title. 5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) next_word_starts = word_starts.title_word_starts_.begin(); 5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) end_word_starts = word_starts.title_word_starts_.end(); 5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int word_num = 0; 5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (TermMatches::const_iterator iter = title_matches.begin(); 5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) iter != title_matches.end(); ++iter) { 5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Advance next_word_starts until it's >= the position of the term 5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we're considering. 5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) while ((next_word_starts != end_word_starts) && 5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*next_word_starts < iter->offset)) { 5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++next_word_starts; 5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ++word_num; 5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (word_num >= 10) break; // only count the first ten words 5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const bool at_word_boundary = (next_word_starts != end_word_starts) && 5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (*next_word_starts == iter->offset); 5302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) term_scores[iter->term_num] += at_word_boundary ? 8 : 0; 5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(mpearson): Restore logic for penalizing out-of-order matches. 5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (Perhaps discount them by 0.8?) 5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(mpearson): Consider: if the earliest match occurs late in the string, 5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // should we discount it? 5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(mpearson): Consider: do we want to score based on how much of the 5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // input string the input covers? (I'm leaning toward no.) 5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Compute the topicality_score as the sum of transformed term_scores. 5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float topicality_score = 0; 5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (size_t i = 0; i < term_scores.size(); ++i) { 5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) topicality_score += raw_term_score_to_topicality_score[ 5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (term_scores[i] >= kMaxRawTermScore)? kMaxRawTermScore - 1: 5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) term_scores[i]]; 5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // TODO(mpearson): If there are multiple terms, consider taking the 5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // geometric mean of per-term scores rather than sum as we're doing now 5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (which is equivalent to the arthimatic mean). 5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return topicality_score; 5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float* ScoredHistoryMatch::raw_term_score_to_topicality_score = NULL; 5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() { 5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) { 5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float topicality_score; 5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (term_score < 10) { 5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // If the term scores less than 10 points (no full-credit hit, or 5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // no combination of hits that score that well), then the topicality 5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // score is linear in the term score. 5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) topicality_score = 0.1 * term_score; 5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For term scores of at least ten points, pass them through a log 5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // function so a score of 10 points gets a 1.0 (to meet up exactly 5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // with the linear component) and increases logarithmically until 5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // maxing out at 30 points, with computes to a score around 2.1. 5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) topicality_score = (1.0 + 2.25 * log10(0.1 * term_score)); 5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) raw_term_score_to_topicality_score[term_score] = topicality_score; 5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float* ScoredHistoryMatch::days_ago_to_recency_score = NULL; 5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float ScoredHistoryMatch::GetRecencyScore(int last_visit_days_ago) { 5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Because the below thread is not thread safe, we check that we're 5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // only calling it from one thread: the UI thread. Specifically, 5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we check "if we've heard of the UI thread then we'd better 5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be on it." The first part is necessary so unit tests pass. (Many 5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // unit tests don't set up the threading naming system; hence 5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // CurrentlyOn(UI thread) will fail.) 5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK( 5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) !content::BrowserThread::IsWellKnownThread(content::BrowserThread::UI) || 5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (days_ago_to_recency_score == NULL) { 5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) days_ago_to_recency_score = new float[kDaysToPrecomputeRecencyScoresFor]; 5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FillInDaysAgoToRecencyScoreArray(); 5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Lookup the score in days_ago_to_recency_score, treating 5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // everything older than what we've precomputed as the oldest thing 5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // we've precomputed. The std::max is to protect against corruption 5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // in the database (in case last_visit_days_ago is negative). 5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return days_ago_to_recency_score[ 5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::max( 6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::min(last_visit_days_ago, kDaysToPrecomputeRecencyScoresFor - 1), 6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 0)]; 6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ScoredHistoryMatch::FillInDaysAgoToRecencyScoreArray() { 6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) for (int days_ago = 0; days_ago < kDaysToPrecomputeRecencyScoresFor; 6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) days_ago++) { 6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int unnormalized_recency_score; 6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (days_ago <= 1) { 6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unnormalized_recency_score = 100; 6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (days_ago <= 7) { 6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Linearly extrapolate between 1 and 7 days so 7 days has a score of 70. 6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unnormalized_recency_score = 70 + (7 - days_ago) * (100 - 70) / (7 - 1); 6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (days_ago <= 30) { 6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Linearly extrapolate between 7 and 30 days so 30 days has a score 6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of 50. 6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unnormalized_recency_score = 50 + (30 - days_ago) * (70 - 50) / (30 - 7); 6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (days_ago <= 90) { 6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Linearly extrapolate between 30 and 90 days so 90 days has a score 6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of 20. 6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unnormalized_recency_score = 20 + (90 - days_ago) * (50 - 20) / (90 - 30); 6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Linearly extrapolate between 90 and 365 days so 365 days has a score 6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // of 10. 6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) unnormalized_recency_score = 6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 10 + (365 - days_ago) * (20 - 10) / (365 - 90); 6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) days_ago_to_recency_score[days_ago] = unnormalized_recency_score / 100.0; 6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (days_ago > 0) { 6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK_LE(days_ago_to_recency_score[days_ago], 6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) days_ago_to_recency_score[days_ago - 1]); 6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static 6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float ScoredHistoryMatch::GetPopularityScore(int typed_count, 6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int visit_count) { 6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The max()s are to guard against database corruption. 6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) / 6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) (5.0 + 3.0); 6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// static 6442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)float ScoredHistoryMatch::GetFinalRelevancyScore( 6452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) float topicality_score, float recency_score, float popularity_score) { 6462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Here's how to interpret intermediate_score: Suppose the omnibox 6472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // has one input term. Suppose we have a URL that has 5 typed 6482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // visits with the most recent being within a day and the omnibox 6492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // input term has a single URL hostname hit at a word boundary. 6502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // This URL will have an intermediate_score of 5.0 (= 1 topicality * 6512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // 1 recency * 5 popularity). 6522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) float intermediate_score = 6532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) topicality_score * recency_score * popularity_score; 6542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The below code takes intermediate_score from [0, infinity) to 6552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // relevancy scores in the range [0, 1400). 6562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) float attenuating_factor = 1.0; 6572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (intermediate_score < 4) { 6582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // The formula in the final return line in this function only works if 6592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // intermediate_score > 4. For lower scores, we linearly interpolate 6602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // between 0 and the formula when intermediate_score = 4.0. 6612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) attenuating_factor = intermediate_score / 4.0; 6622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) intermediate_score = 4.0; 6632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 6642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) DCHECK_GE(intermediate_score, 4.0); 6652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return attenuating_factor * 1400.0 * (2.0 - exp(2.0 / intermediate_score)); 6662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 6672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ScoredHistoryMatch::InitializeNewScoringField() { 6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum NewScoringOption { 6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) OLD_SCORING = 0, 6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEW_SCORING = 1, 6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2, 6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3, 6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4, 6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NUM_OPTIONS = 5 6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // should always be overwritten 6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) NewScoringOption new_scoring_option = NUM_OPTIONS; 6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const std::string switch_value = CommandLine::ForCurrentProcess()-> 6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) GetSwitchValueASCII(switches::kOmniboxHistoryQuickProviderNewScoring); 6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) if (switch_value == switches::kOmniboxHistoryQuickProviderNewScoringEnabled) { 6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_scoring_option = NEW_SCORING; 6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) use_new_scoring = true; 6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else if (switch_value == 6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) switches::kOmniboxHistoryQuickProviderNewScoringDisabled) { 6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_scoring_option = OLD_SCORING; 6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) use_new_scoring = false; 6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // We'll assume any other flag means automatic. 6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Automatic means eligible for the field trial. 6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // For the field trial stuff to work correctly, we must be running 6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // on the same thread as the thread that created the field trial, 6952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // which happens via a call to OmniboxFieldTrial::Active in 6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // chrome_browser_main.cc on the main thread. Let's check this to 6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be sure. We check "if we've heard of the UI thread then we'd better 6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // be on it." The first part is necessary so unit tests pass. (Many 6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // unit tests don't set up the threading naming system; hence 7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // CurrentlyOn(UI thread) will fail.) 7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DCHECK(!content::BrowserThread::IsWellKnownThread( 7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) content::BrowserThread::UI) || 7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) content::BrowserThread::CurrentlyOn(content::BrowserThread::UI)); 7042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (OmniboxFieldTrial::InHQPNewScoringFieldTrial()) { 7052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (OmniboxFieldTrial::InHQPNewScoringFieldTrialExperimentGroup()) { 7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_scoring_option = NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP; 7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) use_new_scoring = true; 7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_scoring_option = NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP; 7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) use_new_scoring = false; 7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } else { 7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_scoring_option = NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL; 7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) use_new_scoring = false; 7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) } 7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Add a beacon to the logs that'll allow us to identify later what 7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // new scoring state a user is in. Do this by incrementing a bucket in 7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // a histogram, where the bucket represents the user's new scoring state. 7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) UMA_HISTOGRAM_ENUMERATION( 7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon", 7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) new_scoring_option, NUM_OPTIONS); 7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField() { 7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) also_do_hup_like_scoring = 7282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) OmniboxFieldTrial::InHQPReplaceHUPScoringFieldTrial() && 7292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) OmniboxFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup(); 7302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // When doing HUP-like scoring, don't allow a non-inlineable match 7312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // to beat the score of good inlineable matches. This is a problem 7322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // because if a non-inlineable match ends up with the highest score 7332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // from HistoryQuick provider, all HistoryQuick matches get demoted 7342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // to non-inlineable scores (scores less than 1200). Without 7352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // HUP-like-scoring, these results would actually come from the HUP 7362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // and not be demoted, thus outscoring the demoted HQP results. 7372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // When the HQP provides these, we need to clamp the non-inlineable 7382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // results to preserve this behavior. 7392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) if (also_do_hup_like_scoring) { 7402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) max_assigned_score_for_non_inlineable_matches = 7412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) HistoryURLProvider::kScoreForBestInlineableResult - 1; 7422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace history 746