scored_history_match.cc revision 2a99a7e74a7f215066514fe81d2bfa6639d9eddd
15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/history/scored_history_match.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <functional>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <iterator>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <numeric>
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <set>
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <math.h>
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/command_line.h"
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/metrics/histogram.h"
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/string_util.h"
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/utf_string_conversions.h"
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/autocomplete/history_url_provider.h"
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/autocomplete/url_prefix.h"
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/bookmarks/bookmark_service.h"
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "chrome/browser/omnibox/omnibox_field_trial.h"
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/common/chrome_switches.h"
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/browser_thread.h"
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace history {
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// The maximum score any candidate match can achieve.
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int kMaxTotalScore = 1425;
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Score ranges used to get a 'base' score for each of the scoring factors
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// (such as recency of last visit, times visited, times the URL was typed,
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// and the quality of the string match). There is a matching value range for
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// each of these scores for each factor. Note that the top score is greater
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// than |kMaxTotalScore|. The score for each candidate will be capped in the
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// final calculation.
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)const int kScoreRank[] = { 1450, 1200, 900, 400 };
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ScoredHistoryMatch ----------------------------------------------------------
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::initialized_ = false;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::use_new_scoring = false;
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::also_do_hup_like_scoring = false;
442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)int ScoredHistoryMatch::max_assigned_score_for_non_inlineable_matches = -1;
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ScoredHistoryMatch::ScoredHistoryMatch()
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : raw_score(0),
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      can_inline(false) {
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!initialized_) {
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    InitializeNewScoringField();
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField();
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    initialized_ = true;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ScoredHistoryMatch::ScoredHistoryMatch(const URLRow& row,
572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                       const std::string& languages,
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       const string16& lower_string,
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       const String16Vector& terms,
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       const RowWordStarts& word_starts,
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       const base::Time now,
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       BookmarkService* bookmark_service)
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    : HistoryMatch(row, 0, false, false),
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      raw_score(0),
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      can_inline(false) {
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!initialized_) {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    InitializeNewScoringField();
682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField();
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    initialized_ = true;
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  GURL gurl = row.url();
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!gurl.is_valid())
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return;
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Figure out where each search term appears in the URL and/or page title
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // so that we can score as well as provide autocomplete highlighting.
782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  string16 url = CleanUpUrlForMatching(gurl, languages);
792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  string16 title = CleanUpTitleForMatching(row.title());
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int term_num = 0;
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       ++iter, ++term_num) {
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    string16 term = *iter;
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    TermMatches url_term_matches = MatchTermInString(term, url, term_num);
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    TermMatches title_term_matches = MatchTermInString(term, title, term_num);
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (url_term_matches.empty() && title_term_matches.empty())
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;  // A term was not found in either URL or title - reject.
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    url_matches.insert(url_matches.end(), url_term_matches.begin(),
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                       url_term_matches.end());
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    title_matches.insert(title_matches.end(), title_term_matches.begin(),
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                         title_term_matches.end());
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Sort matches by offset and eliminate any which overlap.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(mpearson): Investigate whether this has any meaningful
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // effect on scoring.  (It's necessary at some point: removing
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // overlaps and sorting is needed to decide what to highlight in the
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // suggestion string.  But this sort and de-overlap doesn't have to
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // be done before scoring.)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  url_matches = SortAndDeoverlapMatches(url_matches);
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  title_matches = SortAndDeoverlapMatches(title_matches);
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // We can inline autocomplete a match if:
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //  1) there is only one search term
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //  2) AND the match begins immediately after one of the prefixes in
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //     URLPrefix such as http://www and https:// (note that one of these
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //     is the empty prefix, for cases where the user has typed the scheme)
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //  3) AND the search string does not end in whitespace (making it look to
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //     the IMUI as though there is a single search term when actually there
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  //     is a second, empty term).
1112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // |best_inlineable_prefix| stores the inlineable prefix computed in
1122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // clause (2) or NULL if no such prefix exists.  (The URL is not inlineable.)
1132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Note that using the best prefix here means that when multiple
1142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // prefixes match, we'll choose to inline following the longest one.
1152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // For a URL like "http://www.washingtonmutual.com", this means
1162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // typing "w" will inline "ashington..." instead of "ww.washington...".
1172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const URLPrefix* best_inlineable_prefix =
1182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      (!url_matches.empty() && (terms.size() == 1)) ?
1192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      URLPrefix::BestURLPrefix(UTF8ToUTF16(gurl.spec()), terms[0]) :
1202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      NULL;
1212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  can_inline = (best_inlineable_prefix != NULL) &&
1222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      !IsWhitespace(*(lower_string.rbegin()));
1232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  match_in_scheme = can_inline && best_inlineable_prefix->prefix.empty();
1242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (can_inline) {
1252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Initialize innermost_match.
1262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // The idea here is that matches that occur in the scheme or
1272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // "www." are worse than matches which don't.  For the URLs
1282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // "http://www.google.com" and "http://wellsfargo.com", we want
1292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // the omnibox input "w" to cause the latter URL to rank higher
1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // than the former.  Note that this is not the same as checking
1312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // whether one match's inlinable prefix has more components than
1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // the other match's, since in this example, both matches would
1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // have an inlinable prefix of "http://", which is one component.
1342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    //
1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Instead, we look for the overall best (i.e., most components)
1362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // prefix of the current URL, and then check whether the inlinable
1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // prefix has that many components.  If it does, this is an
1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // "innermost" match, and should be boosted.  In the example
1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // above, the best prefixes for the two URLs have two and one
1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // components respectively, while the inlinable prefixes each
1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // have one component; this means the first match is not innermost
1422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // and the second match is innermost, resulting in us boosting the
1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // second match.
1442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    //
1452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Now, the code that implements this.
1462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // The deepest prefix for this URL regardless of where the match is.
1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const URLPrefix* best_prefix =
1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        URLPrefix::BestURLPrefix(UTF8ToUTF16(gurl.spec()), string16());
1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    DCHECK(best_prefix != NULL);
1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int num_components_in_best_prefix = best_prefix->num_components;
1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // If the URL is inlineable, we must have a match.  Note the prefix that
1522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // makes it inlineable may be empty.
1532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    DCHECK(best_inlineable_prefix != NULL);
1542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const int num_components_in_best_inlineable_prefix =
1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        best_inlineable_prefix->num_components;
1562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    innermost_match = (num_components_in_best_inlineable_prefix ==
1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        num_components_in_best_prefix);
1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Determine if the associated URLs is referenced by any bookmarks.
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  float bookmark_boost =
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (bookmark_service && bookmark_service->IsBookmarked(gurl)) ? 10.0 : 0.0;
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (use_new_scoring) {
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const float topicality_score = GetTopicalityScore(
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        terms.size(), url, url_matches, title_matches, word_starts);
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const float recency_score = GetRecencyScore(
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (now - row.last_visit()).InDays());
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const float popularity_score = GetPopularityScore(
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        row.typed_count() + bookmark_boost, row.visit_count());
1712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    raw_score = GetFinalRelevancyScore(
1722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        topicality_score, recency_score, popularity_score);
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_score =
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (raw_score <= kint32max) ? static_cast<int>(raw_score) : kint32max;
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {  // "old" scoring
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Get partial scores based on term matching. Note that the score for
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // each of the URL and title are adjusted by the fraction of the
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // terms appearing in each.
1792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int url_score =
1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        ScoreComponentForMatches(url_matches, word_starts.url_word_starts_,
1812a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                 url.length()) *
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        std::min(url_matches.size(), terms.size()) / terms.size();
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int title_score =
1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        ScoreComponentForMatches(title_matches, word_starts.title_word_starts_,
1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                 title.length()) *
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        std::min(title_matches.size(), terms.size()) / terms.size();
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Arbitrarily pick the best.
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // TODO(mrossetti): It might make sense that a term which appears in both
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // the URL and the Title should boost the score a bit.
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int term_score = std::max(url_score, title_score);
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (term_score == 0)
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return;
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Determine scoring factors for the recency of visit, visit count and typed
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // count attributes of the URLRow.
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kDaysAgoLevel[] = { 1, 10, 20, 30 };
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int days_ago_value = ScoreForValue((base::Time::Now() -
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        row.last_visit()).InDays(), kDaysAgoLevel);
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kVisitCountLevel[] = { 50, 30, 10, 5 };
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kTypedCountLevel[] = { 50, 30, 10, 5 };
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int typed_count_value = ScoreForValue(row.typed_count() + bookmark_boost,
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                          kTypedCountLevel);
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // The final raw score is calculated by:
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //   - multiplying each factor by a 'relevance'
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    //   - calculating the average.
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Note that visit_count is reduced by typed_count because both are bumped
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // when a typed URL is recorded thus giving visit_count too much weight.
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kTermScoreRelevance = 4;
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kDaysAgoRelevance = 2;
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kVisitCountRelevance = 2;
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int kTypedCountRelevance = 5;
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int effective_visit_count_value =
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        std::max(0, visit_count_value - typed_count_value);
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_score = term_score * kTermScoreRelevance +
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                days_ago_value * kDaysAgoRelevance +
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                effective_visit_count_value * kVisitCountRelevance +
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                typed_count_value * kTypedCountRelevance;
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_score /= (kTermScoreRelevance + kDaysAgoRelevance +
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  kVisitCountRelevance + kTypedCountRelevance);
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_score = std::min(kMaxTotalScore, raw_score);
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (also_do_hup_like_scoring && can_inline) {
2262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // HistoryURL-provider-like scoring gives any match that is
2272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // capable of being inlined a certain minimum score.  Some of these
2282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // are given a higher score that lets them be shown in inline.
2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // This test here derives from the test in
2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // HistoryURLProvider::PromoteMatchForInlineAutocomplete().
2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const bool promote_to_inline = (row.typed_count() > 1) ||
2322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        (IsHostOnly() && (row.typed_count() == 1));
2332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    int hup_like_score = promote_to_inline ?
2342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        HistoryURLProvider::kScoreForBestInlineableResult :
2352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        HistoryURLProvider::kBaseScoreForNonInlineableResult;
2362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Also, if the user types the hostname of a host with a typed
2382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // visit, then everything from that host get given inlineable scores
2392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // (because the URL-that-you-typed will go first and everything
2402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // else will be assigned one minus the previous score, as coded
2412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // at the end of HistoryURLProvider::DoAutocomplete().
2422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (UTF8ToUTF16(gurl.host()) == terms[0])
2432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      hup_like_score = HistoryURLProvider::kScoreForBestInlineableResult;
2442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // HistoryURLProvider has the function PromoteOrCreateShorterSuggestion()
2462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // that's meant to promote prefixes of the best match (if they've
2472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // been visited enough related to the best match) or
2482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // create/promote host-only suggestions (even if they've never
2492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // been typed).  The code is complicated and we don't try to
2502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // duplicate the logic here.  Instead, we handle a simple case: in
2512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // low-typed-count ranges, give host-only matches (i.e.,
2522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // http://www.foo.com/ vs. http://www.foo.com/bar.html) a boost so
2532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // that the host-only match outscores all the other matches that
2542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // would normally have the same base score.  This behavior is not
2552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // identical to what happens in HistoryURLProvider even in these
2562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // low typed count ranges--sometimes it will create/promote when
2572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // this test does not (indeed, we cannot create matches like HUP
2582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // can) and vice versa--but the underlying philosophy is similar.
2592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (!promote_to_inline && IsHostOnly())
2602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      hup_like_score++;
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // All the other logic to goes into hup-like-scoring happens in
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // the tie-breaker case of MatchScoreGreater().
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Incorporate hup_like_score into raw_score.
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_score = std::max(raw_score, hup_like_score);
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // If this match is not inlineable and there's a cap on the maximum
2702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // score that can be given to non-inlineable matches, apply the cap.
2712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!can_inline && (max_assigned_score_for_non_inlineable_matches != -1)) {
2722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    raw_score = std::min(max_assigned_score_for_non_inlineable_matches,
2732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                         raw_score);
2742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)ScoredHistoryMatch::~ScoredHistoryMatch() {}
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// std::accumulate helper function to add up TermMatches' lengths as used in
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// ScoreComponentForMatches
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int AccumulateMatchLength(int total, const TermMatch& match) {
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return total + match.length;
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
2862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)int ScoredHistoryMatch::ScoreComponentForMatches(
2872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const TermMatches& provided_matches,
2882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const WordStarts& word_starts,
2892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    size_t max_length) {
2902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (provided_matches.empty())
2912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return 0;
2922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // The actual matches we'll use for matching.  This is |provided_matches|
2942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // with all the matches not at a word boundary removed.
2952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  TermMatches matches;
2962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  MakeTermMatchesOnlyAtWordBoundaries(provided_matches, word_starts,
2972a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                      &matches);
2982a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (matches.empty())
3005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
3015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Score component for whether the input terms (if more than one) were found
3035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // in the same order in the match.  Start with kOrderMaxValue points divided
3045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // equally among (number of terms - 1); then discount each of those terms that
3055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is out-of-order in the match.
3065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kOrderMaxValue = 1000;
3075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int order_value = kOrderMaxValue;
3085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (matches.size() > 1) {
3095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int max_possible_out_of_order = matches.size() - 1;
3105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int out_of_order = 0;
3115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 1; i < matches.size(); ++i) {
3125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (matches[i - 1].term_num > matches[i].term_num)
3135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++out_of_order;
3145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
3155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /
3165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        max_possible_out_of_order;
3175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Score component for how early in the match string the first search term
3205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // appears.  Start with kStartMaxValue points and discount by
3215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // kStartMaxValue/kMaxSignificantChars points for each character later than
3225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the first at which the term begins. No points are earned if the start of
3235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the match occurs at or after kMaxSignificantChars.
3245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kStartMaxValue = 1000;
3255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int start_value = (kMaxSignificantChars -
3265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      std::min(kMaxSignificantChars, matches[0].offset)) * kStartMaxValue /
3275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      kMaxSignificantChars;
3285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Score component for how much of the matched string the input terms cover.
3305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // kCompleteMaxValue points times the fraction of the URL/page title string
3315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // that was matched.
3325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t term_length_total = std::accumulate(matches.begin(), matches.end(),
3335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                             0, AccumulateMatchLength);
3345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t kMaxSignificantLength = 50;
3355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t max_significant_length =
3365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      std::min(max_length, std::max(term_length_total, kMaxSignificantLength));
3375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kCompleteMaxValue = 1000;
3385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int complete_value =
3395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      term_length_total * kCompleteMaxValue / max_significant_length;
3405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kOrderRelevance = 1;
3425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kStartRelevance = 6;
3435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kCompleteRelevance = 3;
3445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int raw_score = order_value * kOrderRelevance +
3455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  start_value * kStartRelevance +
3465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                  complete_value * kCompleteRelevance;
3475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  raw_score /= (kOrderRelevance + kStartRelevance + kCompleteRelevance);
3485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Scale the raw score into a single score component in the same manner as
3505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // used in ScoredMatchForURL().
3515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const int kTermScoreLevel[] = { 1000, 750, 500, 200 };
3525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return ScoreForValue(raw_score, kTermScoreLevel);
3535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
3562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ScoredHistoryMatch::MakeTermMatchesOnlyAtWordBoundaries(
3572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const TermMatches& provided_matches,
3582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    const WordStarts& word_starts,
3592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    TermMatches* matches_at_word_boundaries) {
3602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  matches_at_word_boundaries->clear();
3612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Resize it to an upper-bound estimate of the correct size.
3622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  matches_at_word_boundaries->reserve(provided_matches.size());
3632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  WordStarts::const_iterator next_word_starts = word_starts.begin();
3642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  for (TermMatches::const_iterator iter = provided_matches.begin();
3652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)       iter != provided_matches.end(); ++iter) {
3662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // Advance next_word_starts until it's >= the position of the term
3672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // we're considering.
3682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    while ((next_word_starts != word_starts.end()) &&
3692a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)           (*next_word_starts < iter->offset)) {
3702a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      ++next_word_starts;
3712a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
3722a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if ((next_word_starts != word_starts.end()) &&
3732a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        (*next_word_starts == iter->offset)) {
3742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      // At word boundary: copy this element into |matches_at_word_boundaries|.
3752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      matches_at_word_boundaries->push_back(*iter);
3762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    }
3772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
3782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
3792a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
3802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// static
3815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)int ScoredHistoryMatch::ScoreForValue(int value, const int* value_ranks) {
3825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int i = 0;
3835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int rank_count = arraysize(kScoreRank);
3845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?
3855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         (value > value_ranks[i]) : (value < value_ranks[i])))
3865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ++i;
3875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (i >= rank_count)
3885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return 0;
3895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int score = kScoreRank[i];
3905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (i > 0) {
3915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    score += (value - value_ranks[i]) *
3925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (kScoreRank[i - 1] - kScoreRank[i]) /
3935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (value_ranks[i - 1] - value_ranks[i]);
3945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
3955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return score;
3965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
3975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
3985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Comparison function for sorting ScoredMatches by their scores with
3995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// intelligent tie-breaking.
4005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
4015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                           const ScoredHistoryMatch& m2) {
4025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (m1.raw_score != m2.raw_score)
4035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return m1.raw_score > m2.raw_score;
4045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This tie-breaking logic is inspired by / largely copied from the
4065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // ordering logic in history_url_provider.cc CompareHistoryMatch().
4075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4082a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // A URL that has been typed at all is better than one that has never been
4092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // typed.  (Note "!"s on each side.)
4102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (!m1.url_info.typed_count() != !m2.url_info.typed_count())
4112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return m1.url_info.typed_count() > m2.url_info.typed_count();
4122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Innermost matches (matches after any scheme or "www.") are better than
4142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // non-innermost matches.
4152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (m1.innermost_match != m2.innermost_match)
4162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    return m1.innermost_match;
4172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
4185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLs that have been typed more often are better.
4195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (m1.url_info.typed_count() != m2.url_info.typed_count())
4205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return m1.url_info.typed_count() > m2.url_info.typed_count();
4215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For URLs that have each been typed once, a host (alone) is better
4235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // than a page inside.
4245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (m1.url_info.typed_count() == 1) {
4255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (m1.IsHostOnly() != m2.IsHostOnly())
4265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return m1.IsHostOnly();
4275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLs that have been visited more often are better.
4305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (m1.url_info.visit_count() != m2.url_info.visit_count())
4315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return m1.url_info.visit_count() > m2.url_info.visit_count();
4325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // URLs that have been visited more recently are better.
4345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return m1.url_info.last_visit() > m2.url_info.last_visit();
4355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
4365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
4375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
4385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float ScoredHistoryMatch::GetTopicalityScore(
4395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const int num_terms,
4405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const string16& url,
4415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const TermMatches& url_matches,
4425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const TermMatches& title_matches,
4435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const RowWordStarts& word_starts) {
4445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Because the below thread is not thread safe, we check that we're
4455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // only calling it from one thread: the UI thread.  Specifically,
4465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // we check "if we've heard of the UI thread then we'd better
4475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // be on it."  The first part is necessary so unit tests pass.  (Many
4485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // unit tests don't set up the threading naming system; hence
4495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // CurrentlyOn(UI thread) will fail.)
4505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(
4515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      !content::BrowserThread::IsWellKnownThread(content::BrowserThread::UI) ||
4525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
4535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (raw_term_score_to_topicality_score == NULL) {
4545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_term_score_to_topicality_score = new float[kMaxRawTermScore];
4555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    FillInTermScoreToTopicalityScoreArray();
4565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
4575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // A vector that accumulates per-term scores.  The strongest match--a
4585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // match in the hostname at a word boundary--is worth 10 points.
4595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Everything else is less.  In general, a match that's not at a word
4605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // boundary is worth about 1/4th or 1/5th of a match at the word boundary
4615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // in the same part of the URL/title.
4625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<int> term_scores(num_terms, 0);
4635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<size_t>::const_iterator next_word_starts =
4645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      word_starts.url_word_starts_.begin();
4655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<size_t>::const_iterator end_word_starts =
4665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      word_starts.url_word_starts_.end();
4675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t question_mark_pos = url.find('?');
4685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t colon_pos = url.find(':');
4695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The + 3 skips the // that probably appears in the protocol
4705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // after the colon.  If the protocol doesn't have two slashes after
4715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the colon, that's okay--all this ends up doing is starting our
4725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // search for the next / a few characters into the hostname.  The
4735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // only times this can cause problems is if we have a protocol without
4745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a // after the colon and the hostname is only one or two characters.
4755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This isn't worth worrying about.
4765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t end_of_hostname_pos = (colon_pos != std::string::npos) ?
4775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url.find('/', colon_pos + 3) : url.find('/');
4785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t last_part_of_hostname_pos =
4795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (end_of_hostname_pos != std::string::npos) ?
4805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url.rfind('.', end_of_hostname_pos) :
4815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      url.rfind('.');
4825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Loop through all URL matches and score them appropriately.
4835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (TermMatches::const_iterator iter = url_matches.begin();
4845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != url_matches.end(); ++iter) {
4855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Advance next_word_starts until it's >= the position of the term
4865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // we're considering.
4875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((next_word_starts != end_word_starts) &&
4885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           (*next_word_starts < iter->offset)) {
4895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++next_word_starts;
4905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
4915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const bool at_word_boundary = (next_word_starts != end_word_starts) &&
4925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (*next_word_starts == iter->offset);
4935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if ((question_mark_pos != std::string::npos) &&
4945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (iter->offset > question_mark_pos)) {
4955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // match in CGI ?... fragment
4965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      term_scores[iter->term_num] += at_word_boundary ? 5 : 0;
4975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if ((end_of_hostname_pos != std::string::npos) &&
4985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (iter->offset > end_of_hostname_pos)) {
4995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // match in path
5005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      term_scores[iter->term_num] += at_word_boundary ? 8 : 1;
5015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if ((colon_pos == std::string::npos) ||
5025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)         (iter->offset > colon_pos)) {
5035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // match in hostname
5045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if ((last_part_of_hostname_pos == std::string::npos) ||
5055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          (iter->offset < last_part_of_hostname_pos)) {
5065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Either there are no dots in the hostname or this match isn't
5075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // the last dotted component.
5085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        term_scores[iter->term_num] += at_word_boundary ? 10 : 2;
5095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } // else: match in the last part of a dotted hostname (usually
5105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // this is the top-level domain .com, .net, etc.).  Do not
5115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // count this match for scoring.
5125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } // else: match in protocol.  Do not count this match for scoring.
5135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Now do the analogous loop over all matches in the title.
5155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  next_word_starts = word_starts.title_word_starts_.begin();
5165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  end_word_starts = word_starts.title_word_starts_.end();
5175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int word_num = 0;
5185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (TermMatches::const_iterator iter = title_matches.begin();
5195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       iter != title_matches.end(); ++iter) {
5205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Advance next_word_starts until it's >= the position of the term
5215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // we're considering.
5225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    while ((next_word_starts != end_word_starts) &&
5235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           (*next_word_starts < iter->offset)) {
5245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++next_word_starts;
5255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      ++word_num;
5265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (word_num >= 10) break;  // only count the first ten words
5285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const bool at_word_boundary = (next_word_starts != end_word_starts) &&
5295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (*next_word_starts == iter->offset);
5302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    term_scores[iter->term_num] += at_word_boundary ? 8 : 0;
5315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(mpearson): Restore logic for penalizing out-of-order matches.
5335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (Perhaps discount them by 0.8?)
5345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(mpearson): Consider: if the earliest match occurs late in the string,
5355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // should we discount it?
5365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(mpearson): Consider: do we want to score based on how much of the
5375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // input string the input covers?  (I'm leaning toward no.)
5385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Compute the topicality_score as the sum of transformed term_scores.
5405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  float topicality_score = 0;
5415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t i = 0; i < term_scores.size(); ++i) {
5425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    topicality_score += raw_term_score_to_topicality_score[
5435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (term_scores[i] >= kMaxRawTermScore)? kMaxRawTermScore - 1:
5445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        term_scores[i]];
5455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(mpearson): If there are multiple terms, consider taking the
5475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // geometric mean of per-term scores rather than sum as we're doing now
5485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (which is equivalent to the arthimatic mean).
5495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return topicality_score;
5515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
5545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float* ScoredHistoryMatch::raw_term_score_to_topicality_score = NULL;
5555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
5575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ScoredHistoryMatch::FillInTermScoreToTopicalityScoreArray() {
5585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int term_score = 0; term_score < kMaxRawTermScore; ++term_score) {
5595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    float topicality_score;
5605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (term_score < 10) {
5615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // If the term scores less than 10 points (no full-credit hit, or
5625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // no combination of hits that score that well), then the topicality
5635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // score is linear in the term score.
5645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      topicality_score = 0.1 * term_score;
5655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
5665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // For term scores of at least ten points, pass them through a log
5675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // function so a score of 10 points gets a 1.0 (to meet up exactly
5685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // with the linear component) and increases logarithmically until
5695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // maxing out at 30 points, with computes to a score around 2.1.
5705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      topicality_score = (1.0 + 2.25 * log10(0.1 * term_score));
5715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
5725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    raw_term_score_to_topicality_score[term_score] = topicality_score;
5735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
5755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
5775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float* ScoredHistoryMatch::days_ago_to_recency_score = NULL;
5785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
5795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
5805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float ScoredHistoryMatch::GetRecencyScore(int last_visit_days_ago) {
5815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Because the below thread is not thread safe, we check that we're
5825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // only calling it from one thread: the UI thread.  Specifically,
5835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // we check "if we've heard of the UI thread then we'd better
5845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // be on it."  The first part is necessary so unit tests pass.  (Many
5855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // unit tests don't set up the threading naming system; hence
5865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // CurrentlyOn(UI thread) will fail.)
5875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(
5885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      !content::BrowserThread::IsWellKnownThread(content::BrowserThread::UI) ||
5895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
5905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (days_ago_to_recency_score == NULL) {
5915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    days_ago_to_recency_score = new float[kDaysToPrecomputeRecencyScoresFor];
5925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    FillInDaysAgoToRecencyScoreArray();
5935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
5945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Lookup the score in days_ago_to_recency_score, treating
5955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // everything older than what we've precomputed as the oldest thing
5965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // we've precomputed.  The std::max is to protect against corruption
5975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // in the database (in case last_visit_days_ago is negative).
5985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return days_ago_to_recency_score[
5995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      std::max(
6005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      std::min(last_visit_days_ago, kDaysToPrecomputeRecencyScoresFor - 1),
6015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      0)];
6025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ScoredHistoryMatch::FillInDaysAgoToRecencyScoreArray() {
6055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (int days_ago = 0; days_ago < kDaysToPrecomputeRecencyScoresFor;
6065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)       days_ago++) {
6075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int unnormalized_recency_score;
6085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (days_ago <= 1) {
6095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unnormalized_recency_score = 100;
6105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (days_ago <= 7) {
6115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Linearly extrapolate between 1 and 7 days so 7 days has a score of 70.
6125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unnormalized_recency_score = 70 + (7 - days_ago) * (100 - 70) / (7 - 1);
6135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (days_ago <= 30) {
6145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Linearly extrapolate between 7 and 30 days so 30 days has a score
6155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // of 50.
6165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unnormalized_recency_score = 50 + (30 - days_ago) * (70 - 50) / (30 - 7);
6175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else if (days_ago <= 90) {
6185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Linearly extrapolate between 30 and 90 days so 90 days has a score
6195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // of 20.
6205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unnormalized_recency_score = 20 + (90 - days_ago) * (50 - 20) / (90 - 30);
6215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
6225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Linearly extrapolate between 90 and 365 days so 365 days has a score
6235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // of 10.
6245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      unnormalized_recency_score =
6255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)          10 + (365 - days_ago) * (20 - 10) / (365 - 90);
6265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    days_ago_to_recency_score[days_ago] = unnormalized_recency_score / 100.0;
6285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (days_ago > 0) {
6295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      DCHECK_LE(days_ago_to_recency_score[days_ago],
6305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                days_ago_to_recency_score[days_ago - 1]);
6315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
6325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
6335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
6365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)float ScoredHistoryMatch::GetPopularityScore(int typed_count,
6375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                             int visit_count) {
6385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The max()s are to guard against database corruption.
6395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return (std::max(typed_count, 0) * 5.0 + std::max(visit_count, 0) * 3.0) /
6405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      (5.0 + 3.0);
6415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
6425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// static
6442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)float ScoredHistoryMatch::GetFinalRelevancyScore(
6452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    float topicality_score, float recency_score, float popularity_score) {
6462a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Here's how to interpret intermediate_score: Suppose the omnibox
6472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // has one input term.  Suppose we have a URL that has 5 typed
6482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // visits with the most recent being within a day and the omnibox
6492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // input term has a single URL hostname hit at a word boundary.
6502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // This URL will have an intermediate_score of 5.0 (= 1 topicality *
6512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // 1 recency * 5 popularity).
6522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  float intermediate_score =
6532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      topicality_score * recency_score * popularity_score;
6542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // The below code takes intermediate_score from [0, infinity) to
6552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // relevancy scores in the range [0, 1400).
6562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  float attenuating_factor = 1.0;
6572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (intermediate_score < 4) {
6582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // The formula in the final return line in this function only works if
6592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // intermediate_score > 4.  For lower scores, we linearly interpolate
6602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // between 0 and the formula when intermediate_score = 4.0.
6612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    attenuating_factor = intermediate_score / 4.0;
6622a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    intermediate_score = 4.0;
6632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
6642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  DCHECK_GE(intermediate_score, 4.0);
6652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return attenuating_factor * 1400.0 * (2.0 - exp(2.0 / intermediate_score));
6662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
6672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
6685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void ScoredHistoryMatch::InitializeNewScoringField() {
6695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  enum NewScoringOption {
6705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    OLD_SCORING = 0,
6715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEW_SCORING = 1,
6725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL = 2,
6735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP = 3,
6745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP = 4,
6755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NUM_OPTIONS = 5
6765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
6775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // should always be overwritten
6785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  NewScoringOption new_scoring_option = NUM_OPTIONS;
6795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const std::string switch_value = CommandLine::ForCurrentProcess()->
6815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      GetSwitchValueASCII(switches::kOmniboxHistoryQuickProviderNewScoring);
6825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (switch_value == switches::kOmniboxHistoryQuickProviderNewScoringEnabled) {
6835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    new_scoring_option = NEW_SCORING;
6845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    use_new_scoring = true;
6855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else if (switch_value ==
6865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)             switches::kOmniboxHistoryQuickProviderNewScoringDisabled) {
6875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    new_scoring_option = OLD_SCORING;
6885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    use_new_scoring = false;
6895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  } else {
6905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // We'll assume any other flag means automatic.
6915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Automatic means eligible for the field trial.
6925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
6935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // For the field trial stuff to work correctly, we must be running
6945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // on the same thread as the thread that created the field trial,
6952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    // which happens via a call to OmniboxFieldTrial::Active in
6965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // chrome_browser_main.cc on the main thread.  Let's check this to
6975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // be sure.  We check "if we've heard of the UI thread then we'd better
6985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // be on it."  The first part is necessary so unit tests pass.  (Many
6995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // unit tests don't set up the threading naming system; hence
7005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // CurrentlyOn(UI thread) will fail.)
7015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    DCHECK(!content::BrowserThread::IsWellKnownThread(
7025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)               content::BrowserThread::UI) ||
7035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)           content::BrowserThread::CurrentlyOn(content::BrowserThread::UI));
7042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    if (OmniboxFieldTrial::InHQPNewScoringFieldTrial()) {
7052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      if (OmniboxFieldTrial::InHQPNewScoringFieldTrialExperimentGroup()) {
7065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        new_scoring_option = NEW_SCORING_FIELD_TRIAL_EXPERIMENT_GROUP;
7075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        use_new_scoring = true;
7085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
7095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        new_scoring_option = NEW_SCORING_FIELD_TRIAL_DEFAULT_GROUP;
7105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        use_new_scoring = false;
7115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
7125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    } else {
7135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      new_scoring_option = NEW_SCORING_AUTO_BUT_NOT_IN_FIELD_TRIAL;
7145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      use_new_scoring = false;
7155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
7165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
7175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Add a beacon to the logs that'll allow us to identify later what
7195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // new scoring state a user is in.  Do this by incrementing a bucket in
7205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // a histogram, where the bucket represents the user's new scoring state.
7215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  UMA_HISTOGRAM_ENUMERATION(
7225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      "Omnibox.HistoryQuickProviderNewScoringFieldTrialBeacon",
7235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      new_scoring_option, NUM_OPTIONS);
7245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void ScoredHistoryMatch::InitializeAlsoDoHUPLikeScoringFieldAndMaxScoreField() {
7275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  also_do_hup_like_scoring =
7282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      OmniboxFieldTrial::InHQPReplaceHUPScoringFieldTrial() &&
7292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)      OmniboxFieldTrial::InHQPReplaceHUPScoringFieldTrialExperimentGroup();
7302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // When doing HUP-like scoring, don't allow a non-inlineable match
7312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // to beat the score of good inlineable matches.  This is a problem
7322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // because if a non-inlineable match ends up with the highest score
7332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // from HistoryQuick provider, all HistoryQuick matches get demoted
7342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // to non-inlineable scores (scores less than 1200).  Without
7352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // HUP-like-scoring, these results would actually come from the HUP
7362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // and not be demoted, thus outscoring the demoted HQP results.
7372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // When the HQP provides these, we need to clamp the non-inlineable
7382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // results to preserve this behavior.
7392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  if (also_do_hup_like_scoring) {
7402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    max_assigned_score_for_non_inlineable_matches =
7412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        HistoryURLProvider::kScoreForBestInlineableResult - 1;
7422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
7435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
7445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
7455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace history
746