1dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/in_memory_url_index.h"
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <algorithm>
872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include <iterator>
93345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <limits>
10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include <numeric>
113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
12dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "base/file_util.h"
1321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "base/i18n/break_iterator.h"
14dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "base/metrics/histogram.h"
153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string_util.h"
163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/time.h"
173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/utf_string_conversions.h"
18ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/autocomplete/autocomplete.h"
193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/autocomplete/history_provider_util.h"
203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/history/url_database.h"
21dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "chrome/browser/profiles/profile.h"
22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/common/url_constants.h"
23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "googleurl/src/url_util.h"
243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "net/base/escape.h"
253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "net/base/net_util.h"
26dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
2772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "ui/base/l10n/l10n_util.h"
283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
29dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenusing google::protobuf::RepeatedField;
30dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenusing google::protobuf::RepeatedPtrField;
31dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenusing in_memory_url_index::InMemoryURLIndexCacheItem;
32ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace history {
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
35dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem;
36dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry;
37dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem;
38dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem;
39dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry
40dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    CharWordMapEntry;
41dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem
42dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordIDHistoryMapItem;
43dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::
44dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry
45dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordIDHistoryMapEntry;
46dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem;
47dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry
48dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    HistoryInfoMapEntry;
49dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
50dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst size_t InMemoryURLIndex::kNoCachedResultForTerm = -1;
51dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
52ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Score ranges used to get a 'base' score for each of the scoring factors
53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// (such as recency of last visit, times visited, times the URL was typed,
54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// and the quality of the string match). There is a matching value range for
55ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// each of these scores for each factor.
56ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenconst int kScoreRank[] = { 1425, 1200, 900, 400 };
57ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
58ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch::ScoredHistoryMatch()
59ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    : raw_score(0),
60ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      prefix_adjust(0) {}
61ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
62ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info)
63ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    : HistoryMatch(url_info, 0, false, false),
64ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      raw_score(0),
65ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      prefix_adjust(0) {}
663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
67ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch::~ScoredHistoryMatch() {}
68ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
69ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Comparison function for sorting ScoredMatches by their scores.
70ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1,
71ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                           const ScoredHistoryMatch& m2) {
72ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return m1.raw_score >= m2.raw_score;
733345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
75731df977c0511bca2206b5f333555b1205ff1f43Iain Merrickstruct InMemoryURLIndex::TermCharWordSet {
76dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  TermCharWordSet()  // Required for STL resize().
77dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      : char_(0),
78dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen        word_id_set_(),
79dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen        used_(false) {}
80dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  TermCharWordSet(const char16& uni_char,
81513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch                  const WordIDSet& word_id_set,
82513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch                  bool used)
83dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      : char_(uni_char),
84731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick        word_id_set_(word_id_set),
85731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick        used_(used) {}
86731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
87dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Predicate for STL algorithm use.
88dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  bool is_not_used() const { return !used_; }
89731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
90dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  char16 char_;
91731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  WordIDSet word_id_set_;
92731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick  bool used_;  // true if this set has been used for the current term search.
93731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick};
94731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Comparison function for sorting TermMatches by their offsets.
96ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) {
97ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return m1.offset < m2.offset;
98ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
99ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
100ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// std::accumulate helper function to add up TermMatches' lengths.
101ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenint AccumulateMatchLength(int total, const TermMatch& match) {
102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return total + match.length;
103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Converts a raw value for some particular scoring factor into a score
106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// component for that factor.  The conversion function is piecewise linear, with
107ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// input values provided in |value_ranks| and resulting output scores from
108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]).  A score
109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than
110ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// kScoreRank[3].
111ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen//
112ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }.
113ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the
114ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// linear function:
115ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen//   score = m * value + b, where
116ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen//   m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1])
117ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen//   b = value_ranks[1]
118ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Any value higher than 100 would be scored as if it were 100, and any value
119ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// lower than 10 scored 0.
120ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenint ScoreForValue(int value, const int* value_ranks) {
121ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int i = 0;
122ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int rank_count = arraysize(kScoreRank);
123ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ?
124ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen         (value > value_ranks[i]) : (value < value_ranks[i])))
125ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    ++i;
126ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (i >= rank_count)
127ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return 0;
128ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int score = kScoreRank[i];
129ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (i > 0) {
130ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    score += (value - value_ranks[i]) *
131ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        (kScoreRank[i - 1] - kScoreRank[i]) /
132ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        (value_ranks[i - 1] - value_ranks[i]);
133ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
134ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return score;
135ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
136ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
137dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir)
138dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    : history_dir_(history_dir),
139dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      history_item_count_(0) {
140dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
142dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Called only by unit tests.
143dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::InMemoryURLIndex()
144dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    : history_item_count_(0) {
145dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
147dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::~InMemoryURLIndex() {}
1483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Indexing
1503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickbool InMemoryURLIndex::Init(history::URLDatabase* history_db,
1523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                            const std::string& languages) {
1533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // TODO(mrossetti): Register for profile/language change notifications.
1543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  languages_ = languages;
155dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return ReloadFromHistory(history_db, false);
156dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
157dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
158dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::ShutDown() {
159dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Write our cache.
160dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SaveToCacheFile();
1613345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
1623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
163513209b27ff55e2841eac0e4120199c23acce758Ben Murdochbool InMemoryURLIndex::IndexRow(const URLRow& row) {
1643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  const GURL& gurl(row.url());
1653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  string16 url(net::FormatUrl(gurl, languages_,
1663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      net::kFormatUrlOmitUsernamePassword,
1673345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
1683345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      NULL, NULL, NULL));
1693345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1703345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  HistoryID history_id = static_cast<HistoryID>(row.id());
171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max());
1723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1733345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Add the row for quick lookup in the history info store.
1743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  URLRow new_row(GURL(url), row.id());
1753345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  new_row.set_visit_count(row.visit_count());
1763345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  new_row.set_typed_count(row.typed_count());
1773345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  new_row.set_last_visit(row.last_visit());
1783345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  new_row.set_title(row.title());
179dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  history_info_map_[history_id] = new_row;
1803345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Split URL into individual, unique words then add in the title words.
182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  url = l10n_util::ToLower(url);
183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  String16Set url_words = WordSetFromString16(url);
184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  String16Set title_words = WordSetFromString16(row.title());
185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  String16Set words;
186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::set_union(url_words.begin(), url_words.end(),
187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                 title_words.begin(), title_words.end(),
188ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                 std::insert_iterator<String16Set>(words, words.begin()));
189ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (String16Set::iterator word_iter = words.begin();
190ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       word_iter != words.end(); ++word_iter)
191ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    AddWordToIndex(*word_iter, history_id);
1923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
1933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ++history_item_count_;
1943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return true;
1953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
1963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
197dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db,
198dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                                         bool clear_cache) {
199dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  ClearPrivateData();
200dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
201dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!history_db)
202dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
203dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
204dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (clear_cache || !RestoreFromCacheFile()) {
205dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    base::TimeTicks beginning_time = base::TimeTicks::Now();
206dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // The index has to be built from scratch.
207dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    URLDatabase::URLEnumerator history_enum;
208dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (!history_db->InitURLEnumeratorForSignificant(&history_enum))
209dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      return false;
210dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    URLRow row;
211dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    while (history_enum.GetNextURL(&row)) {
212dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      if (!IndexRow(row))
213dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen        return false;
214dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    }
215dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime",
216dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                        base::TimeTicks::Now() - beginning_time);
217dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    SaveToCacheFile();
218dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
219dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
220dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
221dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
222dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::ClearPrivateData() {
223dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  history_item_count_ = 0;
224dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  word_list_.clear();
225dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  word_map_.clear();
226dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  char_word_map_.clear();
227dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  word_id_history_map_.clear();
228dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  term_char_word_set_cache_.clear();
229dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  history_info_map_.clear();
230dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
231dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
232dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreFromCacheFile() {
233dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // TODO(mrossetti): Figure out how to determine if the cache is up-to-date.
234dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // That is: ensure that the database has not been modified since the cache
235dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // was last saved. DB file modification date is inadequate. There are no
236dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // SQLite table checksums automatically stored.
237dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  base::TimeTicks beginning_time = base::TimeTicks::Now();
238dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  FilePath file_path;
239ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path))
240dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
241dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  std::string data;
242dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!file_util::ReadFileToString(file_path, &data)) {
243dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    LOG(WARNING) << "Failed to read InMemoryURLIndex cache from "
244dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                 << file_path.value();
245dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
246dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
247dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
248dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  InMemoryURLIndexCacheItem index_cache;
249dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!index_cache.ParseFromArray(data.c_str(), data.size())) {
250dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from "
251dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                 << file_path.value();
252dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
253dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
254dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
255dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!RestorePrivateData(index_cache)) {
256dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    ClearPrivateData();  // Back to square one -- must build from scratch.
257dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
258dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
259dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
260dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime",
261dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                      base::TimeTicks::Now() - beginning_time);
262dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_);
263dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size());
264dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size());
265dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size());
266dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
267dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
268dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
269dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::SaveToCacheFile() {
270dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  base::TimeTicks beginning_time = base::TimeTicks::Now();
271dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  InMemoryURLIndexCacheItem index_cache;
272dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SavePrivateData(&index_cache);
273dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  std::string data;
274dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!index_cache.SerializeToString(&data)) {
275dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache.";
276dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
277dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
278dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
279dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Write the cache to a file then swap it for the old cache, if any, and
280dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // delete the old cache.
281dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  FilePath file_path;
282dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!GetCacheFilePath(&file_path))
283dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
284dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  file_util::ScopedFILE file(file_util::OpenFile(file_path, "w"));
285dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!file.get())
286dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
287dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
288dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  int size = data.size();
289dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (file_util::WriteFile(file_path, data.c_str(), size) != size) {
290dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    LOG(WARNING) << "Failed to write " << file_path.value();
291dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
292dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
293dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime",
294dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                      base::TimeTicks::Now() - beginning_time);
295dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
296dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
297dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
298dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) {
299dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // The row may or may not already be in our index. If it is not already
300dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // indexed and it qualifies then it gets indexed. If it is already
301dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // indexed and still qualifies then it gets updated, otherwise it
302dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // is deleted from the index.
303dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id);
304dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (row_pos == history_info_map_.end()) {
305dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // This new row should be indexed if it qualifies.
306dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (RowQualifiesAsSignificant(row, base::Time()))
307dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      IndexRow(row);
308dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  } else if (RowQualifiesAsSignificant(row, base::Time())) {
309dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // This indexed row still qualifies and will be re-indexed.
310dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // The url won't have changed but the title, visit count, etc.
311dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // might have changed.
312dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    URLRow& old_row = row_pos->second;
313dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    old_row.set_visit_count(row.visit_count());
314dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    old_row.set_typed_count(row.typed_count());
315dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    old_row.set_last_visit(row.last_visit());
316dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // TODO(mrossetti): When we start indexing the title the next line
317dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // will need attention.
318dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    old_row.set_title(row.title());
319dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  } else {
320dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // This indexed row no longer qualifies and will be de-indexed.
321dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    history_info_map_.erase(row_id);
322dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
323dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // This invalidates the cache.
324dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  term_char_word_set_cache_.clear();
325dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // TODO(mrossetti): Record this transaction in the cache.
326dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
327dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
328dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::DeleteURL(URLID row_id) {
329dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Note that this does not remove any reference to this row from the
330dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // word_id_history_map_. That map will continue to contain (and return)
331dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // hits against this row until that map is rebuilt, but since the
332dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // history_info_map_ no longer references the row no erroneous results
333dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // will propagate to the user.
334dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  history_info_map_.erase(row_id);
335dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // This invalidates the word cache.
336dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  term_char_word_set_cache_.clear();
337dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // TODO(mrossetti): Record this transaction in the cache.
338dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
339dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
3403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Searching
3413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
3423345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms(
3433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const String16Vector& terms) {
3443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  ScoredHistoryMatches scored_items;
3453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (!terms.empty()) {
3463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // Reset used_ flags for term_char_word_set_cache_. We use a basic mark-
3473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // and-sweep approach.
3483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    ResetTermCharWordSetCache();
3493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
3503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // Lowercase the terms.
3513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    // TODO(mrossetti): Another opportunity for a transform algorithm.
3523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    String16Vector lower_terms;
3533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    for (String16Vector::const_iterator term_iter = terms.begin();
354ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen         term_iter != terms.end(); ++term_iter)
355ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      lower_terms.push_back(l10n_util::ToLower(*term_iter));
3563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
3573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    String16Vector::value_type all_terms(JoinString(lower_terms, ' '));
3583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    HistoryIDSet history_id_set = HistoryIDSetFromWords(all_terms);
3593345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
360ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    // Don't perform any scoring (and don't return any matches) if the
361ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    // candidate pool is large. (See comments in header.)
362ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const size_t kItemsToScoreLimit = 500;
363ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (history_id_set.size() <= kItemsToScoreLimit) {
364ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      // Pass over all of the candidates filtering out any without a proper
365ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      // substring match, inserting those which pass in order by score.
366ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      scored_items = std::for_each(history_id_set.begin(), history_id_set.end(),
367ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen          AddHistoryMatch(*this, lower_terms)).ScoredMatches();
368ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
369ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      // Select and sort only the top kMaxMatches results.
370ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      if (scored_items.size() > AutocompleteProvider::kMaxMatches) {
371ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        std::partial_sort(scored_items.begin(),
372ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                          scored_items.begin() +
373ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                              AutocompleteProvider::kMaxMatches,
374ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                          scored_items.end(),
375ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                          ScoredHistoryMatch::MatchScoreGreater);
376ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen          scored_items.resize(AutocompleteProvider::kMaxMatches);
377ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      } else {
378ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        std::sort(scored_items.begin(), scored_items.end(),
379ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                  ScoredHistoryMatch::MatchScoreGreater);
380ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      }
381ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
3823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
3833345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
3843345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Remove any stale TermCharWordSet's.
3853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  term_char_word_set_cache_.erase(
3863345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      std::remove_if(term_char_word_set_cache_.begin(),
3873345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                     term_char_word_set_cache_.end(),
388dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                     std::mem_fun_ref(&TermCharWordSet::is_not_used)),
3893345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      term_char_word_set_cache_.end());
3903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return scored_items;
3913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
3923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
3933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::ResetTermCharWordSetCache() {
3943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // TODO(mrossetti): Consider keeping more of the cache around for possible
3953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // repeat searches, except a 'shortcuts' approach might be better for that.
3963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // TODO(mrossetti): Change TermCharWordSet to a class and use for_each.
3973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (TermCharWordSetVector::iterator iter =
3983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick       term_char_word_set_cache_.begin();
3993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick       iter != term_char_word_set_cache_.end(); ++iter)
4003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    iter->used_ = false;
4013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
4023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
4033345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords(
4043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const string16& uni_string) {
4053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Break the terms down into individual terms (words), get the candidate
4063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // set for each term, and intersect each to get a final candidate list.
4073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Note that a single 'term' from the user's perspective might be
4083345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // a string like "http://www.somewebsite.com" which, from our perspective,
4093345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // is four words: 'http', 'www', 'somewebsite', and 'com'.
4103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  HistoryIDSet history_id_set;
4113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  String16Set words = WordSetFromString16(uni_string);
4123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  bool first_word = true;
4133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (String16Set::iterator iter = words.begin();
4143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick       iter != words.end(); ++iter) {
4153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    String16Set::value_type uni_word = *iter;
4163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    HistoryIDSet term_history_id_set =
4173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        InMemoryURLIndex::HistoryIDsForTerm(uni_word);
4183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    if (first_word) {
4193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      history_id_set.swap(term_history_id_set);
4203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      first_word = false;
4213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    } else {
4223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      HistoryIDSet old_history_id_set(history_id_set);
4233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      history_id_set.clear();
4243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      std::set_intersection(old_history_id_set.begin(),
4253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                            old_history_id_set.end(),
4263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                            term_history_id_set.begin(),
4273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                            term_history_id_set.end(),
4283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                            std::inserter(history_id_set,
4293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                                          history_id_set.begin()));
4303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
4313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
4323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return history_id_set;
4333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
4343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
4353345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm(
4363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const string16& uni_word) {
4373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  InMemoryURLIndex::HistoryIDSet history_id_set;
4383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
439dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // For each unique character in the word, in order of first appearance, get
440dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // the char/word_id map entry and intersect with the set in an incremental
441dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // manner.
442dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  Char16Vector uni_chars = Char16VectorFromString16(uni_word);
4433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  WordIDSet word_id_set(WordIDSetForTermChars(uni_chars));
4443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
445ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // TODO(mrossetti): At this point, as a possible optimization, we could
446ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // scan through all candidate words and make sure the |uni_word| is a
447ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // substring within the candidate words, eliminating those which aren't.
448ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // I'm not sure it would be worth the effort. And remember, we've got to do
449ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // a final substring match in order to get the highlighting ranges later
450ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // in the process in any case.
451ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
4523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // If any words resulted then we can compose a set of history IDs by unioning
4533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // the sets from each word.
4543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (!word_id_set.empty()) {
4553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    for (WordIDSet::iterator word_id_iter = word_id_set.begin();
4563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick         word_id_iter != word_id_set.end(); ++word_id_iter) {
4573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      WordID word_id = *word_id_iter;
4583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id);
4593345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      if (word_iter != word_id_history_map_.end()) {
4603345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        HistoryIDSet& word_history_id_set(word_iter->second);
4613345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick        history_id_set.insert(word_history_id_set.begin(),
4623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                              word_history_id_set.end());
4633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      }
4643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
4653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
4663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
4673345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return history_id_set;
4683345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
4693345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
4703345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Utility Functions
4713345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
4723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// static
4733345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16(
4743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const string16& uni_string) {
475ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  String16Vector words = WordVectorFromString16(uni_string, false);
476ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  String16Set word_set;
477ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (String16Vector::const_iterator iter = words.begin(); iter != words.end();
478ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       ++iter)
479ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    word_set.insert(l10n_util::ToLower(*iter));
480ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return word_set;
481ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
482ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
483ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static
484ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenInMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16(
485ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const string16& uni_string,
486ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    bool break_on_space) {
487ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  base::BreakIterator iter(&uni_string, break_on_space ?
488ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      base::BreakIterator::BREAK_SPACE : base::BreakIterator::BREAK_WORD);
489ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  String16Vector words;
490ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (!iter.Init())
491ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return words;
492ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  while (iter.Advance()) {
493ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (break_on_space || iter.IsWord()) {
494ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      string16 word = iter.GetString();
495ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      if (break_on_space)
496ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        TrimWhitespace(word, TRIM_ALL, &word);
497ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      if (!word.empty())
498ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        words.push_back(word);
4993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
5003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
5013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return words;
5023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
5033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
5043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// static
505dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16(
506dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const string16& uni_word) {
507dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  InMemoryURLIndex::Char16Vector characters;
508dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  InMemoryURLIndex::Char16Set unique_characters;
509dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (string16::const_iterator iter = uni_word.begin();
510dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != uni_word.end(); ++iter) {
511dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (!unique_characters.count(*iter)) {
512dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      unique_characters.insert(*iter);
513dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      characters.push_back(*iter);
514dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    }
515dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
516dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return characters;
517dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
518dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
519dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// static
520dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16(
5213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const string16& uni_word) {
5223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  Char16Set characters;
5233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (string16::const_iterator iter = uni_word.begin();
5243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick       iter != uni_word.end(); ++iter)
5253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    characters.insert(*iter);
5263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return characters;
5273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
5283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
5293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::AddWordToIndex(const string16& uni_word,
5303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                                      HistoryID history_id) {
5313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  WordMap::iterator word_pos = word_map_.find(uni_word);
5323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (word_pos != word_map_.end())
5333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    UpdateWordHistory(word_pos->second, history_id);
5343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  else
5353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    AddWordHistory(uni_word, history_id);
5363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
5373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
5383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) {
5393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id);
5403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    DCHECK(history_pos != word_id_history_map_.end());
5413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    HistoryIDSet& history_id_set(history_pos->second);
5423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    history_id_set.insert(history_id);
5433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
5443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
5453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Add a new word to the word list and the word map, and then create a
5463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// new entry in the word/history map.
5473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::AddWordHistory(const string16& uni_word,
5483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick                                      HistoryID history_id) {
5493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  word_list_.push_back(uni_word);
5503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  WordID word_id = word_list_.size() - 1;
551dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  word_map_[uni_word] = word_id;
5523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  HistoryIDSet history_id_set;
5533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  history_id_set.insert(history_id);
554dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  word_id_history_map_[word_id] = history_id_set;
5553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // For each character in the newly added word (i.e. a word that is not
5563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // already in the word index), add the word to the character index.
557dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  Char16Set characters = Char16SetFromString16(uni_word);
5583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (Char16Set::iterator uni_char_iter = characters.begin();
5593345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick       uni_char_iter != characters.end(); ++uni_char_iter) {
560dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    Char16Set::value_type uni_char = *uni_char_iter;
561dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
5623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    if (char_iter != char_word_map_.end()) {
5633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      // Update existing entry in the char/word index.
5643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      WordIDSet& word_id_set(char_iter->second);
5653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      word_id_set.insert(word_id);
5663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    } else {
5673345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      // Create a new entry in the char/word index.
5683345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      WordIDSet word_id_set;
5693345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      word_id_set.insert(word_id);
570dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      char_word_map_[uni_char] = word_id_set;
5713345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
5723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
5733345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
5743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
5753345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars(
576dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const InMemoryURLIndex::Char16Vector& uni_chars) {
577dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  size_t index = CachedResultsIndexForTerm(uni_chars);
578dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
579dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // If there were no unprocessed characters in the search term |uni_chars|
580dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // then we can use the cached one as-is as the results with no further
581dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // filtering.
582dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1)
583dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return term_char_word_set_cache_[index].word_id_set_;
584dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
585dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Some or all of the characters remain to be indexed so trim the cache.
586dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (index + 1 < term_char_word_set_cache_.size())
587dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    term_char_word_set_cache_.resize(index + 1);
5883345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  WordIDSet word_id_set;
589dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Take advantage of our cached starting point, if any.
590dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  Char16Vector::const_iterator c_iter = uni_chars.begin();
591dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (index != kNoCachedResultForTerm) {
592dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    word_id_set = term_char_word_set_cache_[index].word_id_set_;
593dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    c_iter += index + 1;
594dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
595dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Now process the remaining characters in the search term.
596dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (; c_iter != uni_chars.end(); ++c_iter) {
597dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    Char16Vector::value_type uni_char = *c_iter;
598dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char);
599dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (char_iter == char_word_map_.end()) {
600dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      // A character was not found so there are no matching results: bail.
601dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      word_id_set.clear();
602dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      break;
6033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
604dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordIDSet& char_word_id_set(char_iter->second);
605dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // It is possible for there to no longer be any words associated with
606dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // a particular character. Give up in that case.
607dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (char_word_id_set.empty()) {
608dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      word_id_set.clear();
609dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      break;
6103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
611dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
612dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (word_id_set.empty()) {
613dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      word_id_set = char_word_id_set;
614dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    } else {
615dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      WordIDSet old_word_id_set(word_id_set);
616dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      word_id_set.clear();
617dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      std::set_intersection(old_word_id_set.begin(),
618dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                            old_word_id_set.end(),
619dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                            char_word_id_set.begin(),
620dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                            char_word_id_set.end(),
621dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                            std::inserter(word_id_set,
622dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen                            word_id_set.begin()));
6233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    }
624dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // Add this new char/set instance to the cache.
625dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    term_char_word_set_cache_.push_back(TermCharWordSet(
626dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen        uni_char, word_id_set, true));
6273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
6283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  return word_id_set;
6293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
6303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
631dc0f95d653279beabeb9817299e2902918ba123eKristian Monsensize_t InMemoryURLIndex::CachedResultsIndexForTerm(
632dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const InMemoryURLIndex::Char16Vector& uni_chars) {
633dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin();
634dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (Char16Vector::const_iterator c_iter = uni_chars.begin();
635dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       (c_iter != uni_chars.end()) &&
636dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       (t_iter != term_char_word_set_cache_.end()) &&
637dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       (*c_iter == t_iter->char_);
638dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       ++c_iter, ++t_iter)
639dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    t_iter->used_ = true;  // Update the cache.
640dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return t_iter - term_char_word_set_cache_.begin() - 1;
641dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
642dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
6433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// static
644ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTermMatches InMemoryURLIndex::MatchTermInString(const string16& term,
645ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                                const string16& string,
646ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                                int term_num) {
647ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  TermMatches matches;
648ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (size_t location = string.find(term); location != string16::npos;
649ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       location = string.find(term, location + 1))
650ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    matches.push_back(TermMatch(term_num, location, term.size()));
651ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return matches;
652ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
653ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
654ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static
655ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) {
656ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (matches.empty())
657ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return matches;
658ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  TermMatches sorted_matches = matches;
659ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::sort(sorted_matches.begin(), sorted_matches.end(),
660ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen            MatchOffsetLess);
661ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  TermMatches clean_matches;
662ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  TermMatch last_match = sorted_matches[0];
663ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  clean_matches.push_back(last_match);
664ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (TermMatches::const_iterator iter = sorted_matches.begin() + 1;
665ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       iter != sorted_matches.end(); ++iter) {
666ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (iter->offset >= last_match.offset + last_match.length) {
667ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      last_match = *iter;
668ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      clean_matches.push_back(last_match);
669ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
670ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
671ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return clean_matches;
672ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
673ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
674ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static
675ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches(
676ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const TermMatches& matches) {
677ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t> offsets;
678ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i)
679ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    offsets.push_back(i->offset);
680ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return offsets;
681ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
682ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
683ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static
684ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches(
685ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const TermMatches& matches,
686ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const std::vector<size_t>& offsets) {
687ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  DCHECK_EQ(matches.size(), offsets.size());
688ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  TermMatches new_matches;
689ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  std::vector<size_t>::const_iterator offset_iter = offsets.begin();
690ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  for (TermMatches::const_iterator term_iter = matches.begin();
691ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       term_iter != matches.end(); ++term_iter, ++offset_iter) {
692ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (*offset_iter != string16::npos) {
693ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      TermMatch new_match(*term_iter);
694ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      new_match.offset = *offset_iter;
695ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      new_matches.push_back(new_match);
696ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
697ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
698ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return new_matches;
699ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
700ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
701ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static
702ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL(
703ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const URLRow& row,
704ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    const String16Vector& terms) {
705ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  ScoredHistoryMatch match(row);
7063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  GURL gurl = row.url();
7073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (!gurl.is_valid())
708ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return match;
709ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
710ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Figure out where each search term appears in the URL and/or page title
711ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // so that we can score as well as provide autocomplete highlighting.
712ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  string16 url = l10n_util::ToLower(UTF8ToUTF16(gurl.spec()));
713ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Strip any 'http://' prefix before matching.
714ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (url_util::FindAndCompareScheme(url, chrome::kHttpScheme, NULL)) {
715ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    match.prefix_adjust = strlen(chrome::kHttpScheme) + 3;  // Allow for '://'.
716ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    url = url.substr(match.prefix_adjust);
717ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  }
7183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
719ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  string16 title = l10n_util::ToLower(row.title());
720ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int term_num = 0;
7213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end();
722ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen       ++iter, ++term_num) {
7233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    string16 term = *iter;
724ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    TermMatches url_term_matches = MatchTermInString(term, url, term_num);
725ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    TermMatches title_term_matches = MatchTermInString(term, title, term_num);
726ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (url_term_matches.empty() && title_term_matches.empty())
727ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      return match;  // A term was not found in either URL or title - reject.
728ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(),
729ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                             url_term_matches.end());
730ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    match.title_matches.insert(match.title_matches.end(),
731ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                               title_term_matches.begin(),
732ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                               title_term_matches.end());
7333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
7343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
735ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Sort matches by offset and eliminate any which overlap.
736ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  match.url_matches = SortAndDeoverlap(match.url_matches);
737ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  match.title_matches = SortAndDeoverlap(match.title_matches);
738ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
739ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Get partial scores based on term matching. Note that the score for
740ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // each of the URL and title are adjusted by the fraction of the
741ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // terms appearing in each.
742ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int url_score = ScoreComponentForMatches(match.url_matches, url.size()) *
743ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      match.url_matches.size() / terms.size();
744ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int title_score =
745ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      ScoreComponentForMatches(match.title_matches, title.size()) *
746ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      static_cast<int>(match.title_matches.size()) /
747ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      static_cast<int>(terms.size());
748ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Arbitrarily pick the best.
749ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // TODO(mrossetti): It might make sense that a term which appears in both the
750ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // URL and the Title should boost the score a bit.
751ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int term_score = std::max(url_score, title_score);
752ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (term_score == 0)
753ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return match;
754ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
755ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Factor in recency of visit, visit count and typed count attributes of the
756ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // URLRow.
757ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kDaysAgoLevel[] = { 0, 10, 20, 30 };
758ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int score = ScoreForValue((base::Time::Now() -
759ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      row.last_visit()).InDays(), kDaysAgoLevel);
760ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kVisitCountLevel[] = { 30, 10, 5, 3 };
761ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel);
762ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kTypedCountLevel[] = { 10, 5, 3, 1 };
763ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel);
764ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
765ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Determine how many of the factors comprising the final score are
766ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // significant by summing the relative factors for each and subtracting how
767ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // many will be 'discarded' even if they are low.
768ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kVisitCountMultiplier = 2;
769ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kTypedCountMultiplier = 3;
770ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kSignificantFactors =
771ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      kVisitCountMultiplier +  // Visit count factor plus
772ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      kTypedCountMultiplier +  // typed count factor plus
773ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      2 -                      // one each for string match and last visit
774ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      2;                       // minus 2 insignificant factors.
775ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // The following, in effect, discards up to |kSignificantFactors| low scoring
776ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // elements which contribute little to the score but which can inordinately
777ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // drag down an otherwise good score.
778ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  match.raw_score = std::min(kScoreRank[0], (term_score + score +
779ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      (visit_count_value * kVisitCountMultiplier) + (typed_count_value *
780ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      kTypedCountMultiplier)) / kSignificantFactors);
781ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
782ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return match;
783ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen}
784ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
785ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenint InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches,
786ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                               size_t max_length) {
7873345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // TODO(mrossetti): This is good enough for now but must be fine-tuned.
788ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (matches.empty())
789ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    return 0;
790ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
791ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Score component for whether the input terms (if more than one) were found
792ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // in the same order in the match.  Start with kOrderMaxValue points divided
793ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // equally among (number of terms - 1); then discount each of those terms that
794ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // is out-of-order in the match.
795ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kOrderMaxValue = 250;
796ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int order_value = kOrderMaxValue;
797ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  if (matches.size() > 1) {
798ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    int max_possible_out_of_order = matches.size() - 1;
799ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    int out_of_order = 0;
800ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    for (size_t i = 1; i < matches.size(); ++i) {
801ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      if (matches[i - 1].term_num > matches[i].term_num)
802ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        ++out_of_order;
803ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    }
804ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue /
805ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen        max_possible_out_of_order;
8063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
8073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
808ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Score component for how early in the match string the first search term
809ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // appears.  Start with kStartMaxValue points and discount by
810ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // 1/kMaxSignificantStart points for each character later than the first at
811ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // which the term begins. No points are earned if the start of the match
812ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // occurs at or after kMaxSignificantStart.
8133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  const size_t kMaxSignificantStart = 20;
814ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kStartMaxValue = 250;
815ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int start_value = (kMaxSignificantStart -
816ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue /
817ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      kMaxSignificantStart;
818ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
819ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Score component for how much of the matched string the input terms cover.
820ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // kCompleteMaxValue points times the fraction of the URL/page title string
821ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // that was matched.
822ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  size_t term_length_total = std::accumulate(matches.begin(), matches.end(),
823ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen                                             0, AccumulateMatchLength);
824ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kCompleteMaxValue = 500;
825ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int complete_value = term_length_total * kCompleteMaxValue / max_length;
826ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
827ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  int raw_score = order_value + start_value + complete_value;
828ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  const int kTermScoreLevel[] = { 1000, 650, 500, 200 };
829ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen
830ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // Scale the sum of the three components above into a single score component
831ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  // on the same scale as that used in ScoredMatchForURL().
832ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  return ScoreForValue(raw_score, kTermScoreLevel);
8333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
8343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
835731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickInMemoryURLIndex::AddHistoryMatch::AddHistoryMatch(
836731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    const InMemoryURLIndex& index,
837731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    const String16Vector& lower_terms)
838ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen  : index_(index),
839ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    lower_terms_(lower_terms) {
840731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}
841731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
842731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickInMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {}
843731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
8443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::AddHistoryMatch::operator()(
8453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const InMemoryURLIndex::HistoryID history_id) {
8463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  HistoryInfoMap::const_iterator hist_pos =
8473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick      index_.history_info_map_.find(history_id);
848dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // Note that a history_id may be present in the word_id_history_map_ yet not
849dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // be found in the history_info_map_. This occurs when an item has been
850dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  // deleted by the user or the item no longer qualifies as a quick result.
8513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (hist_pos != index_.history_info_map_.end()) {
8523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const URLRow& hist_item = hist_pos->second;
853ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_));
854ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen    if (match.raw_score > 0)
855ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen      scored_matches_.push_back(match);
8563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  }
8573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
8583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
859dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) {
860dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (history_dir_.empty())
861dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
862dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache"));
863dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
864dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
865dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
866dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const {
867dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  DCHECK(cache);
868dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  cache->set_timestamp(base::Time::Now().ToInternalValue());
869dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  cache->set_history_item_count(history_item_count_);
870dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SaveWordList(cache);
871dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SaveWordMap(cache);
872dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SaveCharWordMap(cache);
873dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SaveWordIDHistoryMap(cache);
874dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  SaveHistoryInfoMap(cache);
875dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
876dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
877dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestorePrivateData(
878dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const InMemoryURLIndexCacheItem& cache) {
879dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  last_saved_ = base::Time::FromInternalValue(cache.timestamp());
880dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  history_item_count_ = cache.history_item_count();
881dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return (history_item_count_ == 0) || (RestoreWordList(cache) &&
882dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      RestoreWordMap(cache) && RestoreCharWordMap(cache) &&
883dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache));
884dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
885dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
886dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
887dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const {
888dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (word_list_.empty())
889dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return;
890dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  WordListItem* list_item = cache->mutable_word_list();
891dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  list_item->set_word_count(word_list_.size());
892dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (String16Vector::const_iterator iter = word_list_.begin();
893dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != word_list_.end(); ++iter)
894dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    list_item->add_word(UTF16ToUTF8(*iter));
895dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
896dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
897dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) {
898dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!cache.has_word_list())
899dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
900dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const WordListItem& list_item(cache.word_list());
901dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 expected_item_count = list_item.word_count();
902dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 actual_item_count = list_item.word_size();
903dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (actual_item_count == 0 || actual_item_count != expected_item_count)
904dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
905dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const RepeatedPtrField<std::string>& words(list_item.word());
906dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (RepeatedPtrField<std::string>::const_iterator iter = words.begin();
907dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != words.end(); ++iter)
908dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    word_list_.push_back(UTF8ToUTF16(*iter));
909dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
910dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
911dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
912dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const {
913dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (word_map_.empty())
914dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return;
915dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  WordMapItem* map_item = cache->mutable_word_map();
916dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  map_item->set_item_count(word_map_.size());
917dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (WordMap::const_iterator iter = word_map_.begin();
918dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != word_map_.end(); ++iter) {
919dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordMapEntry* map_entry = map_item->add_word_map_entry();
920dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_word(UTF16ToUTF8(iter->first));
921dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_word_id(iter->second);
922dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
923dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
924dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
925dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) {
926dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!cache.has_word_map())
927dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
928dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const WordMapItem& list_item(cache.word_map());
929dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 expected_item_count = list_item.item_count();
930dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 actual_item_count = list_item.word_map_entry_size();
931dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (actual_item_count == 0 || actual_item_count != expected_item_count)
932dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
933dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry());
934dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin();
935dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != entries.end(); ++iter)
936dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    word_map_[UTF8ToUTF16(iter->word())] = iter->word_id();
937dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
938dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
939dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
940dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const {
941dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (char_word_map_.empty())
942dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return;
943dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  CharWordMapItem* map_item = cache->mutable_char_word_map();
944dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  map_item->set_item_count(char_word_map_.size());
945dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (CharWordIDMap::const_iterator iter = char_word_map_.begin();
946dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != char_word_map_.end(); ++iter) {
947dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    CharWordMapEntry* map_entry = map_item->add_char_word_map_entry();
948dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_char_16(iter->first);
949dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const WordIDSet& word_id_set(iter->second);
950dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_item_count(word_id_set.size());
951dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    for (WordIDSet::const_iterator set_iter = word_id_set.begin();
952dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen         set_iter != word_id_set.end(); ++set_iter)
953dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      map_entry->add_word_id(*set_iter);
954dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
955dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
956dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
957dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreCharWordMap(
958dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const InMemoryURLIndexCacheItem& cache) {
959dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!cache.has_char_word_map())
960dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
961dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const CharWordMapItem& list_item(cache.char_word_map());
962dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 expected_item_count = list_item.item_count();
963dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 actual_item_count = list_item.char_word_map_entry_size();
964dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (actual_item_count == 0 || actual_item_count != expected_item_count)
965dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
966dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const RepeatedPtrField<CharWordMapEntry>&
967dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      entries(list_item.char_word_map_entry());
968dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter =
969dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       entries.begin(); iter != entries.end(); ++iter) {
970dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    expected_item_count = iter->item_count();
971dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    actual_item_count = iter->word_id_size();
972dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (actual_item_count == 0 || actual_item_count != expected_item_count)
973dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      return false;
974dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    char16 uni_char = static_cast<char16>(iter->char_16());
975dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordIDSet word_id_set;
976dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const RepeatedField<int32>& word_ids(iter->word_id());
977dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    for (RepeatedField<int32>::const_iterator jiter = word_ids.begin();
978dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen         jiter != word_ids.end(); ++jiter)
979dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      word_id_set.insert(*jiter);
980dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    char_word_map_[uni_char] = word_id_set;
981dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
982dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
983dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
984dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
985dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache)
986dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const {
987dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (word_id_history_map_.empty())
988dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return;
989dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map();
990dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  map_item->set_item_count(word_id_history_map_.size());
991dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin();
992dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != word_id_history_map_.end(); ++iter) {
993dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordIDHistoryMapEntry* map_entry =
994dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen        map_item->add_word_id_history_map_entry();
995dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_word_id(iter->first);
996dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const HistoryIDSet& history_id_set(iter->second);
997dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_item_count(history_id_set.size());
998dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    for (HistoryIDSet::const_iterator set_iter = history_id_set.begin();
999dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen         set_iter != history_id_set.end(); ++set_iter)
1000dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      map_entry->add_history_id(*set_iter);
1001dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
1002dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
1003dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
1004dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreWordIDHistoryMap(
1005dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const InMemoryURLIndexCacheItem& cache) {
1006dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!cache.has_word_id_history_map())
1007dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
1008dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const WordIDHistoryMapItem& list_item(cache.word_id_history_map());
1009dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 expected_item_count = list_item.item_count();
1010dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 actual_item_count = list_item.word_id_history_map_entry_size();
1011dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (actual_item_count == 0 || actual_item_count != expected_item_count)
1012dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
1013dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const RepeatedPtrField<WordIDHistoryMapEntry>&
1014dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      entries(list_item.word_id_history_map_entry());
1015dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter =
1016dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       entries.begin(); iter != entries.end(); ++iter) {
1017dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    expected_item_count = iter->item_count();
1018dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    actual_item_count = iter->history_id_size();
1019dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (actual_item_count == 0 || actual_item_count != expected_item_count)
1020dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      return false;
1021dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    WordID word_id = iter->word_id();
1022dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    HistoryIDSet history_id_set;
1023dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const RepeatedField<int64>& history_ids(iter->history_id());
1024dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    for (RepeatedField<int64>::const_iterator jiter = history_ids.begin();
1025dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen         jiter != history_ids.end(); ++jiter)
1026dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      history_id_set.insert(*jiter);
1027dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    word_id_history_map_[word_id] = history_id_set;
1028dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
1029dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
1030dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
1031dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
1032dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveHistoryInfoMap(
1033dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    InMemoryURLIndexCacheItem* cache) const {
1034dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (history_info_map_.empty())
1035dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return;
1036dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  HistoryInfoMapItem* map_item = cache->mutable_history_info_map();
1037dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  map_item->set_item_count(history_info_map_.size());
1038dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (HistoryInfoMap::const_iterator iter = history_info_map_.begin();
1039dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       iter != history_info_map_.end(); ++iter) {
1040dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry();
1041dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_history_id(iter->first);
1042dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const URLRow& url_row(iter->second);
1043dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // Note: We only save information that contributes to the index so there
1044dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // is no need to save term_char_word_set_cache_ (not persistent),
1045dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    // languages_, etc.
1046dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_visit_count(url_row.visit_count());
1047dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_typed_count(url_row.typed_count());
1048dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_last_visit(url_row.last_visit().ToInternalValue());
1049dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_url(url_row.url().spec());
1050dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    map_entry->set_title(UTF16ToUTF8(url_row.title()));
1051dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
1052dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
1053dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
1054dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreHistoryInfoMap(
1055dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    const InMemoryURLIndexCacheItem& cache) {
1056dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (!cache.has_history_info_map())
1057dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
1058dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const HistoryInfoMapItem& list_item(cache.history_info_map());
1059dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 expected_item_count = list_item.item_count();
1060dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  uint32 actual_item_count = list_item.history_info_map_entry_size();
1061dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (actual_item_count == 0 || actual_item_count != expected_item_count)
1062dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    return false;
1063dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  const RepeatedPtrField<HistoryInfoMapEntry>&
1064dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      entries(list_item.history_info_map_entry());
1065dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter =
1066dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen       entries.begin(); iter != entries.end(); ++iter) {
1067dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    HistoryID history_id = iter->history_id();
1068dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    GURL url(iter->url());
1069dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    URLRow url_row(url, history_id);
1070dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    url_row.set_visit_count(iter->visit_count());
1071dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    url_row.set_typed_count(iter->typed_count());
1072dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit()));
1073dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    if (iter->has_title()) {
1074dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      string16 title(UTF8ToUTF16(iter->title()));
1075dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen      url_row.set_title(title);
1076dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    }
1077dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen    history_info_map_[history_id] = url_row;
1078dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  }
1079dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  return true;
1080dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen}
1081dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen
1082c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace history
1083