1dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/in_memory_url_index.h" 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <algorithm> 872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include <iterator> 93345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <limits> 10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include <numeric> 113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 12dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "base/file_util.h" 1321d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "base/i18n/break_iterator.h" 14dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "base/metrics/histogram.h" 153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string_util.h" 163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/time.h" 173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/utf_string_conversions.h" 18ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/browser/autocomplete/autocomplete.h" 193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/autocomplete/history_provider_util.h" 203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "chrome/browser/history/url_database.h" 21dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "chrome/browser/profiles/profile.h" 22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "chrome/common/url_constants.h" 23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "googleurl/src/url_util.h" 243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "net/base/escape.h" 253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "net/base/net_util.h" 26dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen#include "third_party/protobuf/src/google/protobuf/repeated_field.h" 2772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "ui/base/l10n/l10n_util.h" 283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 29dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenusing google::protobuf::RepeatedField; 30dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenusing google::protobuf::RepeatedPtrField; 31dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenusing in_memory_url_index::InMemoryURLIndexCacheItem; 32ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace history { 34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 35dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordListItem WordListItem; 36dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordMapItem_WordMapEntry WordMapEntry; 37dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordMapItem WordMapItem; 38dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_CharWordMapItem CharWordMapItem; 39dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_CharWordMapItem_CharWordMapEntry 40dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen CharWordMapEntry; 41dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_WordIDHistoryMapItem 42dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDHistoryMapItem; 43dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui:: 44dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen InMemoryURLIndexCacheItem_WordIDHistoryMapItem_WordIDHistoryMapEntry 45dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDHistoryMapEntry; 46dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem HistoryInfoMapItem; 47dc0f95d653279beabeb9817299e2902918ba123eKristian Monsentypedef imui::InMemoryURLIndexCacheItem_HistoryInfoMapItem_HistoryInfoMapEntry 48dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen HistoryInfoMapEntry; 49dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 50dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenconst size_t InMemoryURLIndex::kNoCachedResultForTerm = -1; 51dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 52ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Score ranges used to get a 'base' score for each of the scoring factors 53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// (such as recency of last visit, times visited, times the URL was typed, 54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// and the quality of the string match). There is a matching value range for 55ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// each of these scores for each factor. 56ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenconst int kScoreRank[] = { 1425, 1200, 900, 400 }; 57ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 58ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch::ScoredHistoryMatch() 59ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen : raw_score(0), 60ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen prefix_adjust(0) {} 61ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 62ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch::ScoredHistoryMatch(const URLRow& url_info) 63ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen : HistoryMatch(url_info, 0, false, false), 64ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen raw_score(0), 65ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen prefix_adjust(0) {} 663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 67ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch::~ScoredHistoryMatch() {} 68ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 69ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Comparison function for sorting ScoredMatches by their scores. 70ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool ScoredHistoryMatch::MatchScoreGreater(const ScoredHistoryMatch& m1, 71ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const ScoredHistoryMatch& m2) { 72ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return m1.raw_score >= m2.raw_score; 733345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 75731df977c0511bca2206b5f333555b1205ff1f43Iain Merrickstruct InMemoryURLIndex::TermCharWordSet { 76dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen TermCharWordSet() // Required for STL resize(). 77dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen : char_(0), 78dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set_(), 79dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen used_(false) {} 80dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen TermCharWordSet(const char16& uni_char, 81513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch const WordIDSet& word_id_set, 82513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch bool used) 83dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen : char_(uni_char), 84731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick word_id_set_(word_id_set), 85731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick used_(used) {} 86731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 87dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Predicate for STL algorithm use. 88dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen bool is_not_used() const { return !used_; } 89731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 90dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char16 char_; 91731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick WordIDSet word_id_set_; 92731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick bool used_; // true if this set has been used for the current term search. 93731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick}; 94731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 95ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Comparison function for sorting TermMatches by their offsets. 96ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool MatchOffsetLess(const TermMatch& m1, const TermMatch& m2) { 97ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return m1.offset < m2.offset; 98ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 99ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 100ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// std::accumulate helper function to add up TermMatches' lengths. 101ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenint AccumulateMatchLength(int total, const TermMatch& match) { 102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return total + match.length; 103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Converts a raw value for some particular scoring factor into a score 106ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// component for that factor. The conversion function is piecewise linear, with 107ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// input values provided in |value_ranks| and resulting output scores from 108ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// |kScoreRank| (mathematically, f(value_rank[i]) = kScoreRank[i]). A score 109ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// cannot be higher than kScoreRank[0], and drops directly to 0 if lower than 110ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// kScoreRank[3]. 111ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// 112ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// For example, take |value| == 70 and |value_ranks| == { 100, 50, 30, 10 }. 113ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Because 70 falls between ranks 0 (100) and 1 (50), the score is given by the 114ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// linear function: 115ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// score = m * value + b, where 116ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// m = (kScoreRank[0] - kScoreRank[1]) / (value_ranks[0] - value_ranks[1]) 117ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// b = value_ranks[1] 118ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Any value higher than 100 would be scored as if it were 100, and any value 119ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// lower than 10 scored 0. 120ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenint ScoreForValue(int value, const int* value_ranks) { 121ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int i = 0; 122ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int rank_count = arraysize(kScoreRank); 123ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen while ((i < rank_count) && ((value_ranks[0] < value_ranks[1]) ? 124ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen (value > value_ranks[i]) : (value < value_ranks[i]))) 125ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ++i; 126ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (i >= rank_count) 127ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return 0; 128ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int score = kScoreRank[i]; 129ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (i > 0) { 130ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen score += (value - value_ranks[i]) * 131ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen (kScoreRank[i - 1] - kScoreRank[i]) / 132ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen (value_ranks[i - 1] - value_ranks[i]); 133ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 134ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return score; 135ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 136ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 137dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::InMemoryURLIndex(const FilePath& history_dir) 138dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen : history_dir_(history_dir), 139dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_item_count_(0) { 140dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 142dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// Called only by unit tests. 143dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::InMemoryURLIndex() 144dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen : history_item_count_(0) { 145dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 147dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::~InMemoryURLIndex() {} 1483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Indexing 1503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickbool InMemoryURLIndex::Init(history::URLDatabase* history_db, 1523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const std::string& languages) { 1533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // TODO(mrossetti): Register for profile/language change notifications. 1543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick languages_ = languages; 155dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return ReloadFromHistory(history_db, false); 156dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 157dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 158dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::ShutDown() { 159dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Write our cache. 160dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveToCacheFile(); 1613345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 1623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 163513209b27ff55e2841eac0e4120199c23acce758Ben Murdochbool InMemoryURLIndex::IndexRow(const URLRow& row) { 1643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const GURL& gurl(row.url()); 1653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string16 url(net::FormatUrl(gurl, languages_, 1663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick net::kFormatUrlOmitUsernamePassword, 1673345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 1683345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick NULL, NULL, NULL)); 1693345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1703345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryID history_id = static_cast<HistoryID>(row.id()); 171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_LT(row.id(), std::numeric_limits<HistoryID>::max()); 1723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1733345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Add the row for quick lookup in the history info store. 1743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick URLRow new_row(GURL(url), row.id()); 1753345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick new_row.set_visit_count(row.visit_count()); 1763345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick new_row.set_typed_count(row.typed_count()); 1773345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick new_row.set_last_visit(row.last_visit()); 1783345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick new_row.set_title(row.title()); 179dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_info_map_[history_id] = new_row; 1803345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Split URL into individual, unique words then add in the title words. 182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen url = l10n_util::ToLower(url); 183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen String16Set url_words = WordSetFromString16(url); 184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen String16Set title_words = WordSetFromString16(row.title()); 185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen String16Set words; 186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::set_union(url_words.begin(), url_words.end(), 187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen title_words.begin(), title_words.end(), 188ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::insert_iterator<String16Set>(words, words.begin())); 189ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (String16Set::iterator word_iter = words.begin(); 190ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen word_iter != words.end(); ++word_iter) 191ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AddWordToIndex(*word_iter, history_id); 1923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 1933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ++history_item_count_; 1943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return true; 1953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 1963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 197dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::ReloadFromHistory(history::URLDatabase* history_db, 198dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen bool clear_cache) { 199dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen ClearPrivateData(); 200dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 201dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!history_db) 202dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 203dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 204dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (clear_cache || !RestoreFromCacheFile()) { 205dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen base::TimeTicks beginning_time = base::TimeTicks::Now(); 206dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // The index has to be built from scratch. 207dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen URLDatabase::URLEnumerator history_enum; 208dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!history_db->InitURLEnumeratorForSignificant(&history_enum)) 209dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 210dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen URLRow row; 211dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen while (history_enum.GetNextURL(&row)) { 212dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!IndexRow(row)) 213dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 214dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 215dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexingTime", 216dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen base::TimeTicks::Now() - beginning_time); 217dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveToCacheFile(); 218dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 219dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 220dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 221dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 222dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::ClearPrivateData() { 223dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_item_count_ = 0; 224dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_list_.clear(); 225dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_map_.clear(); 226dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char_word_map_.clear(); 227dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_history_map_.clear(); 228dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen term_char_word_set_cache_.clear(); 229dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_info_map_.clear(); 230dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 231dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 232dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreFromCacheFile() { 233dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // TODO(mrossetti): Figure out how to determine if the cache is up-to-date. 234dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // That is: ensure that the database has not been modified since the cache 235dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // was last saved. DB file modification date is inadequate. There are no 236dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // SQLite table checksums automatically stored. 237dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen base::TimeTicks beginning_time = base::TimeTicks::Now(); 238dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen FilePath file_path; 239ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!GetCacheFilePath(&file_path) || !file_util::PathExists(file_path)) 240dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 241dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen std::string data; 242dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!file_util::ReadFileToString(file_path, &data)) { 243dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen LOG(WARNING) << "Failed to read InMemoryURLIndex cache from " 244dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen << file_path.value(); 245dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 246dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 247dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 248dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen InMemoryURLIndexCacheItem index_cache; 249dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!index_cache.ParseFromArray(data.c_str(), data.size())) { 250dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen LOG(WARNING) << "Failed to parse InMemoryURLIndex cache data read from " 251dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen << file_path.value(); 252dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 253dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 254dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 255dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!RestorePrivateData(index_cache)) { 256dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen ClearPrivateData(); // Back to square one -- must build from scratch. 257dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 258dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 259dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 260dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexRestoreCacheTime", 261dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen base::TimeTicks::Now() - beginning_time); 262dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_COUNTS("History.InMemoryURLHistoryItems", history_item_count_); 263dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_COUNTS("History.InMemoryURLCacheSize", data.size()); 264dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLWords", word_map_.size()); 265dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_COUNTS_10000("History.InMemoryURLChars", char_word_map_.size()); 266dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 267dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 268dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 269dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::SaveToCacheFile() { 270dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen base::TimeTicks beginning_time = base::TimeTicks::Now(); 271dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen InMemoryURLIndexCacheItem index_cache; 272dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SavePrivateData(&index_cache); 273dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen std::string data; 274dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!index_cache.SerializeToString(&data)) { 275dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen LOG(WARNING) << "Failed to serialize the InMemoryURLIndex cache."; 276dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 277dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 278dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 279dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Write the cache to a file then swap it for the old cache, if any, and 280dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // delete the old cache. 281dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen FilePath file_path; 282dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!GetCacheFilePath(&file_path)) 283dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 284dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen file_util::ScopedFILE file(file_util::OpenFile(file_path, "w")); 285dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!file.get()) 286dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 287dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 288dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen int size = data.size(); 289dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (file_util::WriteFile(file_path, data.c_str(), size) != size) { 290dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen LOG(WARNING) << "Failed to write " << file_path.value(); 291dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 292dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 293dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen UMA_HISTOGRAM_TIMES("History.InMemoryURLIndexSaveCacheTime", 294dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen base::TimeTicks::Now() - beginning_time); 295dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 296dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 297dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 298dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::UpdateURL(URLID row_id, const URLRow& row) { 299dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // The row may or may not already be in our index. If it is not already 300dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // indexed and it qualifies then it gets indexed. If it is already 301dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // indexed and still qualifies then it gets updated, otherwise it 302dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // is deleted from the index. 303dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen HistoryInfoMap::iterator row_pos = history_info_map_.find(row_id); 304dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (row_pos == history_info_map_.end()) { 305dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // This new row should be indexed if it qualifies. 306dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (RowQualifiesAsSignificant(row, base::Time())) 307dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen IndexRow(row); 308dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } else if (RowQualifiesAsSignificant(row, base::Time())) { 309dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // This indexed row still qualifies and will be re-indexed. 310dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // The url won't have changed but the title, visit count, etc. 311dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // might have changed. 312dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen URLRow& old_row = row_pos->second; 313dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen old_row.set_visit_count(row.visit_count()); 314dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen old_row.set_typed_count(row.typed_count()); 315dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen old_row.set_last_visit(row.last_visit()); 316dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // TODO(mrossetti): When we start indexing the title the next line 317dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // will need attention. 318dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen old_row.set_title(row.title()); 319dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } else { 320dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // This indexed row no longer qualifies and will be de-indexed. 321dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_info_map_.erase(row_id); 322dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 323dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // This invalidates the cache. 324dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen term_char_word_set_cache_.clear(); 325dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // TODO(mrossetti): Record this transaction in the cache. 326dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 327dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 328dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::DeleteURL(URLID row_id) { 329dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Note that this does not remove any reference to this row from the 330dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // word_id_history_map_. That map will continue to contain (and return) 331dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // hits against this row until that map is rebuilt, but since the 332dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // history_info_map_ no longer references the row no erroneous results 333dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // will propagate to the user. 334dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_info_map_.erase(row_id); 335dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // This invalidates the word cache. 336dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen term_char_word_set_cache_.clear(); 337dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // TODO(mrossetti): Record this transaction in the cache. 338dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 339dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 3403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Searching 3413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 3423345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickScoredHistoryMatches InMemoryURLIndex::HistoryItemsForTerms( 3433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const String16Vector& terms) { 3443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ScoredHistoryMatches scored_items; 3453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (!terms.empty()) { 3463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Reset used_ flags for term_char_word_set_cache_. We use a basic mark- 3473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // and-sweep approach. 3483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick ResetTermCharWordSetCache(); 3493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 3503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Lowercase the terms. 3513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // TODO(mrossetti): Another opportunity for a transform algorithm. 3523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick String16Vector lower_terms; 3533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (String16Vector::const_iterator term_iter = terms.begin(); 354ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen term_iter != terms.end(); ++term_iter) 355ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen lower_terms.push_back(l10n_util::ToLower(*term_iter)); 3563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 3573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick String16Vector::value_type all_terms(JoinString(lower_terms, ' ')); 3583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet history_id_set = HistoryIDSetFromWords(all_terms); 3593345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 360ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Don't perform any scoring (and don't return any matches) if the 361ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // candidate pool is large. (See comments in header.) 362ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t kItemsToScoreLimit = 500; 363ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (history_id_set.size() <= kItemsToScoreLimit) { 364ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Pass over all of the candidates filtering out any without a proper 365ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // substring match, inserting those which pass in order by score. 366ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen scored_items = std::for_each(history_id_set.begin(), history_id_set.end(), 367ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AddHistoryMatch(*this, lower_terms)).ScoredMatches(); 368ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 369ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Select and sort only the top kMaxMatches results. 370ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (scored_items.size() > AutocompleteProvider::kMaxMatches) { 371ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::partial_sort(scored_items.begin(), 372ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen scored_items.begin() + 373ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen AutocompleteProvider::kMaxMatches, 374ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen scored_items.end(), 375ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ScoredHistoryMatch::MatchScoreGreater); 376ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen scored_items.resize(AutocompleteProvider::kMaxMatches); 377ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } else { 378ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::sort(scored_items.begin(), scored_items.end(), 379ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ScoredHistoryMatch::MatchScoreGreater); 380ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 381ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 3823345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 3833345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 3843345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Remove any stale TermCharWordSet's. 3853345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick term_char_word_set_cache_.erase( 3863345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::remove_if(term_char_word_set_cache_.begin(), 3873345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick term_char_word_set_cache_.end(), 388dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen std::mem_fun_ref(&TermCharWordSet::is_not_used)), 3893345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick term_char_word_set_cache_.end()); 3903345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return scored_items; 3913345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 3923345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 3933345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::ResetTermCharWordSetCache() { 3943345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // TODO(mrossetti): Consider keeping more of the cache around for possible 3953345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // repeat searches, except a 'shortcuts' approach might be better for that. 3963345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // TODO(mrossetti): Change TermCharWordSet to a class and use for_each. 3973345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (TermCharWordSetVector::iterator iter = 3983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick term_char_word_set_cache_.begin(); 3993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick iter != term_char_word_set_cache_.end(); ++iter) 4003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick iter->used_ = false; 4013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 4023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 4033345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDSetFromWords( 4043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string16& uni_string) { 4053345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Break the terms down into individual terms (words), get the candidate 4063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // set for each term, and intersect each to get a final candidate list. 4073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Note that a single 'term' from the user's perspective might be 4083345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // a string like "http://www.somewebsite.com" which, from our perspective, 4093345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // is four words: 'http', 'www', 'somewebsite', and 'com'. 4103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet history_id_set; 4113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick String16Set words = WordSetFromString16(uni_string); 4123345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick bool first_word = true; 4133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (String16Set::iterator iter = words.begin(); 4143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick iter != words.end(); ++iter) { 4153345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick String16Set::value_type uni_word = *iter; 4163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet term_history_id_set = 4173345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick InMemoryURLIndex::HistoryIDsForTerm(uni_word); 4183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (first_word) { 4193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick history_id_set.swap(term_history_id_set); 4203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick first_word = false; 4213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } else { 4223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet old_history_id_set(history_id_set); 4233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick history_id_set.clear(); 4243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::set_intersection(old_history_id_set.begin(), 4253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick old_history_id_set.end(), 4263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick term_history_id_set.begin(), 4273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick term_history_id_set.end(), 4283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::inserter(history_id_set, 4293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick history_id_set.begin())); 4303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 4313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 4323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return history_id_set; 4333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 4343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 4353345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::HistoryIDSet InMemoryURLIndex::HistoryIDsForTerm( 4363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string16& uni_word) { 4373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick InMemoryURLIndex::HistoryIDSet history_id_set; 4383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 439dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // For each unique character in the word, in order of first appearance, get 440dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // the char/word_id map entry and intersect with the set in an incremental 441dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // manner. 442dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen Char16Vector uni_chars = Char16VectorFromString16(uni_word); 4433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordIDSet word_id_set(WordIDSetForTermChars(uni_chars)); 4443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 445ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // TODO(mrossetti): At this point, as a possible optimization, we could 446ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // scan through all candidate words and make sure the |uni_word| is a 447ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // substring within the candidate words, eliminating those which aren't. 448ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // I'm not sure it would be worth the effort. And remember, we've got to do 449ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // a final substring match in order to get the highlighting ranges later 450ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // in the process in any case. 451ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 4523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // If any words resulted then we can compose a set of history IDs by unioning 4533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // the sets from each word. 4543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (!word_id_set.empty()) { 4553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (WordIDSet::iterator word_id_iter = word_id_set.begin(); 4563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick word_id_iter != word_id_set.end(); ++word_id_iter) { 4573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordID word_id = *word_id_iter; 4583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordIDHistoryMap::iterator word_iter = word_id_history_map_.find(word_id); 4593345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (word_iter != word_id_history_map_.end()) { 4603345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet& word_history_id_set(word_iter->second); 4613345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick history_id_set.insert(word_history_id_set.begin(), 4623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick word_history_id_set.end()); 4633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 4643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 4653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 4663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 4673345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return history_id_set; 4683345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 4693345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 4703345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Utility Functions 4713345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 4723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// static 4733345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::String16Set InMemoryURLIndex::WordSetFromString16( 4743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string16& uni_string) { 475ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen String16Vector words = WordVectorFromString16(uni_string, false); 476ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen String16Set word_set; 477ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (String16Vector::const_iterator iter = words.begin(); iter != words.end(); 478ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ++iter) 479ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen word_set.insert(l10n_util::ToLower(*iter)); 480ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return word_set; 481ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 482ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 483ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static 484ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenInMemoryURLIndex::String16Vector InMemoryURLIndex::WordVectorFromString16( 485ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const string16& uni_string, 486ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen bool break_on_space) { 487ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen base::BreakIterator iter(&uni_string, break_on_space ? 488ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen base::BreakIterator::BREAK_SPACE : base::BreakIterator::BREAK_WORD); 489ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen String16Vector words; 490ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!iter.Init()) 491ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return words; 492ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen while (iter.Advance()) { 493ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (break_on_space || iter.IsWord()) { 494ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen string16 word = iter.GetString(); 495ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (break_on_space) 496ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TrimWhitespace(word, TRIM_ALL, &word); 497ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!word.empty()) 498ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen words.push_back(word); 4993345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 5003345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 5013345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return words; 5023345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 5033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 5043345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// static 505dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::Char16Vector InMemoryURLIndex::Char16VectorFromString16( 506dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const string16& uni_word) { 507dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen InMemoryURLIndex::Char16Vector characters; 508dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen InMemoryURLIndex::Char16Set unique_characters; 509dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (string16::const_iterator iter = uni_word.begin(); 510dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != uni_word.end(); ++iter) { 511dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!unique_characters.count(*iter)) { 512dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen unique_characters.insert(*iter); 513dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen characters.push_back(*iter); 514dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 515dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 516dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return characters; 517dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 518dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 519dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen// static 520dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenInMemoryURLIndex::Char16Set InMemoryURLIndex::Char16SetFromString16( 5213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string16& uni_word) { 5223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick Char16Set characters; 5233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (string16::const_iterator iter = uni_word.begin(); 5243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick iter != uni_word.end(); ++iter) 5253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick characters.insert(*iter); 5263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return characters; 5273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 5283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 5293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::AddWordToIndex(const string16& uni_word, 5303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryID history_id) { 5313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordMap::iterator word_pos = word_map_.find(uni_word); 5323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (word_pos != word_map_.end()) 5333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick UpdateWordHistory(word_pos->second, history_id); 5343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick else 5353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick AddWordHistory(uni_word, history_id); 5363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 5373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 5383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::UpdateWordHistory(WordID word_id, HistoryID history_id) { 5393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordIDHistoryMap::iterator history_pos = word_id_history_map_.find(word_id); 5403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick DCHECK(history_pos != word_id_history_map_.end()); 5413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet& history_id_set(history_pos->second); 5423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick history_id_set.insert(history_id); 5433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 5443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 5453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Add a new word to the word list and the word map, and then create a 5463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// new entry in the word/history map. 5473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::AddWordHistory(const string16& uni_word, 5483345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryID history_id) { 5493345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick word_list_.push_back(uni_word); 5503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordID word_id = word_list_.size() - 1; 551dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_map_[uni_word] = word_id; 5523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryIDSet history_id_set; 5533345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick history_id_set.insert(history_id); 554dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_history_map_[word_id] = history_id_set; 5553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // For each character in the newly added word (i.e. a word that is not 5563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // already in the word index), add the word to the character index. 557dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen Char16Set characters = Char16SetFromString16(uni_word); 5583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (Char16Set::iterator uni_char_iter = characters.begin(); 5593345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick uni_char_iter != characters.end(); ++uni_char_iter) { 560dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen Char16Set::value_type uni_char = *uni_char_iter; 561dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); 5623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (char_iter != char_word_map_.end()) { 5633345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Update existing entry in the char/word index. 5643345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordIDSet& word_id_set(char_iter->second); 5653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick word_id_set.insert(word_id); 5663345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } else { 5673345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Create a new entry in the char/word index. 5683345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordIDSet word_id_set; 5693345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick word_id_set.insert(word_id); 570dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char_word_map_[uni_char] = word_id_set; 5713345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 5723345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 5733345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 5743345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 5753345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickInMemoryURLIndex::WordIDSet InMemoryURLIndex::WordIDSetForTermChars( 576dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const InMemoryURLIndex::Char16Vector& uni_chars) { 577dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen size_t index = CachedResultsIndexForTerm(uni_chars); 578dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 579dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // If there were no unprocessed characters in the search term |uni_chars| 580dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // then we can use the cached one as-is as the results with no further 581dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // filtering. 582dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (index != kNoCachedResultForTerm && index == uni_chars.size() - 1) 583dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return term_char_word_set_cache_[index].word_id_set_; 584dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 585dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Some or all of the characters remain to be indexed so trim the cache. 586dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (index + 1 < term_char_word_set_cache_.size()) 587dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen term_char_word_set_cache_.resize(index + 1); 5883345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick WordIDSet word_id_set; 589dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Take advantage of our cached starting point, if any. 590dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen Char16Vector::const_iterator c_iter = uni_chars.begin(); 591dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (index != kNoCachedResultForTerm) { 592dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set = term_char_word_set_cache_[index].word_id_set_; 593dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen c_iter += index + 1; 594dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 595dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Now process the remaining characters in the search term. 596dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (; c_iter != uni_chars.end(); ++c_iter) { 597dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen Char16Vector::value_type uni_char = *c_iter; 598dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen CharWordIDMap::iterator char_iter = char_word_map_.find(uni_char); 599dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (char_iter == char_word_map_.end()) { 600dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // A character was not found so there are no matching results: bail. 601dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set.clear(); 602dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen break; 6033345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 604dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDSet& char_word_id_set(char_iter->second); 605dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // It is possible for there to no longer be any words associated with 606dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // a particular character. Give up in that case. 607dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (char_word_id_set.empty()) { 608dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set.clear(); 609dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen break; 6103345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 611dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 612dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (word_id_set.empty()) { 613dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set = char_word_id_set; 614dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } else { 615dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDSet old_word_id_set(word_id_set); 616dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set.clear(); 617dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen std::set_intersection(old_word_id_set.begin(), 618dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen old_word_id_set.end(), 619dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char_word_id_set.begin(), 620dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char_word_id_set.end(), 621dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen std::inserter(word_id_set, 622dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set.begin())); 6233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 624dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Add this new char/set instance to the cache. 625dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen term_char_word_set_cache_.push_back(TermCharWordSet( 626dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uni_char, word_id_set, true)); 6273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 6283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return word_id_set; 6293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 6303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 631dc0f95d653279beabeb9817299e2902918ba123eKristian Monsensize_t InMemoryURLIndex::CachedResultsIndexForTerm( 632dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const InMemoryURLIndex::Char16Vector& uni_chars) { 633dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen TermCharWordSetVector::iterator t_iter = term_char_word_set_cache_.begin(); 634dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (Char16Vector::const_iterator c_iter = uni_chars.begin(); 635dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen (c_iter != uni_chars.end()) && 636dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen (t_iter != term_char_word_set_cache_.end()) && 637dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen (*c_iter == t_iter->char_); 638dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen ++c_iter, ++t_iter) 639dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen t_iter->used_ = true; // Update the cache. 640dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return t_iter - term_char_word_set_cache_.begin() - 1; 641dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 642dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 6433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// static 644ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTermMatches InMemoryURLIndex::MatchTermInString(const string16& term, 645ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const string16& string, 646ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int term_num) { 647ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatches matches; 648ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (size_t location = string.find(term); location != string16::npos; 649ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen location = string.find(term, location + 1)) 650ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen matches.push_back(TermMatch(term_num, location, term.size())); 651ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return matches; 652ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 653ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 654ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static 655ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTermMatches InMemoryURLIndex::SortAndDeoverlap(const TermMatches& matches) { 656ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (matches.empty()) 657ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return matches; 658ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatches sorted_matches = matches; 659ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::sort(sorted_matches.begin(), sorted_matches.end(), 660ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MatchOffsetLess); 661ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatches clean_matches; 662ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatch last_match = sorted_matches[0]; 663ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen clean_matches.push_back(last_match); 664ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (TermMatches::const_iterator iter = sorted_matches.begin() + 1; 665ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen iter != sorted_matches.end(); ++iter) { 666ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (iter->offset >= last_match.offset + last_match.length) { 667ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen last_match = *iter; 668ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen clean_matches.push_back(last_match); 669ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 670ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 671ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return clean_matches; 672ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 673ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 674ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static 675ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstd::vector<size_t> InMemoryURLIndex::OffsetsFromTermMatches( 676ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const TermMatches& matches) { 677ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t> offsets; 678ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (TermMatches::const_iterator i = matches.begin(); i != matches.end(); ++i) 679ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen offsets.push_back(i->offset); 680ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return offsets; 681ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 682ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 683ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static 684ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenTermMatches InMemoryURLIndex::ReplaceOffsetsInTermMatches( 685ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const TermMatches& matches, 686ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const std::vector<size_t>& offsets) { 687ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_EQ(matches.size(), offsets.size()); 688ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatches new_matches; 689ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<size_t>::const_iterator offset_iter = offsets.begin(); 690ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (TermMatches::const_iterator term_iter = matches.begin(); 691ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen term_iter != matches.end(); ++term_iter, ++offset_iter) { 692ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (*offset_iter != string16::npos) { 693ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatch new_match(*term_iter); 694ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen new_match.offset = *offset_iter; 695ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen new_matches.push_back(new_match); 696ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 697ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 698ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return new_matches; 699ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 700ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 701ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static 702ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenScoredHistoryMatch InMemoryURLIndex::ScoredMatchForURL( 703ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const URLRow& row, 704ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const String16Vector& terms) { 705ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ScoredHistoryMatch match(row); 7063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick GURL gurl = row.url(); 7073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (!gurl.is_valid()) 708ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return match; 709ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 710ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Figure out where each search term appears in the URL and/or page title 711ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // so that we can score as well as provide autocomplete highlighting. 712ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen string16 url = l10n_util::ToLower(UTF8ToUTF16(gurl.spec())); 713ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Strip any 'http://' prefix before matching. 714ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (url_util::FindAndCompareScheme(url, chrome::kHttpScheme, NULL)) { 715ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.prefix_adjust = strlen(chrome::kHttpScheme) + 3; // Allow for '://'. 716ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen url = url.substr(match.prefix_adjust); 717ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 7183345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 719ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen string16 title = l10n_util::ToLower(row.title()); 720ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int term_num = 0; 7213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick for (String16Vector::const_iterator iter = terms.begin(); iter != terms.end(); 722ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ++iter, ++term_num) { 7233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick string16 term = *iter; 724ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatches url_term_matches = MatchTermInString(term, url, term_num); 725ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen TermMatches title_term_matches = MatchTermInString(term, title, term_num); 726ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (url_term_matches.empty() && title_term_matches.empty()) 727ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return match; // A term was not found in either URL or title - reject. 728ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.url_matches.insert(match.url_matches.end(), url_term_matches.begin(), 729ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen url_term_matches.end()); 730ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.title_matches.insert(match.title_matches.end(), 731ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen title_term_matches.begin(), 732ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen title_term_matches.end()); 7333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 7343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 735ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Sort matches by offset and eliminate any which overlap. 736ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.url_matches = SortAndDeoverlap(match.url_matches); 737ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.title_matches = SortAndDeoverlap(match.title_matches); 738ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 739ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Get partial scores based on term matching. Note that the score for 740ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // each of the URL and title are adjusted by the fraction of the 741ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // terms appearing in each. 742ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int url_score = ScoreComponentForMatches(match.url_matches, url.size()) * 743ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.url_matches.size() / terms.size(); 744ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int title_score = 745ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ScoreComponentForMatches(match.title_matches, title.size()) * 746ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen static_cast<int>(match.title_matches.size()) / 747ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen static_cast<int>(terms.size()); 748ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Arbitrarily pick the best. 749ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // TODO(mrossetti): It might make sense that a term which appears in both the 750ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // URL and the Title should boost the score a bit. 751ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int term_score = std::max(url_score, title_score); 752ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (term_score == 0) 753ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return match; 754ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 755ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Factor in recency of visit, visit count and typed count attributes of the 756ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // URLRow. 757ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kDaysAgoLevel[] = { 0, 10, 20, 30 }; 758ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int score = ScoreForValue((base::Time::Now() - 759ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen row.last_visit()).InDays(), kDaysAgoLevel); 760ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kVisitCountLevel[] = { 30, 10, 5, 3 }; 761ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int visit_count_value = ScoreForValue(row.visit_count(), kVisitCountLevel); 762ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kTypedCountLevel[] = { 10, 5, 3, 1 }; 763ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int typed_count_value = ScoreForValue(row.typed_count(), kTypedCountLevel); 764ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 765ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Determine how many of the factors comprising the final score are 766ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // significant by summing the relative factors for each and subtracting how 767ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // many will be 'discarded' even if they are low. 768ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kVisitCountMultiplier = 2; 769ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kTypedCountMultiplier = 3; 770ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kSignificantFactors = 771ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen kVisitCountMultiplier + // Visit count factor plus 772ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen kTypedCountMultiplier + // typed count factor plus 773ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 2 - // one each for string match and last visit 774ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 2; // minus 2 insignificant factors. 775ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // The following, in effect, discards up to |kSignificantFactors| low scoring 776ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // elements which contribute little to the score but which can inordinately 777ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // drag down an otherwise good score. 778ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen match.raw_score = std::min(kScoreRank[0], (term_score + score + 779ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen (visit_count_value * kVisitCountMultiplier) + (typed_count_value * 780ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen kTypedCountMultiplier)) / kSignificantFactors); 781ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 782ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return match; 783ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 784ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 785ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenint InMemoryURLIndex::ScoreComponentForMatches(const TermMatches& matches, 786ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t max_length) { 7873345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // TODO(mrossetti): This is good enough for now but must be fine-tuned. 788ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (matches.empty()) 789ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return 0; 790ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 791ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Score component for whether the input terms (if more than one) were found 792ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // in the same order in the match. Start with kOrderMaxValue points divided 793ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // equally among (number of terms - 1); then discount each of those terms that 794ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // is out-of-order in the match. 795ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kOrderMaxValue = 250; 796ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int order_value = kOrderMaxValue; 797ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (matches.size() > 1) { 798ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int max_possible_out_of_order = matches.size() - 1; 799ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int out_of_order = 0; 800ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (size_t i = 1; i < matches.size(); ++i) { 801ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (matches[i - 1].term_num > matches[i].term_num) 802ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ++out_of_order; 803ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 804ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen order_value = (max_possible_out_of_order - out_of_order) * kOrderMaxValue / 805ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen max_possible_out_of_order; 8063345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 8073345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 808ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Score component for how early in the match string the first search term 809ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // appears. Start with kStartMaxValue points and discount by 810ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // 1/kMaxSignificantStart points for each character later than the first at 811ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // which the term begins. No points are earned if the start of the match 812ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // occurs at or after kMaxSignificantStart. 8133345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const size_t kMaxSignificantStart = 20; 814ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kStartMaxValue = 250; 815ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int start_value = (kMaxSignificantStart - 816ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::min(kMaxSignificantStart, matches[0].offset)) * kStartMaxValue / 817ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen kMaxSignificantStart; 818ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 819ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Score component for how much of the matched string the input terms cover. 820ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // kCompleteMaxValue points times the fraction of the URL/page title string 821ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // that was matched. 822ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t term_length_total = std::accumulate(matches.begin(), matches.end(), 823ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 0, AccumulateMatchLength); 824ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kCompleteMaxValue = 500; 825ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int complete_value = term_length_total * kCompleteMaxValue / max_length; 826ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 827ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int raw_score = order_value + start_value + complete_value; 828ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const int kTermScoreLevel[] = { 1000, 650, 500, 200 }; 829ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 830ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Scale the sum of the three components above into a single score component 831ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // on the same scale as that used in ScoredMatchForURL(). 832ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return ScoreForValue(raw_score, kTermScoreLevel); 8333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 8343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 835731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickInMemoryURLIndex::AddHistoryMatch::AddHistoryMatch( 836731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick const InMemoryURLIndex& index, 837731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick const String16Vector& lower_terms) 838ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen : index_(index), 839ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen lower_terms_(lower_terms) { 840731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick} 841731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 842731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickInMemoryURLIndex::AddHistoryMatch::~AddHistoryMatch() {} 843731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 8443345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid InMemoryURLIndex::AddHistoryMatch::operator()( 8453345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const InMemoryURLIndex::HistoryID history_id) { 8463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick HistoryInfoMap::const_iterator hist_pos = 8473345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick index_.history_info_map_.find(history_id); 848dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Note that a history_id may be present in the word_id_history_map_ yet not 849dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // be found in the history_info_map_. This occurs when an item has been 850dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // deleted by the user or the item no longer qualifies as a quick result. 8513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (hist_pos != index_.history_info_map_.end()) { 8523345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const URLRow& hist_item = hist_pos->second; 853ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen ScoredHistoryMatch match(ScoredMatchForURL(hist_item, lower_terms_)); 854ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (match.raw_score > 0) 855ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen scored_matches_.push_back(match); 8563345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick } 8573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 8583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 859dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::GetCacheFilePath(FilePath* file_path) { 860dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (history_dir_.empty()) 861dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 862dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen *file_path = history_dir_.Append(FILE_PATH_LITERAL("History Provider Cache")); 863dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 864dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 865dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 866dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SavePrivateData(InMemoryURLIndexCacheItem* cache) const { 867dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen DCHECK(cache); 868dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen cache->set_timestamp(base::Time::Now().ToInternalValue()); 869dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen cache->set_history_item_count(history_item_count_); 870dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveWordList(cache); 871dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveWordMap(cache); 872dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveCharWordMap(cache); 873dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveWordIDHistoryMap(cache); 874dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SaveHistoryInfoMap(cache); 875dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 876dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 877dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestorePrivateData( 878dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const InMemoryURLIndexCacheItem& cache) { 879dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen last_saved_ = base::Time::FromInternalValue(cache.timestamp()); 880dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_item_count_ = cache.history_item_count(); 881dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return (history_item_count_ == 0) || (RestoreWordList(cache) && 882dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen RestoreWordMap(cache) && RestoreCharWordMap(cache) && 883dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen RestoreWordIDHistoryMap(cache) && RestoreHistoryInfoMap(cache)); 884dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 885dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 886dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 887dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveWordList(InMemoryURLIndexCacheItem* cache) const { 888dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (word_list_.empty()) 889dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return; 890dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordListItem* list_item = cache->mutable_word_list(); 891dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen list_item->set_word_count(word_list_.size()); 892dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (String16Vector::const_iterator iter = word_list_.begin(); 893dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != word_list_.end(); ++iter) 894dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen list_item->add_word(UTF16ToUTF8(*iter)); 895dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 896dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 897dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreWordList(const InMemoryURLIndexCacheItem& cache) { 898dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!cache.has_word_list()) 899dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 900dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const WordListItem& list_item(cache.word_list()); 901dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 expected_item_count = list_item.word_count(); 902dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 actual_item_count = list_item.word_size(); 903dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 904dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 905dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedPtrField<std::string>& words(list_item.word()); 906dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedPtrField<std::string>::const_iterator iter = words.begin(); 907dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != words.end(); ++iter) 908dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_list_.push_back(UTF8ToUTF16(*iter)); 909dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 910dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 911dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 912dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveWordMap(InMemoryURLIndexCacheItem* cache) const { 913dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (word_map_.empty()) 914dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return; 915dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordMapItem* map_item = cache->mutable_word_map(); 916dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_item->set_item_count(word_map_.size()); 917dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (WordMap::const_iterator iter = word_map_.begin(); 918dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != word_map_.end(); ++iter) { 919dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordMapEntry* map_entry = map_item->add_word_map_entry(); 920dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_word(UTF16ToUTF8(iter->first)); 921dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_word_id(iter->second); 922dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 923dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 924dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 925dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreWordMap(const InMemoryURLIndexCacheItem& cache) { 926dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!cache.has_word_map()) 927dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 928dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const WordMapItem& list_item(cache.word_map()); 929dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 expected_item_count = list_item.item_count(); 930dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 actual_item_count = list_item.word_map_entry_size(); 931dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 932dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 933dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedPtrField<WordMapEntry>& entries(list_item.word_map_entry()); 934dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedPtrField<WordMapEntry>::const_iterator iter = entries.begin(); 935dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != entries.end(); ++iter) 936dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_map_[UTF8ToUTF16(iter->word())] = iter->word_id(); 937dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 938dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 939dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 940dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveCharWordMap(InMemoryURLIndexCacheItem* cache) const { 941dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (char_word_map_.empty()) 942dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return; 943dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen CharWordMapItem* map_item = cache->mutable_char_word_map(); 944dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_item->set_item_count(char_word_map_.size()); 945dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (CharWordIDMap::const_iterator iter = char_word_map_.begin(); 946dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != char_word_map_.end(); ++iter) { 947dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen CharWordMapEntry* map_entry = map_item->add_char_word_map_entry(); 948dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_char_16(iter->first); 949dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const WordIDSet& word_id_set(iter->second); 950dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_item_count(word_id_set.size()); 951dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (WordIDSet::const_iterator set_iter = word_id_set.begin(); 952dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen set_iter != word_id_set.end(); ++set_iter) 953dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->add_word_id(*set_iter); 954dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 955dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 956dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 957dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreCharWordMap( 958dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const InMemoryURLIndexCacheItem& cache) { 959dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!cache.has_char_word_map()) 960dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 961dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const CharWordMapItem& list_item(cache.char_word_map()); 962dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 expected_item_count = list_item.item_count(); 963dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 actual_item_count = list_item.char_word_map_entry_size(); 964dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 965dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 966dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedPtrField<CharWordMapEntry>& 967dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen entries(list_item.char_word_map_entry()); 968dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedPtrField<CharWordMapEntry>::const_iterator iter = 969dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen entries.begin(); iter != entries.end(); ++iter) { 970dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen expected_item_count = iter->item_count(); 971dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen actual_item_count = iter->word_id_size(); 972dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 973dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 974dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char16 uni_char = static_cast<char16>(iter->char_16()); 975dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDSet word_id_set; 976dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedField<int32>& word_ids(iter->word_id()); 977dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedField<int32>::const_iterator jiter = word_ids.begin(); 978dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen jiter != word_ids.end(); ++jiter) 979dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_set.insert(*jiter); 980dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen char_word_map_[uni_char] = word_id_set; 981dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 982dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 983dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 984dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 985dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveWordIDHistoryMap(InMemoryURLIndexCacheItem* cache) 986dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const { 987dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (word_id_history_map_.empty()) 988dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return; 989dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDHistoryMapItem* map_item = cache->mutable_word_id_history_map(); 990dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_item->set_item_count(word_id_history_map_.size()); 991dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (WordIDHistoryMap::const_iterator iter = word_id_history_map_.begin(); 992dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != word_id_history_map_.end(); ++iter) { 993dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordIDHistoryMapEntry* map_entry = 994dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_item->add_word_id_history_map_entry(); 995dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_word_id(iter->first); 996dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const HistoryIDSet& history_id_set(iter->second); 997dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_item_count(history_id_set.size()); 998dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (HistoryIDSet::const_iterator set_iter = history_id_set.begin(); 999dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen set_iter != history_id_set.end(); ++set_iter) 1000dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->add_history_id(*set_iter); 1001dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 1002dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 1003dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 1004dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreWordIDHistoryMap( 1005dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const InMemoryURLIndexCacheItem& cache) { 1006dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!cache.has_word_id_history_map()) 1007dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 1008dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const WordIDHistoryMapItem& list_item(cache.word_id_history_map()); 1009dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 expected_item_count = list_item.item_count(); 1010dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 actual_item_count = list_item.word_id_history_map_entry_size(); 1011dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 1012dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 1013dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedPtrField<WordIDHistoryMapEntry>& 1014dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen entries(list_item.word_id_history_map_entry()); 1015dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedPtrField<WordIDHistoryMapEntry>::const_iterator iter = 1016dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen entries.begin(); iter != entries.end(); ++iter) { 1017dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen expected_item_count = iter->item_count(); 1018dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen actual_item_count = iter->history_id_size(); 1019dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 1020dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 1021dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen WordID word_id = iter->word_id(); 1022dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen HistoryIDSet history_id_set; 1023dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedField<int64>& history_ids(iter->history_id()); 1024dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedField<int64>::const_iterator jiter = history_ids.begin(); 1025dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen jiter != history_ids.end(); ++jiter) 1026dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_id_set.insert(*jiter); 1027dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen word_id_history_map_[word_id] = history_id_set; 1028dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 1029dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 1030dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 1031dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 1032dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenvoid InMemoryURLIndex::SaveHistoryInfoMap( 1033dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen InMemoryURLIndexCacheItem* cache) const { 1034dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (history_info_map_.empty()) 1035dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return; 1036dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen HistoryInfoMapItem* map_item = cache->mutable_history_info_map(); 1037dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_item->set_item_count(history_info_map_.size()); 1038dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (HistoryInfoMap::const_iterator iter = history_info_map_.begin(); 1039dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen iter != history_info_map_.end(); ++iter) { 1040dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen HistoryInfoMapEntry* map_entry = map_item->add_history_info_map_entry(); 1041dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_history_id(iter->first); 1042dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const URLRow& url_row(iter->second); 1043dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Note: We only save information that contributes to the index so there 1044dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // is no need to save term_char_word_set_cache_ (not persistent), 1045dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // languages_, etc. 1046dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_visit_count(url_row.visit_count()); 1047dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_typed_count(url_row.typed_count()); 1048dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_last_visit(url_row.last_visit().ToInternalValue()); 1049dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_url(url_row.url().spec()); 1050dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen map_entry->set_title(UTF16ToUTF8(url_row.title())); 1051dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 1052dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 1053dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 1054dc0f95d653279beabeb9817299e2902918ba123eKristian Monsenbool InMemoryURLIndex::RestoreHistoryInfoMap( 1055dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const InMemoryURLIndexCacheItem& cache) { 1056dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (!cache.has_history_info_map()) 1057dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 1058dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const HistoryInfoMapItem& list_item(cache.history_info_map()); 1059dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 expected_item_count = list_item.item_count(); 1060dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen uint32 actual_item_count = list_item.history_info_map_entry_size(); 1061dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (actual_item_count == 0 || actual_item_count != expected_item_count) 1062dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return false; 1063dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const RepeatedPtrField<HistoryInfoMapEntry>& 1064dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen entries(list_item.history_info_map_entry()); 1065dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (RepeatedPtrField<HistoryInfoMapEntry>::const_iterator iter = 1066dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen entries.begin(); iter != entries.end(); ++iter) { 1067dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen HistoryID history_id = iter->history_id(); 1068dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen GURL url(iter->url()); 1069dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen URLRow url_row(url, history_id); 1070dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen url_row.set_visit_count(iter->visit_count()); 1071dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen url_row.set_typed_count(iter->typed_count()); 1072dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen url_row.set_last_visit(base::Time::FromInternalValue(iter->last_visit())); 1073dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (iter->has_title()) { 1074dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen string16 title(UTF8ToUTF16(iter->title())); 1075dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen url_row.set_title(title); 1076dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 1077dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen history_info_map_[history_id] = url_row; 1078dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 1079dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen return true; 1080dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 1081dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 1082c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace history 1083