13345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/bookmarks/bookmark_index.h" 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <algorithm> 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <iterator> 93345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <list> 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string16.h" 12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/bookmarks/bookmark_model.h" 13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/bookmarks/bookmark_utils.h" 14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/history_database.h" 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/query_parser.h" 1621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/profiles/profile.h" 1772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "ui/base/l10n/l10n_util.h" 18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Used when finding the set of bookmarks that match a query. Each match 203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// represents a set of terms (as an interator into the Index) matching the 213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// query as well as the set of nodes that contain those terms in their titles. 223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickstruct BookmarkIndex::Match { 233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // List of terms matching the query. 243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::list<Index::const_iterator> terms; 253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // The set of nodes matching the terms. As an optimization this is empty 273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // when we match only one term, and is filled in when we get more than one 283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // term. We can do this as when we have only one matching term we know 293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // the set of matching nodes is terms.front()->second. 303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // 313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Use nodes_begin() and nodes_end() to get an iterator over the set as 323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // it handles the necessary switching between nodes and terms.front(). 333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick NodeSet nodes; 343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Returns an iterator to the beginning of the matching nodes. See 363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // description of nodes for why this should be used over nodes.begin(). 373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick NodeSet::const_iterator nodes_begin() const; 383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // Returns an iterator to the beginning of the matching nodes. See 403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick // description of nodes for why this should be used over nodes.end(). 413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick NodeSet::const_iterator nodes_end() const; 423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}; 433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 44c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochBookmarkIndex::NodeSet::const_iterator 45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch BookmarkIndex::Match::nodes_begin() const { 46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return nodes.empty() ? terms.front()->second.begin() : nodes.begin(); 47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 49c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochBookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const { 50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return nodes.empty() ? terms.front()->second.end() : nodes.end(); 51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 533345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickBookmarkIndex::BookmarkIndex(Profile* profile) : profile_(profile) { 543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 563345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickBookmarkIndex::~BookmarkIndex() { 573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::Add(const BookmarkNode* node) { 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!node->is_url()) 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::vector<string16> terms = ExtractQueryWords(node->GetTitle()); 63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < terms.size(); ++i) 64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch RegisterNode(terms[i], node); 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::Remove(const BookmarkNode* node) { 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!node->is_url()) 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 713345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::vector<string16> terms = ExtractQueryWords(node->GetTitle()); 72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < terms.size(); ++i) 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch UnregisterNode(terms[i], node); 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::GetBookmarksWithTitlesMatching( 773345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick const string16& query, 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch size_t max_count, 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<bookmark_utils::TitleMatch>* results) { 803345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick std::vector<string16> terms = ExtractQueryWords(query); 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (terms.empty()) 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Matches matches; 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < terms.size(); ++i) { 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches)) 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NodeTypedCountPairs node_typed_counts; 91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch SortMatches(matches, &node_typed_counts); 92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We use a QueryParser to fill in match positions for us. It's not the most 94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // efficient way to go about this, but by the time we get here we know what 95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // matches and so this shouldn't be performance critical. 96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch QueryParser parser; 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ScopedVector<QueryNode> query_nodes; 983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick parser.ParseQuery(query, &query_nodes.get()); 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The highest typed counts should be at the beginning of the results vector 101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // so that the best matches will always be included in the results. The loop 102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // that calculates result relevance in HistoryContentsProvider::ConvertResults 103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // will run backwards to assure higher relevance will be attributed to the 104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // best matches. 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin(); 106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i != node_typed_counts.end() && results->size() < max_count; ++i) 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch AddMatchToResults(i->first, &parser, query_nodes.get(), results); 108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::SortMatches(const Matches& matches, 111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NodeTypedCountPairs* node_typed_counts) const { 112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch HistoryService* const history_service = profile_ ? 113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : NULL; 114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch history::URLDatabase* url_db = history_service ? 116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch history_service->InMemoryDatabase() : NULL; 117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i) 119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ExtractBookmarkNodePairs(url_db, *i, node_typed_counts); 120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::sort(node_typed_counts->begin(), node_typed_counts->end(), 122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &NodeTypedCountPairSortFunc); 123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::ExtractBookmarkNodePairs( 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch history::URLDatabase* url_db, 127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const Match& match, 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NodeTypedCountPairs* node_typed_counts) const { 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (NodeSet::const_iterator i = match.nodes_begin(); 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i != match.nodes_end(); ++i) { 132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch history::URLRow url; 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (url_db) 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch url_db->GetRowForURL((*i)->GetURL(), &url); 135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NodeTypedCountPair pair(*i, url.typed_count()); 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch node_typed_counts->push_back(pair); 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::AddMatchToResults( 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const BookmarkNode* node, 142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch QueryParser* parser, 143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::vector<QueryNode*>& query_nodes, 144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<bookmark_utils::TitleMatch>* results) { 145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bookmark_utils::TitleMatch title_match; 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Check that the result matches the query. The previous search 147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // was a simple per-word search, while the more complex matching 148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // of QueryParser may filter it out. For example, the query 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // ["thi"] will match the bookmark titled [Thinking], but since 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // ["thi"] is quoted we don't want to do a prefix match. 1513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (parser->DoesQueryMatch(node->GetTitle(), query_nodes, 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &(title_match.match_positions))) { 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch title_match.node = node; 154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch results->push_back(title_match); 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickbool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term, 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool first_term, 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Matches* matches) { 161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Index::const_iterator i = index_.lower_bound(term); 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (i == index_.end()) 163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) { 166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Term is too short for prefix match, compare using exact match. 167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (i->first != term) 168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; // No bookmarks with this term. 169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (first_term) { 171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Match match; 172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match.terms.push_back(i); 173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch matches->push_back(match); 174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return true; 175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CombineMatchesInPlace(i, matches); 177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else if (first_term) { 178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This is the first term and we're doing a prefix match. Loop through 179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // index adding all entries that start with term to matches. 180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (i != index_.end() && 181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i->first.size() >= term.size() && 182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch term.compare(0, term.size(), i->first, 0, term.size()) == 0) { 183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Match match; 184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match.terms.push_back(i); 185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch matches->push_back(match); 186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++i; 187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Prefix match and not the first term. Loop through index combining 190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // current matches in matches with term, placing result in result. 191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Matches result; 192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (i != index_.end() && 193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i->first.size() >= term.size() && 194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch term.compare(0, term.size(), i->first, 0, term.size()) == 0) { 195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch CombineMatches(i, *matches, &result); 196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++i; 197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch matches->swap(result); 199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return !matches->empty(); 201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i, 204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Matches* matches) { 205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < matches->size(); ) { 206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Match* match = &((*matches)[i]); 207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NodeSet intersection; 208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::set_intersection(match->nodes_begin(), match->nodes_end(), 209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch index_i->second.begin(), index_i->second.end(), 210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::inserter(intersection, intersection.begin())); 211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (intersection.empty()) { 212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch matches->erase(matches->begin() + i); 213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match->terms.push_back(index_i); 215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match->nodes.swap(intersection); 216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ++i; 217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::CombineMatches(const Index::const_iterator& index_i, 222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const Matches& current_matches, 223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Matches* result) { 224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (size_t i = 0; i < current_matches.size(); ++i) { 225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const Match& match = current_matches[i]; 226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NodeSet intersection; 227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::set_intersection(match.nodes_begin(), match.nodes_end(), 228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch index_i->second.begin(), index_i->second.end(), 229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::inserter(intersection, intersection.begin())); 230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!intersection.empty()) { 231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch result->push_back(Match()); 232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Match& combined_match = result->back(); 233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch combined_match.terms = match.terms; 234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch combined_match.terms.push_back(index_i); 235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch combined_match.nodes.swap(intersection); 236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 2403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickstd::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) { 241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<string16> terms; 242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (query.empty()) 2433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return std::vector<string16>(); 244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch QueryParser parser; 245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // TODO: use ICU normalization. 2463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick parser.ExtractQueryWords(l10n_util::ToLower(query), &terms); 247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return terms; 248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 2503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid BookmarkIndex::RegisterNode(const string16& term, 251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const BookmarkNode* node) { 252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (std::find(index_[term].begin(), index_[term].end(), node) != 253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch index_[term].end()) { 254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We've already added node for term. 255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch index_[term].insert(node); 258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 2603345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid BookmarkIndex::UnregisterNode(const string16& term, 261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const BookmarkNode* node) { 262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Index::iterator i = index_.find(term); 263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (i == index_.end()) { 264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We can get here if the node has the same term more than once. For 265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // example, a bookmark with the title 'foo foo' would end up here. 266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i->second.erase(node); 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (i->second.empty()) 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch index_.erase(i); 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 272