13345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/bookmarks/bookmark_index.h"
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <algorithm>
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <iterator>
93345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <list>
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string16.h"
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/bookmarks/bookmark_model.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/bookmarks/bookmark_utils.h"
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/history_database.h"
15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/query_parser.h"
1621d179b334e59e9a3bfcaed4c4430bef1bc5759dKristian Monsen#include "chrome/browser/profiles/profile.h"
1772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "ui/base/l10n/l10n_util.h"
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// Used when finding the set of bookmarks that match a query. Each match
203345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// represents a set of terms (as an interator into the Index) matching the
213345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick// query as well as the set of nodes that contain those terms in their titles.
223345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickstruct BookmarkIndex::Match {
233345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // List of terms matching the query.
243345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  std::list<Index::const_iterator> terms;
253345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
263345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // The set of nodes matching the terms. As an optimization this is empty
273345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // when we match only one term, and is filled in when we get more than one
283345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // term. We can do this as when we have only one matching term we know
293345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // the set of matching nodes is terms.front()->second.
303345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  //
313345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Use nodes_begin() and nodes_end() to get an iterator over the set as
323345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // it handles the necessary switching between nodes and terms.front().
333345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  NodeSet nodes;
343345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Returns an iterator to the beginning of the matching nodes. See
363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // description of nodes for why this should be used over nodes.begin().
373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  NodeSet::const_iterator nodes_begin() const;
383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // Returns an iterator to the beginning of the matching nodes. See
403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  // description of nodes for why this should be used over nodes.end().
413345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  NodeSet::const_iterator nodes_end() const;
423345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick};
433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
44c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochBookmarkIndex::NodeSet::const_iterator
45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    BookmarkIndex::Match::nodes_begin() const {
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return nodes.empty() ? terms.front()->second.begin() : nodes.begin();
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
49c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochBookmarkIndex::NodeSet::const_iterator BookmarkIndex::Match::nodes_end() const {
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return nodes.empty() ? terms.front()->second.end() : nodes.end();
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
533345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickBookmarkIndex::BookmarkIndex(Profile* profile) : profile_(profile) {
543345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
553345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
563345a6884c488ff3a535c2c9acdd33d74b37e311Iain MerrickBookmarkIndex::~BookmarkIndex() {
573345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick}
583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::Add(const BookmarkNode* node) {
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!node->is_url())
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
623345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < terms.size(); ++i)
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    RegisterNode(terms[i], node);
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::Remove(const BookmarkNode* node) {
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!node->is_url())
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
713345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  std::vector<string16> terms = ExtractQueryWords(node->GetTitle());
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < terms.size(); ++i)
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    UnregisterNode(terms[i], node);
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::GetBookmarksWithTitlesMatching(
773345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    const string16& query,
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    size_t max_count,
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::vector<bookmark_utils::TitleMatch>* results) {
803345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  std::vector<string16> terms = ExtractQueryWords(query);
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (terms.empty())
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Matches matches;
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < terms.size(); ++i) {
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!GetBookmarksWithTitleMatchingTerm(terms[i], i == 0, &matches))
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return;
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  NodeTypedCountPairs node_typed_counts;
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  SortMatches(matches, &node_typed_counts);
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // We use a QueryParser to fill in match positions for us. It's not the most
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // efficient way to go about this, but by the time we get here we know what
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // matches and so this shouldn't be performance critical.
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  QueryParser parser;
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ScopedVector<QueryNode> query_nodes;
983345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  parser.ParseQuery(query, &query_nodes.get());
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // The highest typed counts should be at the beginning of the results vector
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // so that the best matches will always be included in the results. The loop
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // that calculates result relevance in HistoryContentsProvider::ConvertResults
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // will run backwards to assure higher relevance will be attributed to the
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // best matches.
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (NodeTypedCountPairs::const_iterator i = node_typed_counts.begin();
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       i != node_typed_counts.end() && results->size() < max_count; ++i)
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    AddMatchToResults(i->first, &parser, query_nodes.get(), results);
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::SortMatches(const Matches& matches,
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                NodeTypedCountPairs* node_typed_counts) const {
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  HistoryService* const history_service = profile_ ?
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      profile_->GetHistoryService(Profile::EXPLICIT_ACCESS) : NULL;
114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  history::URLDatabase* url_db = history_service ?
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      history_service->InMemoryDatabase() : NULL;
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (Matches::const_iterator i = matches.begin(); i != matches.end(); ++i)
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    ExtractBookmarkNodePairs(url_db, *i, node_typed_counts);
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::sort(node_typed_counts->begin(), node_typed_counts->end(),
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch            &NodeTypedCountPairSortFunc);
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::ExtractBookmarkNodePairs(
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    history::URLDatabase* url_db,
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const Match& match,
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NodeTypedCountPairs* node_typed_counts) const {
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (NodeSet::const_iterator i = match.nodes_begin();
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       i != match.nodes_end(); ++i) {
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    history::URLRow url;
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (url_db)
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      url_db->GetRowForURL((*i)->GetURL(), &url);
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NodeTypedCountPair pair(*i, url.typed_count());
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    node_typed_counts->push_back(pair);
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::AddMatchToResults(
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const BookmarkNode* node,
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    QueryParser* parser,
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const std::vector<QueryNode*>& query_nodes,
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::vector<bookmark_utils::TitleMatch>* results) {
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bookmark_utils::TitleMatch title_match;
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Check that the result matches the query.  The previous search
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // was a simple per-word search, while the more complex matching
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // of QueryParser may filter it out.  For example, the query
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // ["thi"] will match the bookmark titled [Thinking], but since
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // ["thi"] is quoted we don't want to do a prefix match.
1513345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (parser->DoesQueryMatch(node->GetTitle(), query_nodes,
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                             &(title_match.match_positions))) {
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    title_match.node = node;
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    results->push_back(title_match);
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1583345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickbool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const string16& term,
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                                      bool first_term,
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                                      Matches* matches) {
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Index::const_iterator i = index_.lower_bound(term);
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (i == index_.end())
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1653345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  if (!QueryParser::IsWordLongEnoughForPrefixSearch(term)) {
166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Term is too short for prefix match, compare using exact match.
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (i->first != term)
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return false;  // No bookmarks with this term.
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (first_term) {
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      Match match;
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      match.terms.push_back(i);
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      matches->push_back(match);
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return true;
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    CombineMatchesInPlace(i, matches);
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } else if (first_term) {
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // This is the first term and we're doing a prefix match. Loop through
179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // index adding all entries that start with term to matches.
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    while (i != index_.end() &&
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           i->first.size() >= term.size() &&
182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      Match match;
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      match.terms.push_back(i);
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      matches->push_back(match);
186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++i;
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } else {
189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Prefix match and not the first term. Loop through index combining
190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // current matches in matches with term, placing result in result.
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Matches result;
192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    while (i != index_.end() &&
193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           i->first.size() >= term.size() &&
194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch           term.compare(0, term.size(), i->first, 0, term.size()) == 0) {
195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      CombineMatches(i, *matches, &result);
196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++i;
197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    matches->swap(result);
199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return !matches->empty();
201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator& index_i,
204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                          Matches* matches) {
205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < matches->size(); ) {
206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Match* match = &((*matches)[i]);
207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NodeSet intersection;
208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::set_intersection(match->nodes_begin(), match->nodes_end(),
209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                          index_i->second.begin(), index_i->second.end(),
210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                          std::inserter(intersection, intersection.begin()));
211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (intersection.empty()) {
212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      matches->erase(matches->begin() + i);
213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    } else {
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      match->terms.push_back(index_i);
215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      match->nodes.swap(intersection);
216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ++i;
217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid BookmarkIndex::CombineMatches(const Index::const_iterator& index_i,
222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                   const Matches& current_matches,
223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                   Matches* result) {
224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (size_t i = 0; i < current_matches.size(); ++i) {
225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    const Match& match = current_matches[i];
226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NodeSet intersection;
227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::set_intersection(match.nodes_begin(), match.nodes_end(),
228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                          index_i->second.begin(), index_i->second.end(),
229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                          std::inserter(intersection, intersection.begin()));
230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!intersection.empty()) {
231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      result->push_back(Match());
232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      Match& combined_match = result->back();
233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      combined_match.terms = match.terms;
234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      combined_match.terms.push_back(index_i);
235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      combined_match.nodes.swap(intersection);
236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
2403345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickstd::vector<string16> BookmarkIndex::ExtractQueryWords(const string16& query) {
241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::vector<string16> terms;
242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (query.empty())
2433345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick    return std::vector<string16>();
244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  QueryParser parser;
245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // TODO: use ICU normalization.
2463345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  parser.ExtractQueryWords(l10n_util::ToLower(query), &terms);
247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return terms;
248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
2503345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid BookmarkIndex::RegisterNode(const string16& term,
251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                 const BookmarkNode* node) {
252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (std::find(index_[term].begin(), index_[term].end(), node) !=
253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      index_[term].end()) {
254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // We've already added node for term.
255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  index_[term].insert(node);
258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
2603345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickvoid BookmarkIndex::UnregisterNode(const string16& term,
261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                   const BookmarkNode* node) {
262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  Index::iterator i = index_.find(term);
263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (i == index_.end()) {
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // We can get here if the node has the same term more than once. For
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // example, a bookmark with the title 'foo foo' would end up here.
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  i->second.erase(node);
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (i->second.empty())
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    index_.erase(i);
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
272