1c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be
3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file.
4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <limits>
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <set>
7c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <string>
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/text_database.h"
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "app/sql/statement.h"
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "app/sql/transaction.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/file_util.h"
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/logging.h"
15731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick#include "base/metrics/histogram.h"
163345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "base/string_number_conversions.h"
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string_util.h"
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/utf_string_conversions.h"
19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/diagnostics/sqlite_diagnostics.h"
20c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
// There are two tables in each database. One is a full-text search (FTS) table
// which indexes the contents and title of the pages. The other is a regular
// SQLite table which contains non-indexed information about the page. All
// columns of an FTS table are indexed using the text search algorithm, which
// isn't what we want for things like times. If the time were in the FTS table,
// there would be different words in the index for each time number.
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// "pages" FTS table:
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//   url    URL of the page so searches will match the URL.
30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//   title  Title of the page.
31c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//   body   Body of the page.
32c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
33c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// "info" regular table:
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//   time     Time the corresponding FTS entry was visited.
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch//
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// We do joins across these two tables by using their internal rowids, which we
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// keep in sync between the two tables. The internal rowid is the only part of
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// an FTS table that is indexed like a normal table, and the index over it is
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// free since sqlite always indexes the internal rowid.
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace history {
42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace {
44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Version 1 uses FTS2 for index files.
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Version 2 uses FTS3.
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const int kCurrentVersionNumber = 2;
48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const int kCompatibleVersionNumber = 2;
49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Snippet computation relies on the index of the columns in the original
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// create statement. These are the 0-based indices (as strings) of the
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// corresponding columns.
53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst char kTitleColumnIndex[] = "1";
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst char kBodyColumnIndex[] = "2";
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// The string prepended to the database identifier to generate the filename.
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst FilePath::CharType kFilePrefix[] = FILE_PATH_LITERAL("History Index ");
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
61731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickTextDatabase::Match::Match() {}
62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
63731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickTextDatabase::Match::~Match() {}
64731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTextDatabase::TextDatabase(const FilePath& path,
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                           DBIdent id,
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                           bool allow_create)
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    : path_(path),
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      ident_(id),
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      allow_create_(allow_create) {
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Compute the file name.
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  file_name_ = path_.Append(IDToFileName(ident_));
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTextDatabase::~TextDatabase() {
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst FilePath::CharType* TextDatabase::file_base() {
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return kFilePrefix;
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochFilePath TextDatabase::IDToFileName(DBIdent id) {
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Identifiers are intended to be a combination of the year and month, for
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // example, 200801 for January 2008. We convert this to
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // "History Index 2008-01". However, we don't make assumptions about this
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // scheme: the caller should assign IDs as it feels fit with the knowledge
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // that they will apppear on disk in this form.
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  FilePath::StringType filename(file_base());
914a5e2dc747d50c653511c68ccb2cfbfb740bd5a7Ben Murdoch  base::StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"),
924a5e2dc747d50c653511c68ccb2cfbfb740bd5a7Ben Murdoch                      id / 100, id % 100);
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return FilePath(filename);
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTextDatabase::DBIdent TextDatabase::FileNameToID(const FilePath& file_path) {
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  FilePath::StringType file_name = file_path.BaseName().value();
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // We don't actually check the prefix here. Since the file system could
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // be case insensitive in ways we can't predict (NTFS), checking could
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // potentially be the wrong thing to do. Instead, we just look for a suffix.
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static const size_t kIDStringLength = 7;  // Room for "xxxx-xx".
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (file_name.length() < kIDStringLength)
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return 0;
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const FilePath::StringType suffix(
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      &file_name[file_name.length() - kIDStringLength]);
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (suffix.length() != kIDStringLength ||
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      suffix[4] != FILE_PATH_LITERAL('-')) {
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return 0;
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
1143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  int year, month;
115513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  base::StringToInt(suffix.begin(), suffix.begin() + 4, &year);
116513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch  base::StringToInt(suffix.begin() + 5, suffix.begin() + 7, &month);
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return year * 100 + month;
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool TextDatabase::Init() {
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Make sure, if we're not allowed to create the file, that it exists.
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!allow_create_) {
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!file_util::PathExists(file_name_))
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return false;
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Set the exceptional sqlite error handler.
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  db_.set_error_delegate(GetErrorHandlerForTextDb());
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Set the database page size to something a little larger to give us
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // better performance (we're typically seek rather than bandwidth limited).
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // This only has an effect before any tables have been created, otherwise
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // this is a NOP. Must be a power of 2 and a max of 8192.
1353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick  db_.set_page_size(4096);
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // The default cache size is 2000 which give >8MB of data. Since we will often
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // have 2-3 of these objects, each with their own 8MB, this adds up very fast.
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // We therefore reduce the size so when there are multiple objects, we're not
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // too big.
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  db_.set_cache_size(512);
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Run the database in exclusive mode. Nobody else should be accessing the
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // database while we're running, and this will give somewhat improved perf.
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  db_.set_exclusive_locking();
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Attach the database to our index file.
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!db_.Open(file_name_))
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Meta table tracking version information.
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber))
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) {
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // This version is too new. We don't bother notifying the user on this
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // error, and just fail to use the file. Normally if they have version skew,
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // they will get it for the main history file and it won't be necessary
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // here. If that's not the case, since this is only indexed data, it's
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // probably better to just not give FTS results than strange errors when
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // everything else is working OK.
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    LOG(WARNING) << "Text database is too new.";
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return CreateTables();
166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::BeginTransaction() {
169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  db_.BeginTransaction();
170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::CommitTransaction() {
173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  db_.CommitTransaction();
174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool TextDatabase::CreateTables() {
177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // FTS table of page contents.
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!db_.DoesTableExist("pages")) {
179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3("
180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     "TOKENIZE icu,"
181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     "url LONGVARCHAR,"
182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     "title LONGVARCHAR,"
183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                     "body LONGVARCHAR)"))
184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return false;
185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Non-FTS table containing URLs and times so we can efficiently find them
188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // using a regular index (all FTS columns are special and are treated as
189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // full-text-search, which is not what we want when retrieving this data).
190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!db_.DoesTableExist("info")) {
191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Note that there is no point in creating an index over time. Since
192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // we must always query the entire FTS table (it can not efficiently do
193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // subsets), we will always end up doing that first, and joining the info
194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // table off of that.
195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)"))
196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return false;
197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Create the index. This will fail when the index already exists, so we just
200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // ignore the error.
201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  db_.Execute("CREATE INDEX info_time ON info(time)");
202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return true;
203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool TextDatabase::AddPageData(base::Time time,
206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                               const std::string& url,
207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                               const std::string& title,
208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                               const std::string& contents) {
209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Transaction committer(&db_);
210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!committer.Begin())
211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Add to the pages table.
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE,
215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "INSERT INTO pages (url, title, body) VALUES (?,?,?)"));
216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!add_to_pages) {
217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NOTREACHED() << db_.GetErrorMessage();
218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  add_to_pages.BindString(0, url);
221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  add_to_pages.BindString(1, title);
222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  add_to_pages.BindString(2, contents);
223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!add_to_pages.Run()) {
224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NOTREACHED() << db_.GetErrorMessage();
225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int64 rowid = db_.GetLastInsertRowId();
229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Add to the info table with the same rowid.
231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE,
232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "INSERT INTO info (rowid, time) VALUES (?,?)"));
233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!add_to_info) {
234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NOTREACHED() << db_.GetErrorMessage();
235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  add_to_info.BindInt64(0, rowid);
238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  add_to_info.BindInt64(1, time.ToInternalValue());
239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!add_to_info.Run()) {
240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    NOTREACHED() << db_.GetErrorMessage();
241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return false;
242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  return committer.Commit();
245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::DeletePageData(base::Time time, const std::string& url) {
248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // First get all rows that match. Selecing on time (which has an index) allows
249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // us to avoid brute-force searches on the full-text-index table (there will
250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // generally be only one match per time).
251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE,
252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "SELECT info.rowid "
253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "FROM info JOIN pages ON info.rowid = pages.rowid "
254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "WHERE info.time=? AND pages.url=?"));
255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!select_ids)
256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  select_ids.BindInt64(0, time.ToInternalValue());
258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  select_ids.BindString(1, url);
259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  std::set<int64> rows_to_delete;
260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  while (select_ids.Step())
261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    rows_to_delete.insert(select_ids.ColumnInt64(0));
262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Delete from the pages table.
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE,
265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "DELETE FROM pages WHERE rowid=?"));
266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!delete_page)
267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (std::set<int64>::const_iterator i = rows_to_delete.begin();
269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       i != rows_to_delete.end(); ++i) {
270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    delete_page.BindInt64(0, *i);
271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!delete_page.Run()) {
272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      NOTREACHED();
273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return;
274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    delete_page.Reset();
276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Delete from the info table.
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE,
280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "DELETE FROM info WHERE rowid=?"));
281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!delete_info)
282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  for (std::set<int64>::const_iterator i = rows_to_delete.begin();
284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch       i != rows_to_delete.end(); ++i) {
285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    delete_info.BindInt64(0, *i);
286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (!delete_info.Run()) {
287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      NOTREACHED();
288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      return;
289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    }
290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    delete_info.Reset();
291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::Optimize() {
295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      "SELECT OPTIMIZE(pages) FROM pages LIMIT 1"));
297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!statement)
298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  statement.Run();
300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::GetTextMatches(const std::string& query,
303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                  const QueryOptions& options,
304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                  std::vector<Match>* results,
305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                  URLSet* found_urls,
306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                  base::Time* first_time_searched) {
307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  *first_time_searched = options.begin_time;
308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE,
310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    "SELECT url, title, time, offsets(pages), body "
311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        "FROM pages LEFT OUTER JOIN info ON pages.rowid = info.rowid "
312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        "WHERE pages MATCH ? AND time >= ? AND time < ? "
313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        "ORDER BY time DESC "
314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch        "LIMIT ?"));
315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  if (!statement)
316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    return;
317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
318c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // When their values indicate "unspecified", saturate the numbers to the max
319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // or min to get the correct result.
320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int64 effective_begin_time = options.begin_time.is_null() ?
321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      0 : options.begin_time.ToInternalValue();
322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int64 effective_end_time = options.end_time.is_null() ?
323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      std::numeric_limits<int64>::max() : options.end_time.ToInternalValue();
324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  int effective_max_count = options.max_count ?
325c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      options.max_count : std::numeric_limits<int>::max();
326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  statement.BindString(0, query);
328c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  statement.BindInt64(1, effective_begin_time);
329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  statement.BindInt64(2, effective_end_time);
330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  statement.BindInt(3, effective_max_count);
331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
332c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  while (statement.Step()) {
333c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // TODO(brettw) allow canceling the query in the middle.
334c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // if (canceled_or_something)
335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    //   break;
336c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
337c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    GURL url(statement.ColumnString(0));
338c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    URLSet::const_iterator found_url = found_urls->find(url);
339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    if (found_url != found_urls->end())
340c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      continue;  // Don't add this duplicate.
341c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
342c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Fill the results into the vector (avoid copying the URL with Swap()).
343c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    results->resize(results->size() + 1);
344c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Match& match = results->at(results->size() - 1);
345c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    match.url.Swap(&url);
346c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
347c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    match.title = statement.ColumnString16(1);
348c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    match.time = base::Time::FromInternalValue(statement.ColumnInt64(2));
349c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
350c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Extract any matches in the title.
351c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::string offsets_str = statement.ColumnString(3);
352c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex,
353c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                   &match.title_match_positions);
354c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1),
355c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                         &match.title_match_positions);
356c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
357c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Extract the matches in the body.
358c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Snippet::MatchPositions match_positions;
359c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex,
360c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                                   &match_positions);
361c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
362c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Compute the snippet based on those matches.
363c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    std::string body = statement.ColumnString(4);
364c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    match.snippet.ComputeSnippet(match_positions, body);
365c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
366c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
367c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // When we have returned all the results possible (or determined that there
368c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // are none), then we have searched all the time requested, so we can
369c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // set the first_time_searched to that value.
370dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen  if (results->empty() ||
371c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      options.max_count == 0 ||  // Special case for wanting all the results.
372c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch      static_cast<int>(results->size()) < options.max_count) {
373c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    *first_time_searched = options.begin_time;
374c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  } else {
375c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Since we got the results in order, we know the last item is the last
376c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // time we considered.
377c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    *first_time_searched = results->back().time;
378c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  }
379c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
380c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  statement.Reset();
381c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}
382c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
383c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace history
384