// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <limits>
#include <set>
#include <string>

#include "chrome/browser/history/text_database.h"

#include "app/sql/statement.h"
#include "app/sql/transaction.h"
#include "base/file_util.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/string_number_conversions.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "chrome/browser/diagnostics/sqlite_diagnostics.h"

// There are two tables in each database, one full-text search (FTS) table which
// indexes the contents and title of the pages. The other is a regular SQLITE
// table which contains non-indexed information about the page. All columns of
// an FTS table are indexed using the text search algorithm, which isn't what we
// want for things like times. If this were in the FTS table, there would be
// different words in the index for each time number.
//
// "pages" FTS table:
//   url    URL of the page so searches will match the URL.
//   title  Title of the page.
//   body   Body of the page.
//
// "info" regular table:
//   time   Time the corresponding FTS entry was visited.
//
// We do joins across these two tables by using their internal rowids, which we
// keep in sync between the two tables. The internal rowid is the only part of
// an FTS table that is indexed like a normal table, and the index over it is
// free since sqlite always indexes the internal rowid.
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace history { 42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace { 44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Version 1 uses FTS2 for index files. 46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Version 2 uses FTS3. 47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const int kCurrentVersionNumber = 2; 48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochstatic const int kCompatibleVersionNumber = 2; 49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Snippet computation relies on the index of the columns in the original 51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// create statement. These are the 0-based indices (as strings) of the 52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// corresponding columns. 53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst char kTitleColumnIndex[] = "1"; 54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst char kBodyColumnIndex[] = "2"; 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// The string prepended to the database identifier to generate the filename. 
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst FilePath::CharType kFilePrefix[] = FILE_PATH_LITERAL("History Index "); 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 61731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickTextDatabase::Match::Match() {} 62731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 63731df977c0511bca2206b5f333555b1205ff1f43Iain MerrickTextDatabase::Match::~Match() {} 64731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTextDatabase::TextDatabase(const FilePath& path, 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DBIdent id, 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool allow_create) 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch : path_(path), 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ident_(id), 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch allow_create_(allow_create) { 71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Compute the file name. 
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch file_name_ = path_.Append(IDToFileName(ident_)); 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTextDatabase::~TextDatabase() { 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochconst FilePath::CharType* TextDatabase::file_base() { 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return kFilePrefix; 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochFilePath TextDatabase::IDToFileName(DBIdent id) { 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Identifiers are intended to be a combination of the year and month, for 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // example, 200801 for January 2008. We convert this to 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // "History Index 2008-01". However, we don't make assumptions about this 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // scheme: the caller should assign IDs as it feels fit with the knowledge 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // that they will apppear on disk in this form. 
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch FilePath::StringType filename(file_base()); 914a5e2dc747d50c653511c68ccb2cfbfb740bd5a7Ben Murdoch base::StringAppendF(&filename, FILE_PATH_LITERAL("%d-%02d"), 924a5e2dc747d50c653511c68ccb2cfbfb740bd5a7Ben Murdoch id / 100, id % 100); 93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return FilePath(filename); 94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// static 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen MurdochTextDatabase::DBIdent TextDatabase::FileNameToID(const FilePath& file_path) { 98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch FilePath::StringType file_name = file_path.BaseName().value(); 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We don't actually check the prefix here. Since the file system could 101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // be case insensitive in ways we can't predict (NTFS), checking could 102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // potentially be the wrong thing to do. Instead, we just look for a suffix. 103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static const size_t kIDStringLength = 7; // Room for "xxxx-xx". 
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (file_name.length() < kIDStringLength) 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return 0; 106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const FilePath::StringType suffix( 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &file_name[file_name.length() - kIDStringLength]); 108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (suffix.length() != kIDStringLength || 110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch suffix[4] != FILE_PATH_LITERAL('-')) { 111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return 0; 112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 1143345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick int year, month; 115513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch base::StringToInt(suffix.begin(), suffix.begin() + 4, &year); 116513209b27ff55e2841eac0e4120199c23acce758Ben Murdoch base::StringToInt(suffix.begin() + 5, suffix.begin() + 7, &month); 117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return year * 100 + month; 119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool TextDatabase::Init() { 122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Make sure, if we're not allowed to create the file, that it exists. 
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!allow_create_) { 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!file_util::PathExists(file_name_)) 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Set the exceptional sqlite error handler. 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch db_.set_error_delegate(GetErrorHandlerForTextDb()); 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Set the database page size to something a little larger to give us 132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // better performance (we're typically seek rather than bandwidth limited). 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This only has an effect before any tables have been created, otherwise 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // this is a NOP. Must be a power of 2 and a max of 8192. 1353345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick db_.set_page_size(4096); 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The default cache size is 2000 which give >8MB of data. Since we will often 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // have 2-3 of these objects, each with their own 8MB, this adds up very fast. 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // We therefore reduce the size so when there are multiple objects, we're not 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // too big. 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch db_.set_cache_size(512); 142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Run the database in exclusive mode. 
Nobody else should be accessing the 144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // database while we're running, and this will give somewhat improved perf. 145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch db_.set_exclusive_locking(); 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Attach the database to our index file. 148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!db_.Open(file_name_)) 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Meta table tracking version information. 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!meta_table_.Init(&db_, kCurrentVersionNumber, kCompatibleVersionNumber)) 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (meta_table_.GetCompatibleVersionNumber() > kCurrentVersionNumber) { 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This version is too new. We don't bother notifying the user on this 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // error, and just fail to use the file. Normally if they have version skew, 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // they will get it for the main history file and it won't be necessary 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // here. If that's not the case, since this is only indexed data, it's 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // probably better to just not give FTS results than strange errors when 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // everything else is working OK. 
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch LOG(WARNING) << "Text database is too new."; 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return CreateTables(); 166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 167c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 168c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::BeginTransaction() { 169c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch db_.BeginTransaction(); 170c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 171c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 172c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::CommitTransaction() { 173c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch db_.CommitTransaction(); 174c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 175c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 176c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool TextDatabase::CreateTables() { 177c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // FTS table of page contents. 
178c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!db_.DoesTableExist("pages")) { 179c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!db_.Execute("CREATE VIRTUAL TABLE pages USING fts3(" 180c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "TOKENIZE icu," 181c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "url LONGVARCHAR," 182c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "title LONGVARCHAR," 183c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "body LONGVARCHAR)")) 184c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 185c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 186c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 187c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Non-FTS table containing URLs and times so we can efficiently find them 188c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // using a regular index (all FTS columns are special and are treated as 189c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // full-text-search, which is not what we want when retrieving this data). 190c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!db_.DoesTableExist("info")) { 191c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Note that there is no point in creating an index over time. Since 192c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // we must always query the entire FTS table (it can not efficiently do 193c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // subsets), we will always end up doing that first, and joining the info 194c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // table off of that. 195c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!db_.Execute("CREATE TABLE info(time INTEGER NOT NULL)")) 196c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 197c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 198c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 199c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Create the index. 
This will fail when the index already exists, so we just 200c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // ignore the error. 201c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch db_.Execute("CREATE INDEX info_time ON info(time)"); 202c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return true; 203c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 204c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 205c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochbool TextDatabase::AddPageData(base::Time time, 206c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& url, 207c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& title, 208c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& contents) { 209c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Transaction committer(&db_); 210c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!committer.Begin()) 211c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 212c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 213c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Add to the pages table. 
214c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement add_to_pages(db_.GetCachedStatement(SQL_FROM_HERE, 215c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "INSERT INTO pages (url, title, body) VALUES (?,?,?)")); 216c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!add_to_pages) { 217c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED() << db_.GetErrorMessage(); 218c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 219c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 220c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_to_pages.BindString(0, url); 221c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_to_pages.BindString(1, title); 222c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_to_pages.BindString(2, contents); 223c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!add_to_pages.Run()) { 224c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED() << db_.GetErrorMessage(); 225c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 226c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 227c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 228c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int64 rowid = db_.GetLastInsertRowId(); 229c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 230c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Add to the info table with the same rowid. 
231c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement add_to_info(db_.GetCachedStatement(SQL_FROM_HERE, 232c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "INSERT INTO info (rowid, time) VALUES (?,?)")); 233c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!add_to_info) { 234c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED() << db_.GetErrorMessage(); 235c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 236c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 237c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_to_info.BindInt64(0, rowid); 238c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch add_to_info.BindInt64(1, time.ToInternalValue()); 239c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!add_to_info.Run()) { 240c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED() << db_.GetErrorMessage(); 241c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return false; 242c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 243c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 244c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return committer.Commit(); 245c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 246c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 247c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::DeletePageData(base::Time time, const std::string& url) { 248c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // First get all rows that match. Selecing on time (which has an index) allows 249c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // us to avoid brute-force searches on the full-text-index table (there will 250c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // generally be only one match per time). 
251c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement select_ids(db_.GetCachedStatement(SQL_FROM_HERE, 252c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "SELECT info.rowid " 253c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "FROM info JOIN pages ON info.rowid = pages.rowid " 254c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "WHERE info.time=? AND pages.url=?")); 255c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!select_ids) 256c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 257c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch select_ids.BindInt64(0, time.ToInternalValue()); 258c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch select_ids.BindString(1, url); 259c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::set<int64> rows_to_delete; 260c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (select_ids.Step()) 261c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch rows_to_delete.insert(select_ids.ColumnInt64(0)); 262c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 263c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Delete from the pages table. 
264c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement delete_page(db_.GetCachedStatement(SQL_FROM_HERE, 265c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "DELETE FROM pages WHERE rowid=?")); 266c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!delete_page) 267c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 268c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::set<int64>::const_iterator i = rows_to_delete.begin(); 269c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i != rows_to_delete.end(); ++i) { 270c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch delete_page.BindInt64(0, *i); 271c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!delete_page.Run()) { 272c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED(); 273c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 274c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 275c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch delete_page.Reset(); 276c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 277c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 278c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Delete from the info table. 
279c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement delete_info(db_.GetCachedStatement(SQL_FROM_HERE, 280c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "DELETE FROM info WHERE rowid=?")); 281c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!delete_info) 282c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 283c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch for (std::set<int64>::const_iterator i = rows_to_delete.begin(); 284c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch i != rows_to_delete.end(); ++i) { 285c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch delete_info.BindInt64(0, *i); 286c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!delete_info.Run()) { 287c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch NOTREACHED(); 288c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 289c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 290c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch delete_info.Reset(); 291c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 292c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 293c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 294c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::Optimize() { 295c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, 296c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "SELECT OPTIMIZE(pages) FROM pages LIMIT 1")); 297c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!statement) 298c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 299c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch statement.Run(); 300c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 301c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 302c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochvoid TextDatabase::GetTextMatches(const std::string& query, 303c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const QueryOptions& options, 
304c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<Match>* results, 305c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch URLSet* found_urls, 306c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::Time* first_time_searched) { 307c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *first_time_searched = options.begin_time; 308c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 309c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Statement statement(db_.GetCachedStatement(SQL_FROM_HERE, 310c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "SELECT url, title, time, offsets(pages), body " 311c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "FROM pages LEFT OUTER JOIN info ON pages.rowid = info.rowid " 312c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "WHERE pages MATCH ? AND time >= ? AND time < ? " 313c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "ORDER BY time DESC " 314c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch "LIMIT ?")); 315c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (!statement) 316c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch return; 317c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 318c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // When their values indicate "unspecified", saturate the numbers to the max 319c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // or min to get the correct result. 320c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int64 effective_begin_time = options.begin_time.is_null() ? 321c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 0 : options.begin_time.ToInternalValue(); 322c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int64 effective_end_time = options.end_time.is_null() ? 323c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::numeric_limits<int64>::max() : options.end_time.ToInternalValue(); 324c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch int effective_max_count = options.max_count ? 
325c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch options.max_count : std::numeric_limits<int>::max(); 326c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 327c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch statement.BindString(0, query); 328c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch statement.BindInt64(1, effective_begin_time); 329c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch statement.BindInt64(2, effective_end_time); 330c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch statement.BindInt(3, effective_max_count); 331c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 332c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch while (statement.Step()) { 333c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // TODO(brettw) allow canceling the query in the middle. 334c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // if (canceled_or_something) 335c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // break; 336c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 337c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL url(statement.ColumnString(0)); 338c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch URLSet::const_iterator found_url = found_urls->find(url); 339c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch if (found_url != found_urls->end()) 340c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch continue; // Don't add this duplicate. 341c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 342c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Fill the results into the vector (avoid copying the URL with Swap()). 
343c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch results->resize(results->size() + 1); 344c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Match& match = results->at(results->size() - 1); 345c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match.url.Swap(&url); 346c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 347c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match.title = statement.ColumnString16(1); 348c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); 349c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 350c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Extract any matches in the title. 351c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string offsets_str = statement.ColumnString(3); 352c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Snippet::ExtractMatchPositions(offsets_str, kTitleColumnIndex, 353c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &match.title_match_positions); 354c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Snippet::ConvertMatchPositionsToWide(statement.ColumnString(1), 355c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &match.title_match_positions); 356c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 357c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Extract the matches in the body. 358c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Snippet::MatchPositions match_positions; 359c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Snippet::ExtractMatchPositions(offsets_str, kBodyColumnIndex, 360c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch &match_positions); 361c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 362c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Compute the snippet based on those matches. 
363c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::string body = statement.ColumnString(4); 364c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch match.snippet.ComputeSnippet(match_positions, body); 365c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 366c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 367c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // When we have returned all the results possible (or determined that there 368c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // are none), then we have searched all the time requested, so we can 369c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // set the first_time_searched to that value. 370dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (results->empty() || 371c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch options.max_count == 0 || // Special case for wanting all the results. 372c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static_cast<int>(results->size()) < options.max_count) { 373c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *first_time_searched = options.begin_time; 374c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } else { 375c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Since we got the results in order, we know the last item is the last 376c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // time we considered. 377c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch *first_time_searched = results->back().time; 378c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch } 379c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 380c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch statement.Reset(); 381c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} 382c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 383c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace history 384