1c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Copyright (c) 2009 The Chromium Authors. All rights reserved. 2c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Use of this source code is governed by a BSD-style license that can be 3c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// found in the LICENSE file. 4c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ 6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ 73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once 8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <set> 10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <vector> 11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "app/sql/connection.h" 13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "app/sql/meta_table.h" 14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/basictypes.h" 15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/file_path.h" 16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string16.h" 17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/history_types.h" 18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/gurl.h" 19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 20c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace history { 21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Encapsulation of a full-text indexed database file. 23c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochclass TextDatabase { 24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch public: 25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch typedef int DBIdent; 26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch typedef std::set<GURL> URLSet; 28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Returned from the search function. 30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch struct Match { 31731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick Match(); 32731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick ~Match(); 33731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick 34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // URL of the match. 35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch GURL url; 36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The title is returned because the title in the text database and the URL 38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // database may differ. This happens because we capture the title when the 39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // body is captured, and don't update it later. 40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch string16 title; 41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Time the page that was returned was visited. 43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::Time time; 44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Identifies any found matches in the title of the document. These are not 46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // included in the snippet. 47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Snippet::MatchPositions title_match_positions; 48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Snippet of the match we generated from the body. 50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch Snippet snippet; 51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch }; 52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Note: You must call init which must succeed before using this class. 54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Computes the matches for the query, returning results in decreasing order 56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // of visit time. 57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // This function will attach the new database to the given database 59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // connection. This allows one sqlite3 object to share many TextDatabases, 60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // meaning that they will all share the same cache, which allows us to limit 61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the total size that text indexing databasii can take up. 62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // |file_name| is the name of the file on disk. 64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // ID is the identifier for the database. It should uniquely identify it among 66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // other databases on disk and in the sqlite connection. 67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // |allow_create| indicates if we want to allow creation of the file if it 69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // doesn't exist. For files associated with older time periods, we don't want 70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // to create them if they don't exist, so this flag would be false. 71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch TextDatabase(const FilePath& path, 72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DBIdent id, 73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool allow_create); 74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch ~TextDatabase(); 75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Initializes the database connection and creates the file if the class 77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // was created with |allow_create|. If the file couldn't be opened or 78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // created, this will return false. No other functions should be called 79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // after this. 80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool Init(); 81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Allows updates to be batched. This gives higher performance when multiple 83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // updates are happening because every insert doesn't require a sync to disk. 84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Transactions can be nested, only the outermost one will actually count. 85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void BeginTransaction(); 86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void CommitTransaction(); 87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // For testing, returns the file name of the database so it can be deleted 89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // after the test. This is valid even before Init() is called. 90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const FilePath& file_name() const { return file_name_; } 91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Returns a NULL-terminated string that is the base of history index files, 93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // which is the part before the database identifier. For example 94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // "History Index *". This is for finding existing database files. 95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static const FilePath::CharType* file_base(); 96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Converts a filename on disk (optionally including a path) to a database 98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // identifier. If the filename doesn't have the correct format, returns 0. 99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static DBIdent FileNameToID(const FilePath& file_path); 100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Changing operations ------------------------------------------------------- 102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Adds the given data to the page. Returns true on success. The data should 104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // already be converted to UTF-8. 105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool AddPageData(base::Time time, 106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& url, 107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& title, 108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const std::string& contents); 109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Deletes the indexed data exactly matching the given URL/time pair. 111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void DeletePageData(base::Time time, const std::string& url); 112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Optimizes the tree inside the database. This will, in addition to making 114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // access faster, remove any deleted data from the database (normally it is 115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // added again as "removed" and it is manually cleaned up when it decides to 116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // optimize it naturally). It is bad for privacy if a user is deleting a 117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // page from history but it still exists in the full text database in some 118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // form. This function will clean that up. 119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void Optimize(); 120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Querying ------------------------------------------------------------------ 122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Executes the given query. See QueryOptions for more info on input. 124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The results are appended to any existing ones in |*results|, and the first 126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // time considered for the output is in |first_time_searched| 127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // (see QueryResults for more). 128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Any URLs found will be added to |unique_urls|. If a URL is already in the 130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // set, additional results will not be added (giving the ability to uniquify 131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // URL results). 132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // 133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Callers must run QueryParser on the user text and pass the results of the 134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // QueryParser to this method as the query string. 135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch void GetTextMatches(const std::string& query, 136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const QueryOptions& options, 137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch std::vector<Match>* results, 138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch URLSet* unique_urls, 139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch base::Time* first_time_searched); 140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Converts the given database identifier to a filename. This does not include 142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // the path, just the file and extension. 143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch static FilePath IDToFileName(DBIdent id); 144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch private: 146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Ensures that the tables and indices are created. Returns true on success. 147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch bool CreateTables(); 148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // The sql database. Not valid until Init is called. 150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::Connection db_; 151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const FilePath path_; 153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const DBIdent ident_; 154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch const bool allow_create_; 155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch // Full file name of the file on disk, computed in Init(). 157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch FilePath file_name_; 158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch sql::MetaTable meta_table_; 160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch DISALLOW_COPY_AND_ASSIGN(TextDatabase); 162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}; 163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch} // namespace history 165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch 166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_ 167