// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
6c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
8c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
9c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <set>
10c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include <vector>
11c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
12c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "app/sql/connection.h"
13c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "app/sql/meta_table.h"
14c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/basictypes.h"
15c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/file_path.h"
16c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "base/string16.h"
17c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "chrome/browser/history/history_types.h"
18c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#include "googleurl/src/gurl.h"
19c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
20c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochnamespace history {
21c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
22c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch// Encapsulation of a full-text indexed database file.
23c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdochclass TextDatabase {
24c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch public:
25c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  typedef int DBIdent;
26c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
27c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  typedef std::set<GURL> URLSet;
28c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
29c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Returned from the search function.
30c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  struct Match {
31731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    Match();
32731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick    ~Match();
33731df977c0511bca2206b5f333555b1205ff1f43Iain Merrick
34c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // URL of the match.
35c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    GURL url;
36c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
37c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // The title is returned because the title in the text database and the URL
38c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // database may differ. This happens because we capture the title when the
39c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // body is captured, and don't update it later.
40c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    string16 title;
41c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
42c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Time the page that was returned was visited.
43c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    base::Time time;
44c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
45c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Identifies any found matches in the title of the document. These are not
46c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // included in the snippet.
47c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Snippet::MatchPositions title_match_positions;
48c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
49c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    // Snippet of the match we generated from the body.
50c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch    Snippet snippet;
51c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  };
52c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
53c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Note: You must call init which must succeed before using this class.
54c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
55c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Computes the matches for the query, returning results in decreasing order
56c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // of visit time.
57c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
58c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // This function will attach the new database to the given database
59c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // connection. This allows one sqlite3 object to share many TextDatabases,
60c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // meaning that they will all share the same cache, which allows us to limit
61c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the total size that text indexing databasii can take up.
62c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
63c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // |file_name| is the name of the file on disk.
64c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
65c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // ID is the identifier for the database. It should uniquely identify it among
66c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // other databases on disk and in the sqlite connection.
67c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
68c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // |allow_create| indicates if we want to allow creation of the file if it
69c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // doesn't exist. For files associated with older time periods, we don't want
70c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // to create them if they don't exist, so this flag would be false.
71c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  TextDatabase(const FilePath& path,
72c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch               DBIdent id,
73c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch               bool allow_create);
74c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  ~TextDatabase();
75c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
76c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Initializes the database connection and creates the file if the class
77c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // was created with |allow_create|. If the file couldn't be opened or
78c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // created, this will return false. No other functions should be called
79c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // after this.
80c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool Init();
81c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
82c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Allows updates to be batched. This gives higher performance when multiple
83c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // updates are happening because every insert doesn't require a sync to disk.
84c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Transactions can be nested, only the outermost one will actually count.
85c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void BeginTransaction();
86c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void CommitTransaction();
87c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
88c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // For testing, returns the file name of the database so it can be deleted
89c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // after the test. This is valid even before Init() is called.
90c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const FilePath& file_name() const { return file_name_; }
91c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
92c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Returns a NULL-terminated string that is the base of history index files,
93c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // which is the part before the database identifier. For example
94c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // "History Index *". This is for finding existing database files.
95c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static const FilePath::CharType* file_base();
96c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
97c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Converts a filename on disk (optionally including a path) to a database
98c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // identifier. If the filename doesn't have the correct format, returns 0.
99c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static DBIdent FileNameToID(const FilePath& file_path);
100c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
101c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Changing operations -------------------------------------------------------
102c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
103c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Adds the given data to the page. Returns true on success. The data should
104c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // already be converted to UTF-8.
105c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool AddPageData(base::Time time,
106c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   const std::string& url,
107c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   const std::string& title,
108c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                   const std::string& contents);
109c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
110c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Deletes the indexed data exactly matching the given URL/time pair.
111c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void DeletePageData(base::Time time, const std::string& url);
112c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
113c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Optimizes the tree inside the database. This will, in addition to making
114c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // access faster, remove any deleted data from the database (normally it is
115c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // added again as "removed" and it is manually cleaned up when it decides to
116c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // optimize it naturally). It is bad for privacy if a user is deleting a
117c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // page from history but it still exists in the full text database in some
118c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // form. This function will clean that up.
119c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void Optimize();
120c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
121c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Querying ------------------------------------------------------------------
122c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
123c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Executes the given query. See QueryOptions for more info on input.
124c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
125c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // The results are appended to any existing ones in |*results|, and the first
126c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // time considered for the output is in |first_time_searched|
127c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // (see QueryResults for more).
128c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
129c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Any URLs found will be added to |unique_urls|. If a URL is already in the
130c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // set, additional results will not be added (giving the ability to uniquify
131c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // URL results).
132c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  //
133c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Callers must run QueryParser on the user text and pass the results of the
134c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // QueryParser to this method as the query string.
135c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  void GetTextMatches(const std::string& query,
136c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                      const QueryOptions& options,
137c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                      std::vector<Match>* results,
138c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                      URLSet* unique_urls,
139c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch                      base::Time* first_time_searched);
140c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
141c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Converts the given database identifier to a filename. This does not include
142c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // the path, just the file and extension.
143c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  static FilePath IDToFileName(DBIdent id);
144c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
145c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch private:
146c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Ensures that the tables and indices are created. Returns true on success.
147c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  bool CreateTables();
148c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
149c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // The sql database. Not valid until Init is called.
150c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::Connection db_;
151c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
152c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const FilePath path_;
153c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const DBIdent ident_;
154c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  const bool allow_create_;
155c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
156c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  // Full file name of the file on disk, computed in Init().
157c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  FilePath file_name_;
158c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
159c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  sql::MetaTable meta_table_;
160c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
161c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch  DISALLOW_COPY_AND_ASSIGN(TextDatabase);
162c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch};
163c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
164c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch}  // namespace history
165c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch
166c407dc5cd9bdc5668497f21b26b09d988ab439deBen Murdoch#endif  // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
167