1// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
6#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
7#pragma once
8
9#include <set>
10#include <vector>
11
12#include "app/sql/connection.h"
13#include "app/sql/meta_table.h"
14#include "base/basictypes.h"
15#include "base/file_path.h"
16#include "base/string16.h"
17#include "chrome/browser/history/history_types.h"
18#include "googleurl/src/gurl.h"
19
20namespace history {
21
22// Encapsulation of a full-text indexed database file.
23class TextDatabase {
24 public:
25  typedef int DBIdent;  // Identifier of a single text database.
26
27  typedef std::set<GURL> URLSet;  // See GetTextMatches() |unique_urls|.
28
29  // A single search result, as appended to |results| by GetTextMatches().
30  struct Match {
31    Match();
32    ~Match();
33
34    // URL of the match.
35    GURL url;
36
37    // The title is returned because the title in the text database and the URL
38    // database may differ. This happens because we capture the title when the
39    // body is captured, and don't update it later.
40    string16 title;
41
42    // Time at which the matched page was visited.
43    base::Time time;
44
45    // Identifies any found matches in the title of the document. These are not
46    // included in the snippet.
47    Snippet::MatchPositions title_match_positions;
48
49    // Snippet of the match, generated from the page body.
50    Snippet snippet;
51  };
52
53  // Constructs the object. Init() must be called, and must succeed, before
54  // this class is used.
55  //
56  // NOTE(review): the previous comment went on to say "Computes the matches
57  // for the query, returning results in decreasing order of visit time", which
58  // reads like GetTextMatches() documentation, and that this function attaches
59  // the new database to a given connection so that one sqlite3 object (and its
60  // cache) is shared by many TextDatabases, bounding the total size of the text
61  // indexing databases. No connection is passed here and |db_| is a member.
62  //
63  // |path| locates the database on disk. NOTE(review): formerly documented as
64  // |file_name|, "the name of the file on disk"; may be a directory -- confirm.
65  // |id| is the identifier for the database. It should uniquely identify it
66  // among other databases on disk and in the sqlite connection.
67  //
68  // |allow_create| indicates if we want to allow creation of the file if it
69  // doesn't exist. For files associated with older time periods, we don't want
70  // to create them if they don't exist, so this flag would be false.
71  TextDatabase(const FilePath& path,
72               DBIdent id,
73               bool allow_create);
74  ~TextDatabase();
75
76  // Initializes the database connection and creates the file if the class
77  // was created with |allow_create|. Returns false if the file couldn't be
78  // opened or created, in which case no other functions should be called on
79  // this object.
80  bool Init();
81
82  // Allows updates to be batched. This gives higher performance when multiple
83  // updates are happening because every insert doesn't require a sync to disk.
84  // Transactions can be nested; only the outermost one will actually count.
85  void BeginTransaction();
86  void CommitTransaction();
87
88  // For testing, returns the file name of the database so it can be deleted
89  // after the test. Documented as valid even before Init() is called.
90  const FilePath& file_name() const { return file_name_; }
91
92  // Returns a NULL-terminated string that is the base of history index files,
93  // which is the part before the database identifier. For example
94  // "History Index *". This is for finding existing database files.
95  static const FilePath::CharType* file_base();
96
97  // Converts a filename on disk (optionally including a path) to a database
98  // identifier. If the filename doesn't have the correct format, returns 0.
99  static DBIdent FileNameToID(const FilePath& file_path);
100
101  // Changing operations -------------------------------------------------------
102
103  // Adds the given page data to the database. Returns true on success. The
104  // strings should already be converted to UTF-8.
105  bool AddPageData(base::Time time,
106                   const std::string& url,
107                   const std::string& title,
108                   const std::string& contents);
109
110  // Deletes the indexed data exactly matching the given URL/time pair.
111  void DeletePageData(base::Time time, const std::string& url);
112
113  // Optimizes the tree inside the database. This will, in addition to making
114  // access faster, remove any deleted data from the database (normally it is
115  // added again as "removed" and it is manually cleaned up when it decides to
116  // optimize it naturally). It is bad for privacy if a user is deleting a
117  // page from history but it still exists in the full text database in some
118  // form. This function will clean that up.
119  void Optimize();
120
121  // Querying ------------------------------------------------------------------
122
123  // Executes the given query. See QueryOptions for more info on input.
124  //
125  // The results are appended to any existing ones in |*results|, and the first
126  // time considered for the output is written to |*first_time_searched|
127  // (see QueryResults for more).
128  //
129  // Any URLs found will be added to |unique_urls|. If a URL is already in the
130  // set, additional results will not be added (giving the ability to uniquify
131  // URL results).
132  //
133  // Callers must run QueryParser on the user text and pass the results of the
134  // QueryParser to this method as the query string.
135  void GetTextMatches(const std::string& query,
136                      const QueryOptions& options,
137                      std::vector<Match>* results,
138                      URLSet* unique_urls,
139                      base::Time* first_time_searched);
140
141  // Converts the given database identifier to a filename. This does not include
142  // the path, just the file and extension.
143  static FilePath IDToFileName(DBIdent id);
144
145 private:
146  // Ensures that the tables and indices are created. Returns true on success.
147  bool CreateTables();
148
149  // The sql database. Not valid until Init() is called.
150  sql::Connection db_;
151
152  const FilePath path_;
153  const DBIdent ident_;
154  const bool allow_create_;
155
156  // Full file name on disk. NOTE(review): set in Init()? See file_name().
157  FilePath file_name_;
158
159  sql::MetaTable meta_table_;
160
161  DISALLOW_COPY_AND_ASSIGN(TextDatabase);
162};
163
164}  // namespace history
165
166#endif  // CHROME_BROWSER_HISTORY_TEXT_DATABASE_H_
167