1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
6#define CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
7#pragma once
8
9#include <set>
10#include <vector>
11
12#include "base/basictypes.h"
13#include "base/file_path.h"
14#include "base/gtest_prod_util.h"
15#include "base/string16.h"
16#include "base/task.h"
17#include "chrome/browser/history/history_types.h"
18#include "chrome/browser/history/text_database.h"
19#include "chrome/browser/history/query_parser.h"
20#include "chrome/browser/history/url_database.h"
21#include "content/common/mru_cache.h"
22
23namespace history {
24
25class HistoryPublisher;
26class VisitDatabase;
27
28// Manages a set of text databases representing different time periods. This
29// will page them in and out as necessary, and will manage queries for times
30// spanning multiple databases.
31//
// It will also keep a list of partial changes, such as page adds and title and
// body sets, all of which come in at different times for a given page. When
// all data is received or enough time has elapsed since adding, the indexed
// data will be committed.
36//
37// This allows us to minimize inserts and modifications, which are slow for the
38// full text database, since each page's information is added exactly once.
39//
40// Note: be careful to delete the relevant entries from this uncommitted list
41// when clearing history or this information may get added to the database soon
42// after the clear.
43class TextDatabaseManager {
44 public:
45  // Tracks a set of changes (only deletes need to be supported now) to the
46  // databases. This is opaque to the caller, but allows it to pass back a list
47  // of all database that it has caused a change to.
48  //
49  // This is necessary for the feature where we optimize full text databases
50  // which have changed as a result of the user deleting history via
51  // OptimizeChangedDatabases. We want to do each affected database only once at
52  // the end of the delete, but we don't want the caller to have to worry about
53  // our internals.
54  class ChangeSet {
55   public:
56    ChangeSet();
57    ~ChangeSet();
58
59   private:
60    friend class TextDatabaseManager;
61
62    typedef std::set<TextDatabase::DBIdent> DBSet;
63
64    void Add(TextDatabase::DBIdent id) { changed_databases_.insert(id); }
65
66    DBSet changed_databases_;
67  };
68
69  // You must call Init() to complete initialization.
70  //
71  // |dir| is the directory that will hold the full text database files (there
72  // will be many files named by their date ranges).
73  //
74  // The visit database is a pointer owned by the caller for the main database
75  // (of recent visits). The visit database will be updated to refer to the
76  // added text database entries.
77  explicit TextDatabaseManager(const FilePath& dir,
78                               URLDatabase* url_database,
79                               VisitDatabase* visit_database);
80  ~TextDatabaseManager();
81
82  // Must call before using other functions. If it returns false, no other
83  // functions should be called.
84  bool Init(const HistoryPublisher* history_publisher);
85
86  // Returns the directory that holds the full text database files.
87  const FilePath& GetDir() { return dir_; }
88
89  // Allows scoping updates. This also allows things to go faster since every
90  // page add doesn't need to be committed to disk (slow). Note that files will
91  // still get created during a transaction.
92  void BeginTransaction();
93  void CommitTransaction();
94
95  // Sets specific information for the given page to be added to the database.
96  // In normal operation, URLs will be added as the user visits them, the titles
97  // and bodies will come in some time after that. These changes will be
98  // automatically coalesced and added to the database some time in the future
99  // using AddPageData().
100  //
101  // AddPageURL must be called for a given URL (+ its corresponding ID) before
102  // either the title or body set. The visit ID specifies the visit that will
103  // get updated to refer to the full text indexed information. The visit time
104  // should be the time corresponding to that visit in the database.
105  void AddPageURL(const GURL& url, URLID url_id, VisitID visit_id,
106                  base::Time visit_time);
107  void AddPageTitle(const GURL& url, const string16& title);
108  void AddPageContents(const GURL& url, const string16& body);
109
110  // Adds the given data to the appropriate database file, returning true on
111  // success. The visit database row identified by |visit_id| will be updated
112  // to refer to the full text index entry. If the visit ID is 0, the visit
113  // database will not be updated.
114  bool AddPageData(const GURL& url,
115                   URLID url_id,
116                   VisitID visit_id,
117                   base::Time visit_time,
118                   const string16& title,
119                   const string16& body);
120
121  // Deletes the instance of indexed data identified by the given time and URL.
122  // Any changes will be tracked in the optional change set for use when calling
123  // OptimizeChangedDatabases later. change_set can be NULL.
124  void DeletePageData(base::Time time, const GURL& url,
125                      ChangeSet* change_set);
126
127  // The text database manager keeps a list of changes that are made to the
128  // file AddPageURL/Title/Body that may not be committed to the database yet.
129  // This function removes entires from this list happening between the given
130  // time range. It is called when the user clears their history for a time
131  // range, and we don't want any of our data to "leak." If restrict_urls is
132  // not empty, only changes on those URLs are deleted.
133  //
134  // Either or both times my be is_null to be unbounded in that direction. When
135  // non-null, the range is [begin, end).
136  void DeleteFromUncommitted(const std::set<GURL>& restrict_urls,
137                             base::Time begin, base::Time end);
138
139  // Deletes all full text search data by removing the files from the disk.
140  // This must be called OUTSIDE of a transaction since it actually deletes the
141  // files rather than messing with the database.
142  void DeleteAll();
143
144  // Calls optimize on all the databases identified in a given change set (see
145  // the definition of ChangeSet above for more). Optimizing means that old data
146  // will be removed rather than marked unused.
147  void OptimizeChangedDatabases(const ChangeSet& change_set);
148
149  // Executes the given query. See QueryOptions for more info on input.
150  //
151  // The results are filled into |results|, and the first time considered for
152  // the output is in |first_time_searched| (see QueryResults for more).
153  //
154  // This function will return more than one match per URL if there is more than
155  // one entry for that URL in the database.
156  void GetTextMatches(const string16& query,
157                      const QueryOptions& options,
158                      std::vector<TextDatabase::Match>* results,
159                      base::Time* first_time_searched);
160
161 private:
162  // These tests call ExpireRecentChangesForTime to force expiration.
163  FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, InsertPartial);
164  FRIEND_TEST_ALL_PREFIXES(TextDatabaseManagerTest, PartialComplete);
165  FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, DeleteURLAndFavicon);
166  FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest, FlushRecentURLsUnstarred);
167  FRIEND_TEST_ALL_PREFIXES(ExpireHistoryTest,
168                           FlushRecentURLsUnstarredRestricted);
169
170  // Stores "recent stuff" that has happened with the page, since the page
171  // visit, title, and body all come in at different times.
172  class PageInfo {
173   public:
174    PageInfo(URLID url_id, VisitID visit_id, base::Time visit_time);
175    ~PageInfo();
176
177    // Getters.
178    URLID url_id() const { return url_id_; }
179    VisitID visit_id() const { return visit_id_; }
180    base::Time visit_time() const { return visit_time_; }
181    const string16& title() const { return title_; }
182    const string16& body() const { return body_; }
183
184    // Setters, we can only update the title and body.
185    void set_title(const string16& ttl);
186    void set_body(const string16& bdy);
187
188    // Returns true if both the title or body of the entry has been set. Since
189    // both the title and body setters will "fix" empty strings to be a space,
190    // these indicate if the setter was ever called.
191    bool has_title() const { return !title_.empty(); }
192    bool has_body() { return !body_.empty(); }
193
194    // Returns true if this entry was added too long ago and we should give up
195    // waiting for more data. The current time is passed in as an argument so we
196    // can check many without re-querying the timer.
197    bool Expired(base::TimeTicks now) const;
198
199   private:
200    URLID url_id_;
201    VisitID visit_id_;
202
203    // Time of the visit of the URL. This will be the value stored in the URL
204    // and visit tables for the entry.
205    base::Time visit_time_;
206
207    // When this page entry was created. We have a cap on the maximum time that
208    // an entry will be in the queue before being flushed to the database.
209    base::TimeTicks added_time_;
210
211    // Will be the string " " when they are set to distinguish set and unset.
212    string16 title_;
213    string16 body_;
214  };
215
216  // Converts the given time to a database identifier or vice-versa.
217  static TextDatabase::DBIdent TimeToID(base::Time time);
218  static base::Time IDToTime(TextDatabase::DBIdent id);
219
220  // Returns a text database for the given identifier or time. This file will
221  // be created if it doesn't exist and |for_writing| is set. On error,
222  // including the case where the file doesn't exist and |for_writing|
223  // is false, it will return NULL.
224  //
225  // When |for_writing| is set, a transaction on the database will be opened
226  // if there is a transaction open on this manager.
227  //
228  // The pointer will be tracked in the cache. The caller should not store it
229  // or delete it since it will get automatically deleted as necessary.
230  TextDatabase* GetDB(TextDatabase::DBIdent id, bool for_writing);
231  TextDatabase* GetDBForTime(base::Time time, bool for_writing);
232
233  // Populates the present_databases_ list based on which files are on disk.
234  // When the list is already initialized, this will do nothing, so you can
235  // call it whenever you want to ensure the present_databases_ set is filled.
236  void InitDBList();
237
238  // Schedules a call to ExpireRecentChanges in the future.
239  void ScheduleFlushOldChanges();
240
241  // Checks the recent_changes_ list and commits partial data that has been
242  // around too long.
243  void FlushOldChanges();
244
245  // Given "now," this will expire old things from the recent_changes_ list.
246  // This is used as the backend for FlushOldChanges and is called directly
247  // by the unit tests with fake times.
248  void FlushOldChangesForTime(base::TimeTicks now);
249
250  // Directory holding our index files.
251  const FilePath dir_;
252
253  // Non-owning pointers to the recent history databases for URLs and visits.
254  URLDatabase* url_database_;
255  VisitDatabase* visit_database_;
256
257  // Lists recent additions that we have not yet filled out with the title and
258  // body. Sorted by time, we will flush them when they are complete or have
259  // been in the queue too long without modification.
260  //
261  // We kind of abuse the MRUCache because we never move things around in it
262  // using Get. Instead, we keep them in the order they were inserted, since
263  // this is the metric we use to measure age. The MRUCache gives us an ordered
264  // list with fast lookup by URL.
265  typedef MRUCache<GURL, PageInfo> RecentChangeList;
266  RecentChangeList recent_changes_;
267
268  // Nesting levels of transactions. Since sqlite only allows one open
269  // transaction, we simulate nested transactions by mapping the outermost one
270  // to a real transaction. Since this object never needs to do ROLLBACK, losing
271  // the ability for all transactions to rollback is inconsequential.
272  int transaction_nesting_;
273
274  // The cache owns the TextDatabase pointers, they will be automagically
275  // deleted when the cache entry is removed or expired.
276  typedef OwningMRUCache<TextDatabase::DBIdent, TextDatabase*> DBCache;
277  DBCache db_cache_;
278
279  // Tells us about the existence of database files on disk. All existing
280  // databases will be in here, and non-existant ones will not, so we don't
281  // have to check the disk every time.
282  //
283  // This set is populated LAZILY by InitDBList(), you should call that function
284  // before accessing the list.
285  //
286  // Note that iterators will work on the keys in-order. Normally, reverse
287  // iterators will be used to iterate the keys in reverse-order.
288  typedef std::set<TextDatabase::DBIdent> DBIdentSet;
289  DBIdentSet present_databases_;
290  bool present_databases_loaded_;  // Set by InitDBList when populated.
291
292  // Lists all databases with open transactions. These will have to be closed
293  // when the transaction is committed.
294  DBIdentSet open_transactions_;
295
296  QueryParser query_parser_;
297
298  // Generates tasks for our periodic checking of expired "recent changes".
299  ScopedRunnableMethodFactory<TextDatabaseManager> factory_;
300
301  // This object is created and managed by the history backend. We maintain an
302  // opaque pointer to the object for our use.
303  // This can be NULL if there are no indexers registered to receive indexing
304  // data from us.
305  const HistoryPublisher* history_publisher_;
306
307  DISALLOW_COPY_AND_ASSIGN(TextDatabaseManager);
308};
309
310}  // namespace history
311
312#endif  // CHROME_BROWSER_HISTORY_TEXT_DATABASE_MANAGER_H_
313