1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
6#define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
7
8#if defined(OS_WIN)
9#include <windows.h>
10#endif
11#include <set>
12#include <vector>
13
14#include "base/callback.h"
15#include "base/callback_forward.h"
16#include "base/files/file_path.h"
17#include "base/gtest_prod_util.h"
18#include "base/memory/shared_memory.h"
19#include "base/threading/sequenced_worker_pool.h"
20#include "components/visitedlink/common/visitedlink_common.h"
21
22#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
23#include "base/logging.h"
24#endif
25
26class GURL;
27
28namespace content {
29class BrowserContext;
30}
31
32namespace visitedlink {
33
34class VisitedLinkDelegate;
35
36// Controls the link coloring database. The master controls all writing to the
37// database as well as disk I/O. There should be only one master.
38//
39// This class will defer writing operations to the file thread. This means that
40// class destruction, the file may still be open since operations are pending on
41// another thread.
42class VisitedLinkMaster : public VisitedLinkCommon {
43 public:
44  // Listens to the link coloring database events. The master is given this
45  // event as a constructor argument and dispatches events using it.
46  class Listener {
47   public:
48    virtual ~Listener() {}
49
50    // Called when link coloring database has been created or replaced. The
51    // argument is the new table handle.
52    virtual void NewTable(base::SharedMemory*) = 0;
53
54    // Called when new link has been added. The argument is the fingerprint
55    // (hash) of the link.
56    virtual void Add(Fingerprint fingerprint) = 0;
57
58    // Called when link coloring state has been reset. This may occur when
59    // entire or parts of history were deleted.
60    virtual void Reset() = 0;
61  };
62
63  VisitedLinkMaster(content::BrowserContext* browser_context,
64                    VisitedLinkDelegate* delegate,
65                    bool persist_to_disk);
66
67  // In unit test mode, we allow the caller to optionally specify the database
68  // filename so that it can be run from a unit test. The directory where this
69  // file resides must exist in this mode. You can also specify the default
70  // table size to test table resizing. If this parameter is 0, we will use the
71  // defaults.
72  //
73  // In the unit test mode, we also allow the caller to provide a history
74  // service pointer (the history service can't be fetched from the browser
75  // process when we're in unit test mode). This can be NULL to try to access
76  // the main version, which will probably fail (which can be good for testing
77  // this failure mode).
78  //
79  // When |suppress_rebuild| is set, we'll not attempt to load data from
80  // history if the file can't be loaded. This should generally be set for
81  // testing except when you want to test the rebuild process explicitly.
82  VisitedLinkMaster(Listener* listener,
83                    VisitedLinkDelegate* delegate,
84                    bool persist_to_disk,
85                    bool suppress_rebuild,
86                    const base::FilePath& filename,
87                    int32 default_table_size);
88  virtual ~VisitedLinkMaster();
89
90  // Must be called immediately after object creation. Nothing else will work
91  // until this is called. Returns true on success, false means that this
92  // object won't work.
93  bool Init();
94
95  base::SharedMemory* shared_memory() { return shared_memory_; }
96
97  // Adds a URL to the table.
98  void AddURL(const GURL& url);
99
100  // Adds a set of URLs to the table.
101  void AddURLs(const std::vector<GURL>& url);
102
103  // See DeleteURLs.
104  class URLIterator {
105   public:
106    // HasNextURL must return true when this is called. Returns the next URL
107    // then advances the iterator. Note that the returned reference is only
108    // valid until the next call of NextURL.
109    virtual const GURL& NextURL() = 0;
110
111    // Returns true if still has URLs to be iterated.
112    virtual bool HasNextURL() const = 0;
113
114   protected:
115    virtual ~URLIterator() {}
116  };
117
118  // Deletes the specified URLs from |rows| from the table.
119  void DeleteURLs(URLIterator* iterator);
120
121  // Clears the visited links table by deleting the file from disk. Used as
122  // part of history clearing.
123  void DeleteAllURLs();
124
125  // Returns the Delegate of this Master.
126  VisitedLinkDelegate* GetDelegate();
127
128#if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
129  // This is a debugging function that can be called to double-check internal
130  // data structures. It will assert if the check fails.
131  void DebugValidate();
132
133  // Sets a task to execute when the next rebuild from history is complete.
134  // This is used by unit tests to wait for the rebuild to complete before
135  // they continue. The pointer will be owned by this object after the call.
136  void set_rebuild_complete_task(const base::Closure& task) {
137    DCHECK(rebuild_complete_task_.is_null());
138    rebuild_complete_task_ = task;
139  }
140
141  // returns the number of items in the table for testing verification
142  int32 GetUsedCount() const {
143    return used_items_;
144  }
145
146  // Returns the listener.
147  VisitedLinkMaster::Listener* GetListener() const {
148    return listener_.get();
149  }
150
151  // Call to cause the entire database file to be re-written from scratch
152  // to disk. Used by the performance tester.
153  void RewriteFile() {
154    WriteFullTable();
155  }
156#endif
157
158 private:
159  FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
160  FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
161  FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
162
163  // Object to rebuild the table on the history thread (see the .cc file).
164  class TableBuilder;
165
166  // Byte offsets of values in the header.
167  static const int32 kFileHeaderSignatureOffset;
168  static const int32 kFileHeaderVersionOffset;
169  static const int32 kFileHeaderLengthOffset;
170  static const int32 kFileHeaderUsedOffset;
171  static const int32 kFileHeaderSaltOffset;
172
173  // The signature at the beginning of a file.
174  static const int32 kFileSignature;
175
176  // version of the file format this module currently uses
177  static const int32 kFileCurrentVersion;
178
179  // Bytes in the file header, including the salt.
180  static const size_t kFileHeaderSize;
181
182  // When creating a fresh new table, we use this many entries.
183  static const unsigned kDefaultTableSize;
184
185  // When the user is deleting a boatload of URLs, we don't really want to do
186  // individual writes for each of them. When the count exceeds this threshold,
187  // we will write the whole table to disk at once instead of individual items.
188  static const size_t kBigDeleteThreshold;
189
190  // Backend for the constructors initializing the members.
191  void InitMembers();
192
193  // If a rebuild is in progress, we save the URL in the temporary list.
194  // Otherwise, we add this to the table. Returns the index of the
195  // inserted fingerprint or null_hash_ on failure.
196  Hash TryToAddURL(const GURL& url);
197
198  // File I/O functions
199  // ------------------
200  // These functions are only called if |persist_to_disk_| is true.
201
202  // Posts the given task to the blocking worker pool with our options.
203  void PostIOTask(const tracked_objects::Location& from_here,
204                  const base::Closure& task);
205
206  // Writes the entire table to disk. It will leave the table file open and
207  // the handle to it will be stored in file_.
208  void WriteFullTable();
209
210  // Try to load the table from the database file. If the file doesn't exist or
211  // is corrupt, this will return failure.
212  bool InitFromFile();
213
214  // Reads the header of the link coloring database from disk. Assumes the
215  // file pointer is at the beginning of the file and that there are no pending
216  // asynchronous I/O operations.
217  //
218  // Returns true on success and places the size of the table in num_entries
219  // and the number of nonzero fingerprints in used_count. This will fail if
220  // the version of the file is not the current version of the database.
221  bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
222                      uint8 salt[LINK_SALT_LENGTH]);
223
224  // Fills *filename with the name of the link database filename
225  bool GetDatabaseFileName(base::FilePath* filename);
226
227  // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
228  // the write to a background thread.
229  void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size);
230
231  // Helper function to schedule and asynchronous write of the used count to
232  // disk (this is a common operation).
233  void WriteUsedItemCountToFile();
234
235  // Helper function to schedule an asynchronous write of the given range of
236  // hash functions to disk. The range is inclusive on both ends. The range can
237  // wrap around at 0 and this function will handle it.
238  void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
239
240  // Synchronous read from the file. Assumes there are no pending asynchronous
241  // I/O functions. Returns true if the entire buffer was successfully filled.
242  bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
243
244  // General table handling
245  // ----------------------
246
247  // Called to add a fingerprint to the table. If |send_notifications| is true
248  // and the item is added successfully, Listener::Add will be invoked.
249  // Returns the index of the inserted fingerprint or null_hash_ if there was a
250  // duplicate and this item was skippped.
251  Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
252
253  // Deletes all fingerprints from the given vector from the current hash table
254  // and syncs it to disk if there are changes. This does not update the
255  // deleted_since_rebuild_ list, the caller must update this itself if there
256  // is an update pending.
257  void DeleteFingerprintsFromCurrentTable(
258      const std::set<Fingerprint>& fingerprints);
259
260  // Removes the indicated fingerprint from the table. If the update_file flag
261  // is set, the changes will also be written to disk. Returns true if the
262  // fingerprint was deleted, false if it was not in the table to delete.
263  bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
264
265  // Creates a new empty table, call if InitFromFile() fails. Normally, when
266  // |suppress_rebuild| is false, the table will be rebuilt from history,
267  // keeping us in sync. When |suppress_rebuild| is true, the new table will be
268  // empty and we will not consult history. This is used when clearing the
269  // database and for unit tests.
270  bool InitFromScratch(bool suppress_rebuild);
271
272  // Allocates the Fingerprint structure and length. When init_to_empty is set,
273  // the table will be filled with 0s and used_items_ will be set to 0 as well.
274  // If the flag is not set, these things are untouched and it is the
275  // responsibility of the caller to fill them (like when we are reading from
276  // a file).
277  bool CreateURLTable(int32 num_entries, bool init_to_empty);
278
279  // A wrapper for CreateURLTable, this will allocate a new table, initialized
280  // to empty. The caller is responsible for saving the shared memory pointer
281  // and handles before this call (they will be replaced with new ones) and
282  // releasing them later. This is designed for callers that make a new table
283  // and then copy values from the old table to the new one, then release the
284  // old table.
285  //
286  // Returns true on success. On failure, the old table will be restored. The
287  // caller should not attemp to release the pointer/handle in this case.
288  bool BeginReplaceURLTable(int32 num_entries);
289
290  // unallocates the Fingerprint table
291  void FreeURLTable();
292
293  // For growing the table. ResizeTableIfNecessary will check to see if the
294  // table should be resized and calls ResizeTable if needed. Returns true if
295  // we decided to resize the table.
296  bool ResizeTableIfNecessary();
297
298  // Resizes the table (growing or shrinking) as necessary to accomodate the
299  // current count.
300  void ResizeTable(int32 new_size);
301
302  // Returns the desired table size for |item_count| URLs.
303  uint32 NewTableSizeForCount(int32 item_count) const;
304
305  // Computes the table load as fraction. For example, if 1/4 of the entries are
306  // full, this value will be 0.25
307  float ComputeTableLoad() const {
308    return static_cast<float>(used_items_) / static_cast<float>(table_length_);
309  }
310
311  // Initializes a rebuild of the visited link database based on the browser
312  // history. This will set table_builder_ while working, and there should not
313  // already be a rebuild in place when called. See the definition for more
314  // details on how this works.
315  //
316  // Returns true on success. Failure means we're not attempting to rebuild
317  // the database because something failed.
318  bool RebuildTableFromDelegate();
319
320  // Callback that the table rebuilder uses when the rebuild is complete.
321  // |success| is true if the fingerprint generation succeeded, in which case
322  // |fingerprints| will contain the computed fingerprints. On failure, there
323  // will be no fingerprints.
324  void OnTableRebuildComplete(bool success,
325                              const std::vector<Fingerprint>& fingerprints);
326
327  // Increases or decreases the given hash value by one, wrapping around as
328  // necessary. Used for probing.
329  inline Hash IncrementHash(Hash hash) {
330    if (hash >= table_length_ - 1)
331      return 0;  // Wrap around.
332    return hash + 1;
333  }
334  inline Hash DecrementHash(Hash hash) {
335    if (hash <= 0)
336      return table_length_ - 1;  // Wrap around.
337    return hash - 1;
338  }
339
340#ifndef NDEBUG
341  // Indicates whether any asynchronous operation has ever been completed.
342  // We do some synchronous reads that require that no asynchronous operations
343  // are pending, yet we don't track whether they have been completed. This
344  // flag is a sanity check that these reads only happen before any
345  // asynchronous writes have been fired.
346  bool posted_asynchronous_operation_;
347#endif
348
349  // Reference to the browser context that this object belongs to
350  // (it knows the path to where the data is stored)
351  content::BrowserContext* browser_context_;
352
353  // Client owns the delegate and is responsible for it being valid through
354  // the life time this VisitedLinkMaster.
355  VisitedLinkDelegate* delegate_;
356
357  // VisitedLinkEventListener to handle incoming events.
358  scoped_ptr<Listener> listener_;
359
360  // Lazily initialized sequence token for posting file tasks.
361  base::SequencedWorkerPool::SequenceToken sequence_token_;
362
363  // When non-NULL, indicates we are in database rebuild mode and points to
364  // the class collecting fingerprint information from the history system.
365  // The pointer is owned by this class, but it must remain valid while the
366  // history query is running. We must only delete it when the query is done.
367  scoped_refptr<TableBuilder> table_builder_;
368
369  // Indicates URLs added and deleted since we started rebuilding the table.
370  std::set<Fingerprint> added_since_rebuild_;
371  std::set<Fingerprint> deleted_since_rebuild_;
372
373  // TODO(brettw) Support deletion, we need to track whether anything was
374  // deleted during the rebuild here. Then we should delete any of these
375  // entries from the complete table later.
376  // std::vector<Fingerprint> removed_since_rebuild_;
377
378  // The currently open file with the table in it. This may be NULL if we're
379  // rebuilding and haven't written a new version yet or if |persist_to_disk_|
380  // is false. Writing to the file may be safely ignored in this case. Also
381  // |file_| may be non-NULL but point to a NULL pointer. That would mean that
382  // opening of the file is already scheduled in a background thread and any
383  // writing to the file can also be scheduled to the background thread as it's
384  // guaranteed to be executed after the opening.
385  // The class owns both the |file_| pointer and the pointer pointed
386  // by |*file_|.
387  FILE** file_;
388
389  // If true, will try to persist the hash table to disk. Will rebuild from
390  // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
391  bool persist_to_disk_;
392
393  // Shared memory consists of a SharedHeader followed by the table.
394  base::SharedMemory *shared_memory_;
395
396  // When we generate new tables, we increment the serial number of the
397  // shared memory object.
398  int32 shared_memory_serial_;
399
400  // Number of non-empty items in the table, used to compute fullness.
401  int32 used_items_;
402
403  // Testing values -----------------------------------------------------------
404  //
405  // The following fields exist for testing purposes. They are not used in
406  // release builds. It'd be nice to eliminate them in release builds, but we
407  // don't want to change the signature of the object between the unit test and
408  // regular builds. Instead, we just have "default" values that these take
409  // in release builds that give "regular" behavior.
410
411  // Overridden database file name for testing
412  base::FilePath database_name_override_;
413
414  // When nonzero, overrides the table size for new databases for testing
415  int32 table_size_override_;
416
417  // When set, indicates the task that should be run after the next rebuild from
418  // history is complete.
419  base::Closure rebuild_complete_task_;
420
421  // Set to prevent us from attempting to rebuild the database from global
422  // history if we have an error opening the file. This is used for testing,
423  // will be false in production.
424  bool suppress_rebuild_;
425
426  DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
427};
428
429// NOTE: These methods are defined inline here, so we can share the compilation
430//       of visitedlink_master.cc between the browser and the unit/perf tests.
431
432#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
433inline void VisitedLinkMaster::DebugValidate() {
434  int32 used_count = 0;
435  for (int32 i = 0; i < table_length_; i++) {
436    if (hash_table_[i])
437      used_count++;
438  }
439  DCHECK_EQ(used_count, used_items_);
440}
441#endif
442
443}  // namespace visitedlink
444
445#endif  // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
446