1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
6#define CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
7#pragma once
8
9#if defined(OS_WIN)
10#include <windows.h>
11#endif
12#include <set>
13#include <vector>
14
15#include "base/file_path.h"
16#include "base/gtest_prod_util.h"
17#include "base/memory/ref_counted.h"
18#include "base/shared_memory.h"
19#include "chrome/browser/history/history.h"
20#include "chrome/common/visitedlink_common.h"
21
22class GURL;
23class Profile;
24
25// Controls the link coloring database. The master controls all writing to the
26// database as well as disk I/O. There should be only one master.
27//
28// This class will defer writing operations to the file thread. This means that
29// class destruction, the file may still be open since operations are pending on
30// another thread.
31class VisitedLinkMaster : public VisitedLinkCommon {
32 public:
33  // Listens to the link coloring database events. The master is given this
34  // event as a constructor argument and dispatches events using it.
35  class Listener {
36   public:
37    virtual ~Listener() {}
38
39    // Called when link coloring database has been created or replaced. The
40    // argument is the new table handle.
41    virtual void NewTable(base::SharedMemory*) = 0;
42
43    // Called when new link has been added. The argument is the fingerprint
44    // (hash) of the link.
45    virtual void Add(Fingerprint fingerprint) = 0;
46
47    // Called when link coloring state has been reset. This may occur when
48    // entire or parts of history were deleted.
49    virtual void Reset() = 0;
50  };
51
52  // The |listener| may not be NULL.
53  VisitedLinkMaster(Listener* listener, Profile* profile);
54
55  // In unit test mode, we allow the caller to optionally specify the database
56  // filename so that it can be run from a unit test. The directory where this
57  // file resides must exist in this mode. You can also specify the default
58  // table size to test table resizing. If this parameter is 0, we will use the
59  // defaults.
60  //
61  // In the unit test mode, we also allow the caller to provide a history
62  // service pointer (the history service can't be fetched from the browser
63  // process when we're in unit test mode). This can be NULL to try to access
64  // the main version, which will probably fail (which can be good for testing
65  // this failure mode).
66  //
67  // When |suppress_rebuild| is set, we'll not attempt to load data from
68  // history if the file can't be loaded. This should generally be set for
69  // testing except when you want to test the rebuild process explicitly.
70  VisitedLinkMaster(Listener* listener,
71                    HistoryService* history_service,
72                    bool suppress_rebuild,
73                    const FilePath& filename,
74                    int32 default_table_size);
75  virtual ~VisitedLinkMaster();
76
77  // Must be called immediately after object creation. Nothing else will work
78  // until this is called. Returns true on success, false means that this
79  // object won't work.
80  bool Init();
81
82  base::SharedMemory* shared_memory() { return shared_memory_; }
83
84  // Adds a URL to the table.
85  void AddURL(const GURL& url);
86
87  // Adds a set of URLs to the table.
88  void AddURLs(const std::vector<GURL>& url);
89
90  // Deletes the specified URLs from the table.
91  void DeleteURLs(const std::set<GURL>& urls);
92
93  // Clears the visited links table by deleting the file from disk. Used as
94  // part of history clearing.
95  void DeleteAllURLs();
96
97#if defined(UNIT_TEST) || !defined(NDEBUG) || defined(PERF_TEST)
98  // This is a debugging function that can be called to double-check internal
99  // data structures. It will assert if the check fails.
100  void DebugValidate();
101
102  // Sets a task to execute when the next rebuild from history is complete.
103  // This is used by unit tests to wait for the rebuild to complete before
104  // they continue. The pointer will be owned by this object after the call.
105  void set_rebuild_complete_task(Task* task) {
106    DCHECK(!rebuild_complete_task_.get());
107    rebuild_complete_task_.reset(task);
108  }
109
110  // returns the number of items in the table for testing verification
111  int32 GetUsedCount() const {
112    return used_items_;
113  }
114
115  // Call to cause the entire database file to be re-written from scratch
116  // to disk. Used by the performance tester.
117  bool RewriteFile() {
118    return WriteFullTable();
119  }
120#endif
121
122 private:
123  FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
124  FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
125  FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
126
127  // Object to rebuild the table on the history thread (see the .cc file).
128  class TableBuilder;
129
130  // Byte offsets of values in the header.
131  static const int32 kFileHeaderSignatureOffset;
132  static const int32 kFileHeaderVersionOffset;
133  static const int32 kFileHeaderLengthOffset;
134  static const int32 kFileHeaderUsedOffset;
135  static const int32 kFileHeaderSaltOffset;
136
137  // The signature at the beginning of a file.
138  static const int32 kFileSignature;
139
140  // version of the file format this module currently uses
141  static const int32 kFileCurrentVersion;
142
143  // Bytes in the file header, including the salt.
144  static const size_t kFileHeaderSize;
145
146  // When creating a fresh new table, we use this many entries.
147  static const unsigned kDefaultTableSize;
148
149  // When the user is deleting a boatload of URLs, we don't really want to do
150  // individual writes for each of them. When the count exceeds this threshold,
151  // we will write the whole table to disk at once instead of individual items.
152  static const size_t kBigDeleteThreshold;
153
154  // Backend for the constructors initializing the members.
155  void InitMembers(Listener* listener, Profile* profile);
156
157  // If a rebuild is in progress, we save the URL in the temporary list.
158  // Otherwise, we add this to the table. Returns the index of the
159  // inserted fingerprint or null_hash_ on failure.
160  Hash TryToAddURL(const GURL& url);
161
162  // File I/O functions
163  // ------------------
164
165  // Writes the entire table to disk, returning true on success. It will leave
166  // the table file open and the handle to it in file_
167  bool WriteFullTable();
168
169  // Try to load the table from the database file. If the file doesn't exist or
170  // is corrupt, this will return failure.
171  bool InitFromFile();
172
173  // Reads the header of the link coloring database from disk. Assumes the
174  // file pointer is at the beginning of the file and that there are no pending
175  // asynchronous I/O operations.
176  //
177  // Returns true on success and places the size of the table in num_entries
178  // and the number of nonzero fingerprints in used_count. This will fail if
179  // the version of the file is not the current version of the database.
180  bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count,
181                      uint8 salt[LINK_SALT_LENGTH]);
182
183  // Fills *filename with the name of the link database filename
184  bool GetDatabaseFileName(FilePath* filename);
185
186  // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
187  // the write to a background thread.
188  void WriteToFile(FILE* hfile, off_t offset, void* data, int32 data_size);
189
190  // Helper function to schedule and asynchronous write of the used count to
191  // disk (this is a common operation).
192  void WriteUsedItemCountToFile();
193
194  // Helper function to schedule an asynchronous write of the given range of
195  // hash functions to disk. The range is inclusive on both ends. The range can
196  // wrap around at 0 and this function will handle it.
197  void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
198
199  // Synchronous read from the file. Assumes there are no pending asynchronous
200  // I/O functions. Returns true if the entire buffer was successfully filled.
201  bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size);
202
203  // General table handling
204  // ----------------------
205
206  // Called to add a fingerprint to the table. If |send_notifications| is true
207  // and the item is added successfully, Listener::Add will be invoked.
208  // Returns the index of the inserted fingerprint or null_hash_ if there was a
209  // duplicate and this item was skippped.
210  Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
211
212  // Deletes all fingerprints from the given vector from the current hash table
213  // and syncs it to disk if there are changes. This does not update the
214  // deleted_since_rebuild_ list, the caller must update this itself if there
215  // is an update pending.
216  void DeleteFingerprintsFromCurrentTable(
217      const std::set<Fingerprint>& fingerprints);
218
219  // Removes the indicated fingerprint from the table. If the update_file flag
220  // is set, the changes will also be written to disk. Returns true if the
221  // fingerprint was deleted, false if it was not in the table to delete.
222  bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
223
224  // Creates a new empty table, call if InitFromFile() fails. Normally, when
225  // |suppress_rebuild| is false, the table will be rebuilt from history,
226  // keeping us in sync. When |suppress_rebuild| is true, the new table will be
227  // empty and we will not consult history. This is used when clearing the
228  // database and for unit tests.
229  bool InitFromScratch(bool suppress_rebuild);
230
231  // Allocates the Fingerprint structure and length. When init_to_empty is set,
232  // the table will be filled with 0s and used_items_ will be set to 0 as well.
233  // If the flag is not set, these things are untouched and it is the
234  // responsibility of the caller to fill them (like when we are reading from
235  // a file).
236  bool CreateURLTable(int32 num_entries, bool init_to_empty);
237
238  // A wrapper for CreateURLTable, this will allocate a new table, initialized
239  // to empty. The caller is responsible for saving the shared memory pointer
240  // and handles before this call (they will be replaced with new ones) and
241  // releasing them later. This is designed for callers that make a new table
242  // and then copy values from the old table to the new one, then release the
243  // old table.
244  //
245  // Returns true on success. On failure, the old table will be restored. The
246  // caller should not attemp to release the pointer/handle in this case.
247  bool BeginReplaceURLTable(int32 num_entries);
248
249  // unallocates the Fingerprint table
250  void FreeURLTable();
251
252  // For growing the table. ResizeTableIfNecessary will check to see if the
253  // table should be resized and calls ResizeTable if needed. Returns true if
254  // we decided to resize the table.
255  bool ResizeTableIfNecessary();
256
257  // Resizes the table (growing or shrinking) as necessary to accomodate the
258  // current count.
259  void ResizeTable(int32 new_size);
260
261  // Returns the desired table size for |item_count| URLs.
262  uint32 NewTableSizeForCount(int32 item_count) const;
263
264  // Computes the table load as fraction. For example, if 1/4 of the entries are
265  // full, this value will be 0.25
266  float ComputeTableLoad() const {
267    return static_cast<float>(used_items_) / static_cast<float>(table_length_);
268  }
269
270  // Initializes a rebuild of the visited link database based on the browser
271  // history. This will set table_builder_ while working, and there should not
272  // already be a rebuild in place when called. See the definition for more
273  // details on how this works.
274  //
275  // Returns true on success. Failure means we're not attempting to rebuild
276  // the database because something failed.
277  bool RebuildTableFromHistory();
278
279  // Callback that the table rebuilder uses when the rebuild is complete.
280  // |success| is true if the fingerprint generation succeeded, in which case
281  // |fingerprints| will contain the computed fingerprints. On failure, there
282  // will be no fingerprints.
283  void OnTableRebuildComplete(bool success,
284                              const std::vector<Fingerprint>& fingerprints);
285
286  // Increases or decreases the given hash value by one, wrapping around as
287  // necessary. Used for probing.
288  inline Hash IncrementHash(Hash hash) {
289    if (hash >= table_length_ - 1)
290      return 0;  // Wrap around.
291    return hash + 1;
292  }
293  inline Hash DecrementHash(Hash hash) {
294    if (hash <= 0)
295      return table_length_ - 1;  // Wrap around.
296    return hash - 1;
297  }
298
299  Listener* listener_;
300
301#ifndef NDEBUG
302  // Indicates whether any asynchronous operation has ever been completed.
303  // We do some synchronous reads that require that no asynchronous operations
304  // are pending, yet we don't track whether they have been completed. This
305  // flag is a sanity check that these reads only happen before any
306  // asynchronous writes have been fired.
307  bool posted_asynchronous_operation_;
308#endif
309
310  // Reference to the user profile that this object belongs to
311  // (it knows the path to where the data is stored)
312  Profile* profile_;
313
314  // When non-NULL, indicates we are in database rebuild mode and points to
315  // the class collecting fingerprint information from the history system.
316  // The pointer is owned by this class, but it must remain valid while the
317  // history query is running. We must only delete it when the query is done.
318  scoped_refptr<TableBuilder> table_builder_;
319
320  // Indicates URLs added and deleted since we started rebuilding the table.
321  std::set<Fingerprint> added_since_rebuild_;
322  std::set<Fingerprint> deleted_since_rebuild_;
323
324  // TODO(brettw) Support deletion, we need to track whether anything was
325  // deleted during the rebuild here. Then we should delete any of these
326  // entries from the complete table later.
327  // std::vector<Fingerprint> removed_since_rebuild_;
328
329  // The currently open file with the table in it. This may be NULL if we're
330  // rebuilding and haven't written a new version yet. Writing to the file may
331  // be safely ignored in this case.
332  FILE* file_;
333
334  // Shared memory consists of a SharedHeader followed by the table.
335  base::SharedMemory *shared_memory_;
336
337  // When we generate new tables, we increment the serial number of the
338  // shared memory object.
339  int32 shared_memory_serial_;
340
341  // Number of non-empty items in the table, used to compute fullness.
342  int32 used_items_;
343
344  // Testing values -----------------------------------------------------------
345  //
346  // The following fields exist for testing purposes. They are not used in
347  // release builds. It'd be nice to eliminate them in release builds, but we
348  // don't want to change the signature of the object between the unit test and
349  // regular builds. Instead, we just have "default" values that these take
350  // in release builds that give "regular" behavior.
351
352  // Overridden database file name for testing
353  FilePath database_name_override_;
354
355  // When nonzero, overrides the table size for new databases for testing
356  int32 table_size_override_;
357
358  // When non-NULL, overrides the history service to use rather than as the
359  // BrowserProcess. This is provided for unit tests.
360  HistoryService* history_service_override_;
361
362  // When non-NULL, indicates the task that should be run after the next
363  // rebuild from history is complete.
364  scoped_ptr<Task> rebuild_complete_task_;
365
366  // Set to prevent us from attempting to rebuild the database from global
367  // history if we have an error opening the file. This is used for testing,
368  // will be false in production.
369  bool suppress_rebuild_;
370
371  DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
372};
373
374// NOTE: These methods are defined inline here, so we can share the compilation
375//       of visitedlink_master.cc between the browser and the unit/perf tests.
376
377#if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
378inline void VisitedLinkMaster::DebugValidate() {
379  int32 used_count = 0;
380  for (int32 i = 0; i < table_length_; i++) {
381    if (hash_table_[i])
382      used_count++;
383  }
384  DCHECK_EQ(used_count, used_items_);
385}
386#endif
387
388#endif  // CHROME_BROWSER_VISITEDLINK_VISITEDLINK_MASTER_H_
389