1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
6#define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
7
8#include <vector>
9
10#include "base/basictypes.h"
11
12class GURL;
13
14namespace visitedlink {
15
16// number of bytes in the salt
17#define LINK_SALT_LENGTH 8
18
19// A multiprocess-safe database of the visited links for the browser. There
20// should be exactly one process that has write access (implemented by
21// VisitedLinkMaster), while all other processes should be read-only
22// (implemented by VisitedLinkSlave). These other processes add links by calling
23// the writer process to add them for it. The writer may also notify the readers
24// to replace their table when the table is resized.
25//
26// IPC is not implemented in these classes. This is done through callback
27// functions supplied by the creator of these objects to allow more flexibility,
28// especially for testing.
29//
30// This class defines the common base for these others. We implement accessors
31// for looking things up in the hash table, and for computing hash values and
32// fingerprints. Both the master and the slave inherit from this, and add their
33// own code to set up and change these values as their design requires. The
34// slave pretty much just sets up the shared memory and saves the pointer. The
35// master does a lot of work to manage the table, reading and writing it to and
36// from disk, and resizing it when it gets too full.
37//
38// To ask whether a page is in history, we compute a 64-bit fingerprint of the
39// URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
40// we consider it visited. Otherwise, it is unvisited. Note that it is possible
41// to get collisions, which is the penalty for not storing all URL strings in
42// memory (which could get to be more than we want to have in memory). We use
43// a salt value for the links on one computer so that an attacker can not
44// manually create a link that causes a collision.
45class VisitedLinkCommon {
46 public:
47  // A number that identifies the URL.
48  typedef uint64 Fingerprint;
49  typedef std::vector<Fingerprint> Fingerprints;
50
51  // A hash value of a fingerprint
52  typedef int32 Hash;
53
54  // A fingerprint or hash value that does not exist
55  static const Fingerprint null_fingerprint_;
56  static const Hash null_hash_;
57
58  VisitedLinkCommon();
59  virtual ~VisitedLinkCommon();
60
61  // Returns the fingerprint for the given URL.
62  Fingerprint ComputeURLFingerprint(const char* canonical_url,
63                                    size_t url_len) const {
64    return ComputeURLFingerprint(canonical_url, url_len, salt_);
65  }
66
67  // Looks up the given key in the table. The fingerprint for the URL is
68  // computed if you call one with the string argument. Returns true if found.
69  // Does not modify the hastable.
70  bool IsVisited(const char* canonical_url, size_t url_len) const;
71  bool IsVisited(const GURL& url) const;
72  bool IsVisited(Fingerprint fingerprint) const;
73
74#ifdef UNIT_TEST
75  // Returns statistics about DB usage
76  void GetUsageStatistics(int32* table_size,
77                          VisitedLinkCommon::Fingerprint** fingerprints) {
78    *table_size = table_length_;
79    *fingerprints = hash_table_;
80  }
81#endif
82
83 protected:
84  // This structure is at the beginning of the shared memory so that the slaves
85  // can get stats on the table
86  struct SharedHeader {
87    // see goes into table_length_
88    uint32 length;
89
90    // goes into salt_
91    uint8 salt[LINK_SALT_LENGTH];
92  };
93
94  // Returns the fingerprint at the given index into the URL table. This
95  // function should be called instead of accessing the table directly to
96  // contain endian issues.
97  Fingerprint FingerprintAt(int32 table_offset) const {
98    if (!hash_table_)
99      return null_fingerprint_;
100    return hash_table_[table_offset];
101  }
102
103  // Computes the fingerprint of the given canonical URL. It is static so the
104  // same algorithm can be re-used by the table rebuilder, so you will have to
105  // pass the salt as a parameter. See the non-static version above if you
106  // want to use the current class' salt.
107  static Fingerprint ComputeURLFingerprint(const char* canonical_url,
108                                           size_t url_len,
109                                           const uint8 salt[LINK_SALT_LENGTH]);
110
111  // Computes the hash value of the given fingerprint, this is used as a lookup
112  // into the hashtable.
113  static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
114    if (table_length == 0)
115      return null_hash_;
116    return static_cast<Hash>(fingerprint % table_length);
117  }
118  // Uses the current hashtable.
119  Hash HashFingerprint(Fingerprint fingerprint) const {
120    return HashFingerprint(fingerprint, table_length_);
121  }
122
123  // pointer to the first item
124  VisitedLinkCommon::Fingerprint* hash_table_;
125
126  // the number of items in the hash table
127  int32 table_length_;
128
129  // salt used for each URL when computing the fingerprint
130  uint8 salt_[LINK_SALT_LENGTH];
131
132 private:
133  DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
134};
135
136}  // namespace visitedlink
137
138#endif  // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_
139