1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_COMMON_VISITEDLINK_COMMON_H__
6#define CHROME_COMMON_VISITEDLINK_COMMON_H__
7#pragma once
8
9#include <vector>
10
11#include "base/basictypes.h"
12
13class GURL;
14
15// number of bytes in the salt
16#define LINK_SALT_LENGTH 8
17
18// A multiprocess-safe database of the visited links for the browser. There
19// should be exactly one process that has write access (implemented by
20// VisitedLinkMaster), while all other processes should be read-only
21// (implemented by VisitedLinkSlave). These other processes add links by calling
22// the writer process to add them for it. The writer may also notify the readers
23// to replace their table when the table is resized.
24//
25// IPC is not implemented in these classes. This is done through callback
26// functions supplied by the creator of these objects to allow more flexibility,
27// especially for testing.
28//
29// This class defines the common base for these others. We implement accessors
30// for looking things up in the hash table, and for computing hash values and
31// fingerprints. Both the master and the slave inherit from this, and add their
32// own code to set up and change these values as their design requires. The
33// slave pretty much just sets up the shared memory and saves the pointer. The
34// master does a lot of work to manage the table, reading and writing it to and
35// from disk, and resizing it when it gets too full.
36//
37// To ask whether a page is in history, we compute a 64-bit fingerprint of the
38// URL. This URL is hashed and we see if it is in the URL hashtable. If it is,
39// we consider it visited. Otherwise, it is unvisited. Note that it is possible
40// to get collisions, which is the penalty for not storing all URL strings in
41// memory (which could get to be more than we want to have in memory). We use
42// a salt value for the links on one computer so that an attacker can not
43// manually create a link that causes a collision.
44class VisitedLinkCommon {
45 public:
46  // A number that identifies the URL.
47  typedef uint64 Fingerprint;
48  typedef std::vector<Fingerprint> Fingerprints;
49
50  // A hash value of a fingerprint
51  typedef int32 Hash;
52
53  // A fingerprint or hash value that does not exist
54  static const Fingerprint null_fingerprint_;
55  static const Hash null_hash_;
56
57  VisitedLinkCommon();
58  virtual ~VisitedLinkCommon();
59
60  // Returns the fingerprint for the given URL.
61  Fingerprint ComputeURLFingerprint(const char* canonical_url,
62                                    size_t url_len) const {
63    return ComputeURLFingerprint(canonical_url, url_len, salt_);
64  }
65
66  // Looks up the given key in the table. The fingerprint for the URL is
67  // computed if you call one with the string argument. Returns true if found.
68  // Does not modify the hastable.
69  bool IsVisited(const char* canonical_url, size_t url_len) const;
70  bool IsVisited(const GURL& url) const;
71  bool IsVisited(Fingerprint fingerprint) const;
72
73#ifdef UNIT_TEST
74  // Returns statistics about DB usage
75  void GetUsageStatistics(int32* table_size,
76                          VisitedLinkCommon::Fingerprint** fingerprints) {
77    *table_size = table_length_;
78    *fingerprints = hash_table_;
79  }
80#endif
81
82 protected:
83  // This structure is at the beginning of the shared memory so that the slaves
84  // can get stats on the table
85  struct SharedHeader {
86    // see goes into table_length_
87    uint32 length;
88
89    // goes into salt_
90    uint8 salt[LINK_SALT_LENGTH];
91  };
92
93  // Returns the fingerprint at the given index into the URL table. This
94  // function should be called instead of accessing the table directly to
95  // contain endian issues.
96  Fingerprint FingerprintAt(int32 table_offset) const {
97    if (!hash_table_)
98      return null_fingerprint_;
99    return hash_table_[table_offset];
100  }
101
102  // Computes the fingerprint of the given canonical URL. It is static so the
103  // same algorithm can be re-used by the table rebuilder, so you will have to
104  // pass the salt as a parameter. See the non-static version above if you
105  // want to use the current class' salt.
106  static Fingerprint ComputeURLFingerprint(const char* canonical_url,
107                                           size_t url_len,
108                                           const uint8 salt[LINK_SALT_LENGTH]);
109
110  // Computes the hash value of the given fingerprint, this is used as a lookup
111  // into the hashtable.
112  static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) {
113    if (table_length == 0)
114      return null_hash_;
115    return static_cast<Hash>(fingerprint % table_length);
116  }
117  // Uses the current hashtable.
118  Hash HashFingerprint(Fingerprint fingerprint) const {
119    return HashFingerprint(fingerprint, table_length_);
120  }
121
122  // pointer to the first item
123  VisitedLinkCommon::Fingerprint* hash_table_;
124
125  // the number of items in the hash table
126  int32 table_length_;
127
128  // salt used for each URL when computing the fingerprint
129  uint8 salt_[LINK_SALT_LENGTH];
130
131 private:
132  DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon);
133};
134
135#endif  // CHROME_COMMON_VISITEDLINK_COMMON_H_
136