1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ 6#define COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ 7 8#include <vector> 9 10#include "base/basictypes.h" 11 12class GURL; 13 14namespace visitedlink { 15 16// number of bytes in the salt 17#define LINK_SALT_LENGTH 8 18 19// A multiprocess-safe database of the visited links for the browser. There 20// should be exactly one process that has write access (implemented by 21// VisitedLinkMaster), while all other processes should be read-only 22// (implemented by VisitedLinkSlave). These other processes add links by calling 23// the writer process to add them for it. The writer may also notify the readers 24// to replace their table when the table is resized. 25// 26// IPC is not implemented in these classes. This is done through callback 27// functions supplied by the creator of these objects to allow more flexibility, 28// especially for testing. 29// 30// This class defines the common base for these others. We implement accessors 31// for looking things up in the hash table, and for computing hash values and 32// fingerprints. Both the master and the slave inherit from this, and add their 33// own code to set up and change these values as their design requires. The 34// slave pretty much just sets up the shared memory and saves the pointer. The 35// master does a lot of work to manage the table, reading and writing it to and 36// from disk, and resizing it when it gets too full. 37// 38// To ask whether a page is in history, we compute a 64-bit fingerprint of the 39// URL. This URL is hashed and we see if it is in the URL hashtable. If it is, 40// we consider it visited. Otherwise, it is unvisited. Note that it is possible 41// to get collisions, which is the penalty for not storing all URL strings in 42// memory (which could get to be more than we want to have in memory). We use 43// a salt value for the links on one computer so that an attacker can not 44// manually create a link that causes a collision. 45class VisitedLinkCommon { 46 public: 47 // A number that identifies the URL. 48 typedef uint64 Fingerprint; 49 typedef std::vector<Fingerprint> Fingerprints; 50 51 // A hash value of a fingerprint 52 typedef int32 Hash; 53 54 // A fingerprint or hash value that does not exist 55 static const Fingerprint null_fingerprint_; 56 static const Hash null_hash_; 57 58 VisitedLinkCommon(); 59 virtual ~VisitedLinkCommon(); 60 61 // Returns the fingerprint for the given URL. 62 Fingerprint ComputeURLFingerprint(const char* canonical_url, 63 size_t url_len) const { 64 return ComputeURLFingerprint(canonical_url, url_len, salt_); 65 } 66 67 // Looks up the given key in the table. The fingerprint for the URL is 68 // computed if you call one with the string argument. Returns true if found. 69 // Does not modify the hastable. 70 bool IsVisited(const char* canonical_url, size_t url_len) const; 71 bool IsVisited(const GURL& url) const; 72 bool IsVisited(Fingerprint fingerprint) const; 73 74#ifdef UNIT_TEST 75 // Returns statistics about DB usage 76 void GetUsageStatistics(int32* table_size, 77 VisitedLinkCommon::Fingerprint** fingerprints) { 78 *table_size = table_length_; 79 *fingerprints = hash_table_; 80 } 81#endif 82 83 protected: 84 // This structure is at the beginning of the shared memory so that the slaves 85 // can get stats on the table 86 struct SharedHeader { 87 // see goes into table_length_ 88 uint32 length; 89 90 // goes into salt_ 91 uint8 salt[LINK_SALT_LENGTH]; 92 }; 93 94 // Returns the fingerprint at the given index into the URL table. This 95 // function should be called instead of accessing the table directly to 96 // contain endian issues. 97 Fingerprint FingerprintAt(int32 table_offset) const { 98 if (!hash_table_) 99 return null_fingerprint_; 100 return hash_table_[table_offset]; 101 } 102 103 // Computes the fingerprint of the given canonical URL. It is static so the 104 // same algorithm can be re-used by the table rebuilder, so you will have to 105 // pass the salt as a parameter. See the non-static version above if you 106 // want to use the current class' salt. 107 static Fingerprint ComputeURLFingerprint(const char* canonical_url, 108 size_t url_len, 109 const uint8 salt[LINK_SALT_LENGTH]); 110 111 // Computes the hash value of the given fingerprint, this is used as a lookup 112 // into the hashtable. 113 static Hash HashFingerprint(Fingerprint fingerprint, int32 table_length) { 114 if (table_length == 0) 115 return null_hash_; 116 return static_cast<Hash>(fingerprint % table_length); 117 } 118 // Uses the current hashtable. 119 Hash HashFingerprint(Fingerprint fingerprint) const { 120 return HashFingerprint(fingerprint, table_length_); 121 } 122 123 // pointer to the first item 124 VisitedLinkCommon::Fingerprint* hash_table_; 125 126 // the number of items in the hash table 127 int32 table_length_; 128 129 // salt used for each URL when computing the fingerprint 130 uint8 salt_[LINK_SALT_LENGTH]; 131 132 private: 133 DISALLOW_COPY_AND_ASSIGN(VisitedLinkCommon); 134}; 135 136} // namespace visitedlink 137 138#endif // COMPONENTS_VISITEDLINK_COMMON_VISITEDLINK_COMMON_H_ 139