1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/common/visitedlink_common.h" 6 7#include <string.h> // for memset() 8 9#include "base/logging.h" 10#include "base/md5.h" 11#include "googleurl/src/gurl.h" 12 13const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0; 14const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1; 15 16VisitedLinkCommon::VisitedLinkCommon() 17 : hash_table_(NULL), 18 table_length_(0) { 19 memset(salt_, 0, sizeof(salt_)); 20} 21 22VisitedLinkCommon::~VisitedLinkCommon() { 23} 24 25// FIXME: this uses linear probing, it should be replaced with quadratic 26// probing or something better. See VisitedLinkMaster::AddFingerprint 27bool VisitedLinkCommon::IsVisited(const char* canonical_url, 28 size_t url_len) const { 29 if (url_len == 0) 30 return false; 31 if (!hash_table_ || table_length_ == 0) 32 return false; 33 return IsVisited(ComputeURLFingerprint(canonical_url, url_len)); 34} 35 36bool VisitedLinkCommon::IsVisited(const GURL& url) const { 37 return IsVisited(url.spec().data(), url.spec().size()); 38} 39 40bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const { 41 // Go through the table until we find the item or an empty spot (meaning it 42 // wasn't found). This loop will terminate as long as the table isn't full, 43 // which should be enforced by AddFingerprint. 44 Hash first_hash = HashFingerprint(fingerprint); 45 Hash cur_hash = first_hash; 46 while (true) { 47 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 48 if (cur_fingerprint == null_fingerprint_) 49 return false; // End of probe sequence found. 50 if (cur_fingerprint == fingerprint) 51 return true; // Found a match. 52 53 // This spot was taken, but not by the item we're looking for, search in 54 // the next position. 55 cur_hash++; 56 if (cur_hash == table_length_) 57 cur_hash = 0; 58 if (cur_hash == first_hash) { 59 // Wrapped around and didn't find an empty space, this means we're in an 60 // infinite loop because AddFingerprint didn't do its job resizing. 61 NOTREACHED(); 62 return false; 63 } 64 } 65} 66 67// Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint, 68// this is as random as any other subset of the MD5SUM. 69// 70// FIXME: this uses the MD5SUM of the 16-bit character version. For systems 71// where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be 72// compatable. We should define explicitly what should happen here across 73// platforms, and convert if necessary (probably to UTF-16). 74 75// static 76VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint( 77 const char* canonical_url, 78 size_t url_len, 79 const uint8 salt[LINK_SALT_LENGTH]) { 80 DCHECK(url_len > 0) << "Canonical URLs should not be empty"; 81 82 MD5Context ctx; 83 MD5Init(&ctx); 84 MD5Update(&ctx, salt, sizeof(salt)); 85 MD5Update(&ctx, canonical_url, url_len * sizeof(char)); 86 87 MD5Digest digest; 88 MD5Final(&digest, &ctx); 89 90 // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that 91 // direct cast the alignment could be wrong, and we can't access a 64-bit int 92 // on arbitrary alignment on some processors. This reinterpret_casts it 93 // down to a char array of the same size as fingerprint, and then does the 94 // bit cast, which amounts to a memcpy. This does not handle endian issues. 95 return bit_cast<Fingerprint, uint8[8]>( 96 *reinterpret_cast<uint8(*)[8]>(&digest.a)); 97} 98