106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// Use of this source code is governed by a BSD-style license that can be 306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// found in the LICENSE file. 406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "chrome/common/visitedlink_common.h" 606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include <string.h> // for memset() 83345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "base/logging.h" 1006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "base/md5.h" 113345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#include "googleurl/src/gurl.h" 1206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 1306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochconst VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0; 1406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochconst VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1; 1506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 1606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen MurdochVisitedLinkCommon::VisitedLinkCommon() 1706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch : hash_table_(NULL), 1806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch table_length_(0) { 193345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick memset(salt_, 0, sizeof(salt_)); 2006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch} 2106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 2206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen MurdochVisitedLinkCommon::~VisitedLinkCommon() { 2306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch} 2406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 2506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// FIXME: this uses linear probing, it should be replaced with quadratic 2606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// probing or something better. See VisitedLinkMaster::AddFingerprint 2706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochbool VisitedLinkCommon::IsVisited(const char* canonical_url, 2806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch size_t url_len) const { 2906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (url_len == 0) 3006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return false; 3106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (!hash_table_ || table_length_ == 0) 3206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return false; 3306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return IsVisited(ComputeURLFingerprint(canonical_url, url_len)); 3406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch} 3506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 363345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrickbool VisitedLinkCommon::IsVisited(const GURL& url) const { 373345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick return IsVisited(url.spec().data(), url.spec().size()); 383345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick} 393345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick 4006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochbool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const { 4106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Go through the table until we find the item or an empty spot (meaning it 4206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // wasn't found). This loop will terminate as long as the table isn't full, 4306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // which should be enforced by AddFingerprint. 4406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch Hash first_hash = HashFingerprint(fingerprint); 4506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch Hash cur_hash = first_hash; 4606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch while (true) { 4706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch Fingerprint cur_fingerprint = FingerprintAt(cur_hash); 4806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (cur_fingerprint == null_fingerprint_) 4906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return false; // End of probe sequence found. 5006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (cur_fingerprint == fingerprint) 5106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return true; // Found a match. 5206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 5306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // This spot was taken, but not by the item we're looking for, search in 5406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // the next position. 5506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch cur_hash++; 5606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (cur_hash == table_length_) 5706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch cur_hash = 0; 5806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (cur_hash == first_hash) { 5906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Wrapped around and didn't find an empty space, this means we're in an 6006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // infinite loop because AddFingerprint didn't do its job resizing. 6106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch NOTREACHED(); 6206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return false; 6306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 6406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 6506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch} 6606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 6706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint, 6806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// this is as random as any other subset of the MD5SUM. 6906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// 7006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// FIXME: this uses the MD5SUM of the 16-bit character version. For systems 7106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be 7206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// compatable. We should define explicitly what should happen here across 7306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// platforms, and convert if necessary (probably to UTF-16). 7406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 7506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// static 7606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen MurdochVisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint( 7706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch const char* canonical_url, 7806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch size_t url_len, 7906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch const uint8 salt[LINK_SALT_LENGTH]) { 8006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch DCHECK(url_len > 0) << "Canonical URLs should not be empty"; 8106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 8206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch MD5Context ctx; 8306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch MD5Init(&ctx); 8406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch MD5Update(&ctx, salt, sizeof(salt)); 8506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch MD5Update(&ctx, canonical_url, url_len * sizeof(char)); 8606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 8706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch MD5Digest digest; 8806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch MD5Final(&digest, &ctx); 8906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 9006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that 9106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // direct cast the alignment could be wrong, and we can't access a 64-bit int 9206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // on arbitrary alignment on some processors. This reinterpret_casts it 9306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // down to a char array of the same size as fingerprint, and then does the 9406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // bit cast, which amounts to a memcpy. This does not handle endian issues. 9506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return bit_cast<Fingerprint, uint8[8]>( 9606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch *reinterpret_cast<uint8(*)[8]>(&digest.a)); 9706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch} 98