1// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/common/visitedlink_common.h"
6
7#include <string.h>  // for memset()
8
9#include "base/logging.h"
10#include "base/md5.h"
11#include "googleurl/src/gurl.h"
12
13const VisitedLinkCommon::Fingerprint VisitedLinkCommon::null_fingerprint_ = 0;
14const VisitedLinkCommon::Hash VisitedLinkCommon::null_hash_ = -1;
15
16VisitedLinkCommon::VisitedLinkCommon()
17    : hash_table_(NULL),
18      table_length_(0) {
19  memset(salt_, 0, sizeof(salt_));
20}
21
22VisitedLinkCommon::~VisitedLinkCommon() {
23}
24
25// FIXME: this uses linear probing, it should be replaced with quadratic
26// probing or something better. See VisitedLinkMaster::AddFingerprint
27bool VisitedLinkCommon::IsVisited(const char* canonical_url,
28                                  size_t url_len) const {
29  if (url_len == 0)
30    return false;
31  if (!hash_table_ || table_length_ == 0)
32    return false;
33  return IsVisited(ComputeURLFingerprint(canonical_url, url_len));
34}
35
36bool VisitedLinkCommon::IsVisited(const GURL& url) const {
37  return IsVisited(url.spec().data(), url.spec().size());
38}
39
40bool VisitedLinkCommon::IsVisited(Fingerprint fingerprint) const {
41  // Go through the table until we find the item or an empty spot (meaning it
42  // wasn't found). This loop will terminate as long as the table isn't full,
43  // which should be enforced by AddFingerprint.
44  Hash first_hash = HashFingerprint(fingerprint);
45  Hash cur_hash = first_hash;
46  while (true) {
47    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
48    if (cur_fingerprint == null_fingerprint_)
49      return false;  // End of probe sequence found.
50    if (cur_fingerprint == fingerprint)
51      return true;  // Found a match.
52
53    // This spot was taken, but not by the item we're looking for, search in
54    // the next position.
55    cur_hash++;
56    if (cur_hash == table_length_)
57      cur_hash = 0;
58    if (cur_hash == first_hash) {
59      // Wrapped around and didn't find an empty space, this means we're in an
60      // infinite loop because AddFingerprint didn't do its job resizing.
61      NOTREACHED();
62      return false;
63    }
64  }
65}
66
67// Uses the top 64 bits of the MD5 sum of the canonical URL as the fingerprint,
68// this is as random as any other subset of the MD5SUM.
69//
70// FIXME: this uses the MD5SUM of the 16-bit character version. For systems
71// where wchar_t is not 16 bits (Linux uses 32 bits, I think), this will not be
72// compatable. We should define explicitly what should happen here across
73// platforms, and convert if necessary (probably to UTF-16).
74
75// static
76VisitedLinkCommon::Fingerprint VisitedLinkCommon::ComputeURLFingerprint(
77    const char* canonical_url,
78    size_t url_len,
79    const uint8 salt[LINK_SALT_LENGTH]) {
80  DCHECK(url_len > 0) << "Canonical URLs should not be empty";
81
82  MD5Context ctx;
83  MD5Init(&ctx);
84  MD5Update(&ctx, salt, sizeof(salt));
85  MD5Update(&ctx, canonical_url, url_len * sizeof(char));
86
87  MD5Digest digest;
88  MD5Final(&digest, &ctx);
89
90  // This is the same as "return *(Fingerprint*)&digest.a;" but if we do that
91  // direct cast the alignment could be wrong, and we can't access a 64-bit int
92  // on arbitrary alignment on some processors. This reinterpret_casts it
93  // down to a char array of the same size as fingerprint, and then does the
94  // bit cast, which amounts to a memcpy. This does not handle endian issues.
95  return bit_cast<Fingerprint, uint8[8]>(
96      *reinterpret_cast<uint8(*)[8]>(&digest.a));
97}
98