172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// Copyright (c) 2011 The Chromium Authors. All rights reserved. 272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// Use of this source code is governed by a BSD-style license that can be 372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// found in the LICENSE file. 472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "chrome/browser/safe_browsing/prefix_set.h" 672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include <algorithm> 872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include <math.h> 972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 10ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/file_util.h" 1172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "base/logging.h" 12ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen#include "base/md5.h" 1372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen#include "base/metrics/histogram.h" 1472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 1572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsennamespace { 1672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 17ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// |kMagic| should be reasonably unique, and not match itself across 18ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// endianness changes. I generated this value with: 19ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 20ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic uint32 kMagic = 0x864088dd; 21ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 22ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// Current version the code writes out. 23ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenstatic uint32 kVersion = 0x1; 24ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 25ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsentypedef struct { 26ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen uint32 magic; 27ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen uint32 version; 28ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen uint32 index_size; 29ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen uint32 deltas_size; 30ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} FileHeader; 31ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 3272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen// For |std::upper_bound()| to find a prefix w/in a vector of pairs. 3372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenbool PrefixLess(const std::pair<SBPrefix,size_t>& a, 3472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const std::pair<SBPrefix,size_t>& b) { 3572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen return a.first < b.first; 3672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen} 3772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 3872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen} // namespace 3972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 4072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsennamespace safe_browsing { 4172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 42dc0f95d653279beabeb9817299e2902918ba123eKristian MonsenPrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { 43dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (sorted_prefixes.size()) { 4472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Lead with the first prefix. 45dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SBPrefix prev_prefix = sorted_prefixes[0]; 4672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen size_t run_length = 0; 4772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen index_.push_back(std::make_pair(prev_prefix, deltas_.size())); 4872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 49ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Used to build a checksum from the data used to construct the 50ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // structures. Since the data is a bunch of uniform hashes, it 51ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // seems reasonable to just xor most of it in, rather than trying 52ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // to use a more complicated algorithm. 53ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen uint32 checksum = static_cast<uint32>(sorted_prefixes[0]); 54ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(deltas_.size()); 55ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 56dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (size_t i = 1; i < sorted_prefixes.size(); ++i) { 57dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // Skip duplicates. 58dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen if (sorted_prefixes[i] == prev_prefix) 5972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen continue; 6072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 6172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Calculate the delta. |unsigned| is mandatory, because the 62dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // sorted_prefixes could be more than INT_MAX apart. 63dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen DCHECK_GT(sorted_prefixes[i], prev_prefix); 64ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const unsigned delta = sorted_prefixes[i] - prev_prefix; 65ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const uint16 delta16 = static_cast<uint16>(delta); 6672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 67ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // New index ref if the delta doesn't fit, or if too many 68ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // consecutive deltas have been encoded. 69ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) { 70ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(sorted_prefixes[i]); 71ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(deltas_.size()); 72dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size())); 7372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen run_length = 0; 7472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen } else { 75ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(delta16); 7672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Continue the run of deltas. 77ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen deltas_.push_back(delta16); 78ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta); 7972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen ++run_length; 8072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen } 8172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 82dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen prev_prefix = sorted_prefixes[i]; 8372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen } 84ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum_ = checksum; 85ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(CheckChecksum()); 8672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 8772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Send up some memory-usage stats. Bits because fractional bytes 8872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // are weird. 8972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT + 9072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT; 9172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const size_t unique_prefixes = index_.size() + deltas_.size(); 9272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT; 9372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix", 9472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen bits_used / unique_prefixes, 9572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen kMaxBitsPerPrefix); 9672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen } 9772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen} 9872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 99ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenPrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, 100ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<uint16> *deltas) { 101ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK(index && deltas); 102ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen index_.swap(*index); 103ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen deltas_.swap(*deltas); 104ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 105ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 10672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian MonsenPrefixSet::~PrefixSet() {} 10772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 10872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenbool PrefixSet::Exists(SBPrefix prefix) const { 10972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen if (index_.empty()) 11072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen return false; 11172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 11272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Find the first position after |prefix| in |index_|. 11372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen std::vector<std::pair<SBPrefix,size_t> >::const_iterator 11472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen iter = std::upper_bound(index_.begin(), index_.end(), 11572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen std::pair<SBPrefix,size_t>(prefix, 0), 11672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen PrefixLess); 11772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 11872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // |prefix| comes before anything that's in the set. 11972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen if (iter == index_.begin()) 12072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen return false; 12172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 12272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Capture the upper bound of our target entry's deltas. 12372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second); 12472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 12572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Back up to the entry our target is in. 12672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen --iter; 12772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 12872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // All prefixes in |index_| are in the set. 12972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen SBPrefix current = iter->first; 13072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen if (current == prefix) 13172a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen return true; 13272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 13372a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen // Scan forward accumulating deltas while a match is possible. 13472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen for (size_t di = iter->second; di < bound && current < prefix; ++di) { 13572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen current += deltas_[di]; 13672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen } 13772a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 13872a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen return current == prefix; 13972a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen} 14072a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 141ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenvoid PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const { 142dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (size_t ii = 0; ii < index_.size(); ++ii) { 143dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // The deltas for this |index_| entry run to the next index entry, 144dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen // or the end of the deltas. 145dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen const size_t deltas_end = 146dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size(); 147dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 148dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen SBPrefix current = index_[ii].first; 149dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen prefixes->push_back(current); 150dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen for (size_t di = index_[ii].second; di < deltas_end; ++di) { 151dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen current += deltas_[di]; 152dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen prefixes->push_back(current); 153dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 154dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen } 155dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen} 156dc0f95d653279beabeb9817299e2902918ba123eKristian Monsen 157ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen// static 158ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian MonsenPrefixSet* PrefixSet::LoadFile(const FilePath& filter_name) { 159ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen int64 size_64; 160ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!file_util::GetFileSize(filter_name, &size_64)) 161ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 162ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) 163ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 164ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 165ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb")); 166ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!file.get()) 167ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 168ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 169ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen FileHeader header; 170ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t read = fread(&header, sizeof(header), 1, file.get()); 171ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (read != 1) 172ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 173ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 174ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (header.magic != kMagic || header.version != kVersion) 175ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 176ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 177ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<std::pair<SBPrefix,size_t> > index; 178ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t index_bytes = sizeof(index[0]) * header.index_size; 179ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 180ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen std::vector<uint16> deltas; 181ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; 182ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 183ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Check for bogus sizes before allocating any space. 184ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t expected_bytes = 185ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); 186ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (static_cast<int64>(expected_bytes) != size_64) 187ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 188ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 189ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // The file looks valid, start building the digest. 190ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Context context; 191ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Init(&context); 192ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Update(&context, &header, sizeof(header)); 193ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 194ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Read the index vector. Herb Sutter indicates that vectors are 195ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // guaranteed to be contiuguous, so reading to where element 0 lives 196ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // is valid. 197ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen index.resize(header.index_size); 198ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); 199ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (read != index.size()) 200ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 201ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Update(&context, &(index[0]), index_bytes); 202ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 203ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Read vector of deltas. 204ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen deltas.resize(header.deltas_size); 205ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); 206ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (read != deltas.size()) 207ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 208ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Update(&context, &(deltas[0]), deltas_bytes); 209ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 210ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Digest calculated_digest; 211ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Final(&calculated_digest, &context); 212ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 213ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Digest file_digest; 214ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen read = fread(&file_digest, sizeof(file_digest), 1, file.get()); 215ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (read != 1) 216ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 217ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 218ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) 219ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return NULL; 220ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 221ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Steals contents of |index| and |deltas| via swap(). 222ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return new PrefixSet(&index, &deltas); 223ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 224ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 225ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool PrefixSet::WriteFile(const FilePath& filter_name) const { 226ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen FileHeader header; 227ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen header.magic = kMagic; 228ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen header.version = kVersion; 229ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen header.index_size = static_cast<uint32>(index_.size()); 230ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen header.deltas_size = static_cast<uint32>(deltas_.size()); 231ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 232ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Sanity check that the 32-bit values never mess things up. 233ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (static_cast<size_t>(header.index_size) != index_.size() || 234ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen static_cast<size_t>(header.deltas_size) != deltas_.size()) { 235ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen NOTREACHED(); 236ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return false; 237ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 238ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 239ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen file_util::ScopedFILE file(file_util::OpenFile(filter_name, "wb")); 240ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (!file.get()) 241ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return false; 242ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 243ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Context context; 244ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Init(&context); 245ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 246ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // TODO(shess): The I/O code in safe_browsing_store_file.cc would 247ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // sure be useful about now. 248ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t written = fwrite(&header, sizeof(header), 1, file.get()); 249ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (written != 1) 250ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return false; 251ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Update(&context, &header, sizeof(header)); 252ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 253ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // As for reads, the standard guarantees the ability to access the 254ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // contents of the vector by a pointer to an element. 255ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t index_bytes = sizeof(index_[0]) * index_.size(); 256ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(), file.get()); 257ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (written != index_.size()) 258ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return false; 259ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Update(&context, &(index_[0]), index_bytes); 260ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 261ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size(); 262ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), 263ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen file.get()); 264ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (written != deltas_.size()) 265ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return false; 266ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Update(&context, &(deltas_[0]), deltas_bytes); 267ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 268ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Digest digest; 269ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen MD5Final(&digest, &context); 270ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen written = fwrite(&digest, sizeof(digest), 1, file.get()); 271ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (written != 1) 272ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return false; 273ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 274ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // TODO(shess): Can this code check that the close was successful? 275ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen file.reset(); 276ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 277ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return true; 278ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 279ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 280ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsensize_t PrefixSet::IndexBinFor(size_t target_index) const { 281ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // The items in |index_| have the logical index of each previous 282ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // item in |index_| plus the count of deltas between the items. 283ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Since the indices into |deltas_| are absolute, the logical index 284ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // is then the sum of the two indices. 285ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t lo = 0; 286ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen size_t hi = index_.size(); 287ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 288ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Binary search because linear search was too slow (really, the 289ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // unit test sucked). Inline because the elements can't be compared 290ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // in isolation (their position matters). 291ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen while (hi - lo > 1) { 292ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t i = (lo + hi) / 2; 293ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 294ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen if (target_index < i + index_[i].second) { 295ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_LT(i, hi); // Always making progress. 296ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen hi = i; 297ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } else { 298ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen DCHECK_GT(i, lo); // Always making progress. 299ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen lo = i; 300ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 301ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 302ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return lo; 303ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 304ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 305ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsensize_t PrefixSet::GetSize() const { 306ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return index_.size() + deltas_.size(); 307ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 308ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 309ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool PrefixSet::IsDeltaAt(size_t target_index) const { 310ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen CHECK_LT(target_index, GetSize()); 311ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 312ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t i = IndexBinFor(target_index); 313ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return target_index > i + index_[i].second; 314ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 315ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 316ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenuint16 PrefixSet::DeltaAt(size_t target_index) const { 317ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen CHECK_LT(target_index, GetSize()); 318ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 319ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Find the |index_| entry which contains |target_index|. 320ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen const size_t i = IndexBinFor(target_index); 321ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 322ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // Exactly on the |index_| entry means no delta. 323ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen CHECK_GT(target_index, i + index_[i].second); 324ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 325ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // -i backs out the |index_| entries, -1 gets the delta that lead to 326ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen // the value at |target_index|. 327ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen CHECK_LT(target_index - i - 1, deltas_.size()); 328ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return deltas_[target_index - i - 1]; 329ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 330ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 331ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsenbool PrefixSet::CheckChecksum() const { 332ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen uint32 checksum = 0; 333ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 334ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (size_t ii = 0; ii < index_.size(); ++ii) { 335ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(index_[ii].first); 336ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(index_[ii].second); 337ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 338ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 339ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen for (size_t di = 0; di < deltas_.size(); ++di) { 340ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen checksum ^= static_cast<uint32>(deltas_[di]); 341ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen } 342ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 343ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen return checksum == checksum_; 344ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen} 345ddb351dbec246cf1fab5ec20d2d5520909041de1Kristian Monsen 34672a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen} // namespace safe_browsing 347