15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "chrome/browser/safe_browsing/prefix_set.h"
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <algorithm>
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <math.h>
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/file_util.h"
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/logging.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/md5.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/metrics/histogram.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace {
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// |kMagic| should be reasonably unique, and not match itself across
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// endianness changes.  I generated this value with:
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static uint32 kMagic = 0x864088dd;
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Current version the code writes out.
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)static uint32 kVersion = 0x1;
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef struct {
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 magic;
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 version;
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 index_size;
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  uint32 deltas_size;
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} FileHeader;
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// For |std::upper_bound()| to find a prefix w/in a vector of pairs.
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool PrefixLess(const std::pair<SBPrefix,size_t>& a,
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                const std::pair<SBPrefix,size_t>& b) {
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return a.first < b.first;
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace safe_browsing {
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) {
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (sorted_prefixes.size()) {
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Estimate the resulting vector sizes.  There will be strictly
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // more than |min_runs| entries in |index_|, but there generally
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // aren't many forced breaks.
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t min_runs = sorted_prefixes.size() / kMaxRun;
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    index_.reserve(min_runs);
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    deltas_.reserve(sorted_prefixes.size() - min_runs);
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Lead with the first prefix.
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SBPrefix prev_prefix = sorted_prefixes[0];
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    size_t run_length = 0;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    index_.push_back(std::make_pair(prev_prefix, deltas_.size()));
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t i = 1; i < sorted_prefixes.size(); ++i) {
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Skip duplicates.
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (sorted_prefixes[i] == prev_prefix)
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        continue;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // Calculate the delta.  |unsigned| is mandatory, because the
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // sorted_prefixes could be more than INT_MAX apart.
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      DCHECK_GT(sorted_prefixes[i], prev_prefix);
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const unsigned delta = sorted_prefixes[i] - prev_prefix;
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      const uint16 delta16 = static_cast<uint16>(delta);
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // New index ref if the delta doesn't fit, or if too many
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      // consecutive deltas have been encoded.
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) {
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size()));
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        run_length = 0;
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      } else {
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        // Continue the run of deltas.
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        deltas_.push_back(delta16);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta);
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        ++run_length;
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      }
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      prev_prefix = sorted_prefixes[i];
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Send up some memory-usage stats.  Bits because fractional bytes
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // are weird.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT +
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT;
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t unique_prefixes = index_.size() + deltas_.size();
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT;
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix",
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              bits_used / unique_prefixes,
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              kMaxBitsPerPrefix);
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index,
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     std::vector<uint16> *deltas) {
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  DCHECK(index && deltas);
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  index_.swap(*index);
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  deltas_.swap(*deltas);
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)PrefixSet::~PrefixSet() {}
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)bool PrefixSet::Exists(SBPrefix prefix) const {
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (index_.empty())
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Find the first position after |prefix| in |index_|.
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<std::pair<SBPrefix,size_t> >::const_iterator
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      iter = std::upper_bound(index_.begin(), index_.end(),
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              std::pair<SBPrefix,size_t>(prefix, 0),
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                              PrefixLess);
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // |prefix| comes before anything that's in the set.
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (iter == index_.begin())
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Capture the upper bound of our target entry's deltas.
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second);
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Back up to the entry our target is in.
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  --iter;
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // All prefixes in |index_| are in the set.
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  SBPrefix current = iter->first;
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (current == prefix)
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return true;
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Scan forward accumulating deltas while a match is possible.
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t di = iter->second; di < bound && current < prefix; ++di) {
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    current += deltas_[di];
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return current == prefix;
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const {
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  prefixes->reserve(index_.size() + deltas_.size());
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  for (size_t ii = 0; ii < index_.size(); ++ii) {
1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // The deltas for this |index_| entry run to the next index entry,
1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // or the end of the deltas.
1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t deltas_end =
1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size();
1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SBPrefix current = index_[ii].first;
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    prefixes->push_back(current);
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    for (size_t di = index_[ii].second; di < deltas_end; ++di) {
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      current += deltas_[di];
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      prefixes->push_back(current);
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    }
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// static
1552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) {
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int64 size_64;
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!file_util::GetFileSize(filter_name, &size_64))
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  using base::MD5Digest;
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest)))
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb"));
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!file.get())
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FileHeader header;
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t read = fread(&header, sizeof(header), 1, file.get());
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (read != 1)
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (header.magic != kMagic || header.version != kVersion)
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<std::pair<SBPrefix,size_t> > index;
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t index_bytes = sizeof(index[0]) * header.index_size;
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::vector<uint16> deltas;
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size;
1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Check for bogus sizes before allocating any space.
1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  const size_t expected_bytes =
1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest);
1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (static_cast<int64>(expected_bytes) != size_64)
1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The file looks valid, start building the digest.
1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Context context;
1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Init(&context);
1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                              sizeof(header)));
1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Read the index vector.  Herb Sutter indicates that vectors are
1945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // guaranteed to be contiuguous, so reading to where element 0 lives
1955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // is valid.
1965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (header.index_size) {
1975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    index.resize(header.index_size);
1985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get());
1995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (read != index.size())
2005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return NULL;
2015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::MD5Update(&context,
2025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    base::StringPiece(reinterpret_cast<char*>(&(index[0])),
2035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      index_bytes));
2045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Read vector of deltas.
2075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (header.deltas_size) {
2085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    deltas.resize(header.deltas_size);
2095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get());
2105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (read != deltas.size())
2115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return NULL;
2125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::MD5Update(&context,
2135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    base::StringPiece(reinterpret_cast<char*>(&(deltas[0])),
2145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                      deltas_bytes));
2155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Digest calculated_digest;
2185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Final(&calculated_digest, &context);
2195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Digest file_digest;
2215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  read = fread(&file_digest, sizeof(file_digest), 1, file.get());
2225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (read != 1)
2235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
2245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
2265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return NULL;
2275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Steals contents of |index| and |deltas| via swap().
2295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return new PrefixSet(&index, &deltas);
2305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool PrefixSet::WriteFile(const base::FilePath& filter_name) const {
2335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  FileHeader header;
2345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  header.magic = kMagic;
2355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  header.version = kVersion;
2365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  header.index_size = static_cast<uint32>(index_.size());
2375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  header.deltas_size = static_cast<uint32>(deltas_.size());
2385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Sanity check that the 32-bit values never mess things up.
2405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (static_cast<size_t>(header.index_size) != index_.size() ||
2415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      static_cast<size_t>(header.deltas_size) != deltas_.size()) {
2425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    NOTREACHED();
2435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  file_util::ScopedFILE file(file_util::OpenFile(filter_name, "wb"));
2475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (!file.get())
2485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Context context;
2515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Init(&context);
2525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(shess): The I/O code in safe_browsing_store_file.cc would
2545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // sure be useful about now.
2555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  size_t written = fwrite(&header, sizeof(header), 1, file.get());
2565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (written != 1)
2575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
2595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                              sizeof(header)));
2605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // As for reads, the standard guarantees the ability to access the
2625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // contents of the vector by a pointer to an element.
2635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (index_.size()) {
2645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t index_bytes = sizeof(index_[0]) * index_.size();
2655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(),
2665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     file.get());
2675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (written != index_.size())
2685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
2695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::MD5Update(&context,
2705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    base::StringPiece(
2715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        reinterpret_cast<const char*>(&(index_[0])),
2725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        index_bytes));
2735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (deltas_.size()) {
2765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size();
2775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(),
2785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                     file.get());
2795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (written != deltas_.size())
2805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return false;
2815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    base::MD5Update(&context,
2825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    base::StringPiece(
2835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        reinterpret_cast<const char*>(&(deltas_[0])),
2845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                        deltas_bytes));
2855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
2865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Digest digest;
2885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  base::MD5Final(&digest, &context);
2895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  written = fwrite(&digest, sizeof(digest), 1, file.get());
2905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  if (written != 1)
2915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return false;
2925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(shess): Can this code check that the close was successful?
2945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  file.reset();
2955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  return true;
2975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}
2985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
2995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace safe_browsing
300