1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/safe_browsing/prefix_set.h" 6 7#include <algorithm> 8#include <math.h> 9 10#include "base/file_util.h" 11#include "base/logging.h" 12#include "base/md5.h" 13#include "base/metrics/histogram.h" 14 15namespace { 16 17// |kMagic| should be reasonably unique, and not match itself across 18// endianness changes. I generated this value with: 19// md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 20static uint32 kMagic = 0x864088dd; 21 22// Current version the code writes out. 23static uint32 kVersion = 0x1; 24 25typedef struct { 26 uint32 magic; 27 uint32 version; 28 uint32 index_size; 29 uint32 deltas_size; 30} FileHeader; 31 32// For |std::upper_bound()| to find a prefix w/in a vector of pairs. 33bool PrefixLess(const std::pair<SBPrefix,size_t>& a, 34 const std::pair<SBPrefix,size_t>& b) { 35 return a.first < b.first; 36} 37 38} // namespace 39 40namespace safe_browsing { 41 42PrefixSet::PrefixSet(const std::vector<SBPrefix>& sorted_prefixes) { 43 if (sorted_prefixes.size()) { 44 // Estimate the resulting vector sizes. There will be strictly 45 // more than |min_runs| entries in |index_|, but there generally 46 // aren't many forced breaks. 47 const size_t min_runs = sorted_prefixes.size() / kMaxRun; 48 index_.reserve(min_runs); 49 deltas_.reserve(sorted_prefixes.size() - min_runs); 50 51 // Lead with the first prefix. 52 SBPrefix prev_prefix = sorted_prefixes[0]; 53 size_t run_length = 0; 54 index_.push_back(std::make_pair(prev_prefix, deltas_.size())); 55 56 for (size_t i = 1; i < sorted_prefixes.size(); ++i) { 57 // Skip duplicates. 58 if (sorted_prefixes[i] == prev_prefix) 59 continue; 60 61 // Calculate the delta. |unsigned| is mandatory, because the 62 // sorted_prefixes could be more than INT_MAX apart. 63 DCHECK_GT(sorted_prefixes[i], prev_prefix); 64 const unsigned delta = sorted_prefixes[i] - prev_prefix; 65 const uint16 delta16 = static_cast<uint16>(delta); 66 67 // New index ref if the delta doesn't fit, or if too many 68 // consecutive deltas have been encoded. 69 if (delta != static_cast<unsigned>(delta16) || run_length >= kMaxRun) { 70 index_.push_back(std::make_pair(sorted_prefixes[i], deltas_.size())); 71 run_length = 0; 72 } else { 73 // Continue the run of deltas. 74 deltas_.push_back(delta16); 75 DCHECK_EQ(static_cast<unsigned>(deltas_.back()), delta); 76 ++run_length; 77 } 78 79 prev_prefix = sorted_prefixes[i]; 80 } 81 82 // Send up some memory-usage stats. Bits because fractional bytes 83 // are weird. 84 const size_t bits_used = index_.size() * sizeof(index_[0]) * CHAR_BIT + 85 deltas_.size() * sizeof(deltas_[0]) * CHAR_BIT; 86 const size_t unique_prefixes = index_.size() + deltas_.size(); 87 static const size_t kMaxBitsPerPrefix = sizeof(SBPrefix) * CHAR_BIT; 88 UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetBitsPerPrefix", 89 bits_used / unique_prefixes, 90 kMaxBitsPerPrefix); 91 } 92} 93 94PrefixSet::PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, 95 std::vector<uint16> *deltas) { 96 DCHECK(index && deltas); 97 index_.swap(*index); 98 deltas_.swap(*deltas); 99} 100 101PrefixSet::~PrefixSet() {} 102 103bool PrefixSet::Exists(SBPrefix prefix) const { 104 if (index_.empty()) 105 return false; 106 107 // Find the first position after |prefix| in |index_|. 108 std::vector<std::pair<SBPrefix,size_t> >::const_iterator 109 iter = std::upper_bound(index_.begin(), index_.end(), 110 std::pair<SBPrefix,size_t>(prefix, 0), 111 PrefixLess); 112 113 // |prefix| comes before anything that's in the set. 114 if (iter == index_.begin()) 115 return false; 116 117 // Capture the upper bound of our target entry's deltas. 118 const size_t bound = (iter == index_.end() ? deltas_.size() : iter->second); 119 120 // Back up to the entry our target is in. 121 --iter; 122 123 // All prefixes in |index_| are in the set. 124 SBPrefix current = iter->first; 125 if (current == prefix) 126 return true; 127 128 // Scan forward accumulating deltas while a match is possible. 129 for (size_t di = iter->second; di < bound && current < prefix; ++di) { 130 current += deltas_[di]; 131 } 132 133 return current == prefix; 134} 135 136void PrefixSet::GetPrefixes(std::vector<SBPrefix>* prefixes) const { 137 prefixes->reserve(index_.size() + deltas_.size()); 138 139 for (size_t ii = 0; ii < index_.size(); ++ii) { 140 // The deltas for this |index_| entry run to the next index entry, 141 // or the end of the deltas. 142 const size_t deltas_end = 143 (ii + 1 < index_.size()) ? index_[ii + 1].second : deltas_.size(); 144 145 SBPrefix current = index_[ii].first; 146 prefixes->push_back(current); 147 for (size_t di = index_[ii].second; di < deltas_end; ++di) { 148 current += deltas_[di]; 149 prefixes->push_back(current); 150 } 151 } 152} 153 154// static 155PrefixSet* PrefixSet::LoadFile(const base::FilePath& filter_name) { 156 int64 size_64; 157 if (!file_util::GetFileSize(filter_name, &size_64)) 158 return NULL; 159 using base::MD5Digest; 160 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) 161 return NULL; 162 163 file_util::ScopedFILE file(file_util::OpenFile(filter_name, "rb")); 164 if (!file.get()) 165 return NULL; 166 167 FileHeader header; 168 size_t read = fread(&header, sizeof(header), 1, file.get()); 169 if (read != 1) 170 return NULL; 171 172 if (header.magic != kMagic || header.version != kVersion) 173 return NULL; 174 175 std::vector<std::pair<SBPrefix,size_t> > index; 176 const size_t index_bytes = sizeof(index[0]) * header.index_size; 177 178 std::vector<uint16> deltas; 179 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; 180 181 // Check for bogus sizes before allocating any space. 182 const size_t expected_bytes = 183 sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); 184 if (static_cast<int64>(expected_bytes) != size_64) 185 return NULL; 186 187 // The file looks valid, start building the digest. 188 base::MD5Context context; 189 base::MD5Init(&context); 190 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), 191 sizeof(header))); 192 193 // Read the index vector. Herb Sutter indicates that vectors are 194 // guaranteed to be contiuguous, so reading to where element 0 lives 195 // is valid. 196 if (header.index_size) { 197 index.resize(header.index_size); 198 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); 199 if (read != index.size()) 200 return NULL; 201 base::MD5Update(&context, 202 base::StringPiece(reinterpret_cast<char*>(&(index[0])), 203 index_bytes)); 204 } 205 206 // Read vector of deltas. 207 if (header.deltas_size) { 208 deltas.resize(header.deltas_size); 209 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); 210 if (read != deltas.size()) 211 return NULL; 212 base::MD5Update(&context, 213 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), 214 deltas_bytes)); 215 } 216 217 base::MD5Digest calculated_digest; 218 base::MD5Final(&calculated_digest, &context); 219 220 base::MD5Digest file_digest; 221 read = fread(&file_digest, sizeof(file_digest), 1, file.get()); 222 if (read != 1) 223 return NULL; 224 225 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) 226 return NULL; 227 228 // Steals contents of |index| and |deltas| via swap(). 229 return new PrefixSet(&index, &deltas); 230} 231 232bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { 233 FileHeader header; 234 header.magic = kMagic; 235 header.version = kVersion; 236 header.index_size = static_cast<uint32>(index_.size()); 237 header.deltas_size = static_cast<uint32>(deltas_.size()); 238 239 // Sanity check that the 32-bit values never mess things up. 240 if (static_cast<size_t>(header.index_size) != index_.size() || 241 static_cast<size_t>(header.deltas_size) != deltas_.size()) { 242 NOTREACHED(); 243 return false; 244 } 245 246 file_util::ScopedFILE file(file_util::OpenFile(filter_name, "wb")); 247 if (!file.get()) 248 return false; 249 250 base::MD5Context context; 251 base::MD5Init(&context); 252 253 // TODO(shess): The I/O code in safe_browsing_store_file.cc would 254 // sure be useful about now. 255 size_t written = fwrite(&header, sizeof(header), 1, file.get()); 256 if (written != 1) 257 return false; 258 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), 259 sizeof(header))); 260 261 // As for reads, the standard guarantees the ability to access the 262 // contents of the vector by a pointer to an element. 263 if (index_.size()) { 264 const size_t index_bytes = sizeof(index_[0]) * index_.size(); 265 written = fwrite(&(index_[0]), sizeof(index_[0]), index_.size(), 266 file.get()); 267 if (written != index_.size()) 268 return false; 269 base::MD5Update(&context, 270 base::StringPiece( 271 reinterpret_cast<const char*>(&(index_[0])), 272 index_bytes)); 273 } 274 275 if (deltas_.size()) { 276 const size_t deltas_bytes = sizeof(deltas_[0]) * deltas_.size(); 277 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), 278 file.get()); 279 if (written != deltas_.size()) 280 return false; 281 base::MD5Update(&context, 282 base::StringPiece( 283 reinterpret_cast<const char*>(&(deltas_[0])), 284 deltas_bytes)); 285 } 286 287 base::MD5Digest digest; 288 base::MD5Final(&digest, &context); 289 written = fwrite(&digest, sizeof(digest), 1, file.get()); 290 if (written != 1) 291 return false; 292 293 // TODO(shess): Can this code check that the close was successful? 294 file.reset(); 295 296 return true; 297} 298 299} // namespace safe_browsing 300