1a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved. 2a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)// found in the LICENSE file. 4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 5a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "components/rappor/rappor_metric.h" 6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/logging.h" 85f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "base/rand_util.h" 9a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace rappor { 11a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)RapporMetric::RapporMetric(const std::string& metric_name, 13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const RapporParameters& parameters, 14cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) int32_t cohort_seed) 15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) : metric_name_(metric_name), 16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) parameters_(parameters), 175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) sample_count_(0), 18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) bloom_filter_(parameters.bloom_filter_size_bytes, 19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) parameters.bloom_filter_hash_function_count, 20cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) (cohort_seed % parameters.num_cohorts) * 21cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) parameters.bloom_filter_hash_function_count) { 22cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) DCHECK_GE(cohort_seed, 0); 23cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) DCHECK_LT(cohort_seed, RapporParameters::kMaxCohorts); 24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)RapporMetric::~RapporMetric() {} 27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)void RapporMetric::AddSample(const std::string& str) { 295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) ++sample_count_; 305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) // Replace the previous sample with a 1 in sample_count_ chance so that each 315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) // sample has equal probability of being reported. 325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) if (base::RandGenerator(sample_count_) == 0) { 335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) bloom_filter_.SetString(str); 345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) } 35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)ByteVector RapporMetric::GetReport(const std::string& secret) const { 38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // Generate a deterministically random mask of fake data using the 39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // client's secret key + real data as a seed. The inclusion of the secret 40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // in the seed avoids correlations between real and fake data. 41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // The seed isn't a human-readable string. 42effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch const std::string personalization_string = metric_name_ + 43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) std::string(bytes().begin(), bytes().end()); 44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) HmacByteVectorGenerator hmac_generator(bytes().size(), secret, 45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) personalization_string); 46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const ByteVector fake_mask = 47a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) hmac_generator.GetWeightedRandomByteVector(parameters().fake_prob); 48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ByteVector fake_bits = 49a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) hmac_generator.GetWeightedRandomByteVector(parameters().fake_one_prob); 50a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 51a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // Redact most of the real data by replacing it with the fake data, hiding 52a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // and limiting the amount of information an individual client reports on. 53a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const ByteVector* fake_and_redacted_bits = 54a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ByteVectorMerge(fake_mask, bytes(), &fake_bits); 55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // Generate biased coin flips for each bit. 57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ByteVectorGenerator coin_generator(bytes().size()); 58a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) const ByteVector zero_coins = 59a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) coin_generator.GetWeightedRandomByteVector(parameters().zero_coin_prob); 60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) ByteVector one_coins = 61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) coin_generator.GetWeightedRandomByteVector(parameters().one_coin_prob); 62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 63a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // Create a randomized response report on the fake and redacted data, sending 64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // the outcome of flipping a zero coin for the zero bits in that data, and of 65a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) // flipping a one coin for the one bits in that data, as the final report. 66a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins); 67a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} 68a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles) 695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)void RapporMetric::SetBytesForTesting(const ByteVector& bytes) { 705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) bloom_filter_.SetBytesForTesting(bytes); 715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)} 725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles) 73a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)} // namespace rappor 74