// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
5a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "components/rappor/rappor_metric.h"
6a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
7a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)#include "base/logging.h"
85f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)#include "base/rand_util.h"
9a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
10a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)namespace rappor {
11a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
12a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)RapporMetric::RapporMetric(const std::string& metric_name,
13a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                           const RapporParameters& parameters,
14cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                           int32_t cohort_seed)
15a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)    : metric_name_(metric_name),
16a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      parameters_(parameters),
175f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)      sample_count_(0),
18a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      bloom_filter_(parameters.bloom_filter_size_bytes,
19a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                    parameters.bloom_filter_hash_function_count,
20cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                    (cohort_seed % parameters.num_cohorts) *
21cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)                        parameters.bloom_filter_hash_function_count) {
22cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  DCHECK_GE(cohort_seed, 0);
23cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles)  DCHECK_LT(cohort_seed, RapporParameters::kMaxCohorts);
24a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
25a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
26a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)RapporMetric::~RapporMetric() {}
27a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
28a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)void RapporMetric::AddSample(const std::string& str) {
295f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  ++sample_count_;
305f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // Replace the previous sample with a 1 in sample_count_ chance so that each
315f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  // sample has equal probability of being reported.
325f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  if (base::RandGenerator(sample_count_) == 0) {
335f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)    bloom_filter_.SetString(str);
345f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  }
35a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
36a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
37a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)ByteVector RapporMetric::GetReport(const std::string& secret) const {
38a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // Generate a deterministically random mask of fake data using the
39a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // client's secret key + real data as a seed.  The inclusion of the secret
40a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // in the seed avoids correlations between real and fake data.
41a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // The seed isn't a human-readable string.
42effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch  const std::string personalization_string = metric_name_ +
43a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      std::string(bytes().begin(), bytes().end());
44a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  HmacByteVectorGenerator hmac_generator(bytes().size(), secret,
45a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)                                         personalization_string);
46a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  const ByteVector fake_mask =
47a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      hmac_generator.GetWeightedRandomByteVector(parameters().fake_prob);
48a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  ByteVector fake_bits =
49a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      hmac_generator.GetWeightedRandomByteVector(parameters().fake_one_prob);
50a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
51a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // Redact most of the real data by replacing it with the fake data, hiding
52a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // and limiting the amount of information an individual client reports on.
53a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  const ByteVector* fake_and_redacted_bits =
54a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      ByteVectorMerge(fake_mask, bytes(), &fake_bits);
55a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
56a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // Generate biased coin flips for each bit.
57a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  ByteVectorGenerator coin_generator(bytes().size());
58a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  const ByteVector zero_coins =
59a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      coin_generator.GetWeightedRandomByteVector(parameters().zero_coin_prob);
60a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  ByteVector one_coins =
61a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)      coin_generator.GetWeightedRandomByteVector(parameters().one_coin_prob);
62a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
63a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // Create a randomized response report on the fake and redacted data, sending
64a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // the outcome of flipping a zero coin for the zero bits in that data, and of
65a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  // flipping a one coin for the one bits in that data, as the final report.
66a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)  return *ByteVectorMerge(*fake_and_redacted_bits, zero_coins, &one_coins);
67a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}
68a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)
695f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)void RapporMetric::SetBytesForTesting(const ByteVector& bytes) {
705f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)  bloom_filter_.SetBytesForTesting(bytes);
715f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)}
725f1c94371a64b3196d4be9466099bb892df9b88eTorne (Richard Coles)
73a1401311d1ab56c4ed0a474bd38c108f75cb0cd9Torne (Richard Coles)}  // namespace rappor
74