package com.google.android.rappor; // BEGIN android-changed: Removed guava dependency // import static com.google.common.base.Preconditions.checkArgument; // // import com.google.common.base.Verify; // END android-changed import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; import java.util.BitSet; // BEGIN android-changed import java.util.Random; // END android-changed // BEGIN android-changed: Remove javax // import javax.annotation.Nullable; // import javax.annotation.concurrent.GuardedBy; // END android-changed /** * Encodes reports using the RAPPOR differentially-private encoding algorithm. * * The RAPPOR algorithm is described at go/rappor and presented in detail at go/rappor-writeup. * * @author bonawitz@google.com Keith Bonawitz */ // TODO(bonawitz): Make encoder and interface and make this a final class implementing it. // We can't just make this final now because there exist tests that need to Mock it. public class Encoder { /** * A non-decreasing version number. * *

The version number should increase any time the Encoder has a user-visible functional change * to any of encoding algorithms or the interpretation of the input parameters. */ public static final long VERSION = 3; /** * Minimum length required for the user secret, in bytes. */ public static final int MIN_USER_SECRET_BYTES = HmacDrbg.ENTROPY_INPUT_SIZE_BYTES; /** * Maximum number of bits in the RAPPOR-encoded report. * * Must be less than HmacDrbg.MAX_BYTES_TOTAL; */ public static final int MAX_BITS = 4096; /** * Maximum number of Bloom filter hashes used for RAPPOR-encoded strings. * *

This is constrained in the current implementation by requiring 2 bytes from an MD5 value * (which is 16 bytes long) for each Bloom filter hash. */ public static final int MAX_BLOOM_HASHES = 8; /** * Maximum number of cohorts supported. */ public static final int MAX_COHORTS = 128; /** * First (and only) byte of HMAC_DRBG personalization strings used to generate the cohort number. */ private static final byte HMAC_DRBG_TYPE_COHORT = 0x00; /** * First byte of HMAC_DRBG personalization strings used to generate the PRR response. */ private static final byte HMAC_DRBG_TYPE_PRR = 0x01; /** * A unique identifier for this Encoder, represented in UTF-8. * *

The (userSecret, encoderId) pair identify a the logical memoization table used for RAPPOR's * Permanent Randomized Response stage. Therefore, for any userSecret, each Encoder must have a * distinct identifier for Permanent Randomized Response to be effective. * *

In practice, "memoization" is achieved by generating deterministic pseudo-random bits using * HMAC_DRBG. encoderIdBytes is used as part of the personalization string. */ private final byte[] encoderIdBytes; /** * The RAPPOR "f" probability, on the range [0.0, 1.0]. * *

This it the probability of replacing a bit from the input with a uniform random bit when * generating the permanent randomized response. * *

Setting probabilityF to 0 disables the PRR phase of RAPPOR. */ private final double probabilityF; /** * The RAPPOR "p" probability, on the range [0.0, 1.0]. * *

This is the probability of emitting a '1' bit in the instantaneous randomized response, * given that the corresponding bit in the permanent randomized response is '0'. * *

Setting probabilityP to 0.0 and probabilityQ to 1.0 disables the IRR phase of RAPPOR. */ private final double probabilityP; /** * The RAPPOR "1" probability, on the range [0.0, 1.0]. * *

This is the probability of emitting a 1 bit in the instantaneous randomized response, given * that the corresponding bit in the permanent randomized response is 1. * *

Setting probabilityP to 0.0 and probabilityQ to 1.0 disables the IRR phase of RAPPOR. */ private final double probabilityQ; /** * The number of bits in the RAPPOR-encoded report. * *

Must satisfy 1 <= numBits <= MAX_BITS. * *

*/ private final int numBits; /** * The number of hash functions used forming the Bloom filter encoding of a string. * *

Must satisfy 1 <= numBloomHashes <= MAX_BLOOM_HASHES. */ private final int numBloomHashes; /** * The number of cohorts used for cohort assignment. */ private final int numCohorts; /** * The cohort this user is assigned to for Bloom filter string encoding. * *

This value is stable for a given (userSecret, numCohorts) pair. Furthermore, if two * encoders use the same userSecret but have different numCohorts values, the cohort assignment * for the encoder with the smaller numCohorts value is a bitwise suffix of the cohort assignment * for the encoder with the larger numCohorts value. It follows that, if you know maximum cohort * assignment across a set of encoders, and you know the numCohorts setting for each encoder, then * you can deduce the cohort assignment for each encoder by taking the bitwise-and of the max * cohort value and (numCohorts-1), noting that numCohorts is required to be a positive power of * 2. */ private final int cohort; /** * A bitmask with 1 bits in all the positions where a RAPPOR-encoded report could have a 1 bit. * *

The bitmask has the lowest order numBits set to 1 and the rest 0. */ private final BitSet inputMask; /** * SHA-256 utility class instance. * *

This object is stateful; access must be synchronized. The reset method must be * called before each use. */ // BEGIN android-changed: Remove javax // @GuardedBy("this") // END android-changed private final MessageDigest sha256; /** * MD5 utility class instance. * *

This object is stateful; access must be synchronized. The reset method must be * called before each use. */ // BEGIN android-changed: Remove javax // @GuardedBy("this") // END android-changed private final MessageDigest md5; /** * A SecureRandom instance, initialized with a cryptographically secure random seed. */ // BEGIN android-changed // private final SecureRandom random; private final Random random; // BEGIN android-changed /** * Entropy input for constructing HmacDrbg objects. */ private final byte[] userSecret; /** * Constructs a new RAPPOR message encoder. * * @param userSecret Stable secret randomly selected for this user. UserSecret must be at least * MIN_USER_SECRET_BYTES bytes of high-quality entropy. Changing the user secret clears the * memoized cohort assignments and permanent randomized responses. Be aware that resetting * these memoizations has significant privacy risks -- consult documentation at go/rappor for * more details. * @param encoderId Uniquely identifies this encoder. Used to differentiate momoized * cohort assignments and permanent randomized responses. * @param numBits The number of bits in the RAPPOR-encoded report. * @param probabilityF The RAPPOR "f" probability, on the range [0.0, 1.0]. This will be * quantized to the nearest 1/128. * @param probabilityP The RAPPOR "p" probability, on the range [0.0, 1.0]. * @param probabilityQ The RAPPOR "1" probability, on the range [0.0, 1.0]. * @param numCohorts Number of cohorts into which the user pool is randomly segmented. * @param numBloomHashes The number of hash functions used forming the Bloom filter encoding of a * string. */ public Encoder(byte[] userSecret, String encoderId, int numBits, double probabilityF, double probabilityP, double probabilityQ, int numCohorts, int numBloomHashes) { this( null, // random null, // md5, null, // sha256, userSecret, encoderId, numBits, probabilityF, probabilityP, probabilityQ, numCohorts, numBloomHashes); } /** * Constructs a new RAPPOR message encoder, using constructor-style dependency injection. * * @param random A cryptographically secure random number generator, or null (in which case a * new SecureRandom will be constructed). * @param md5 A configured MD5 hash algorithm, or null (in which case a new MessageDigest will be * constructed). Note: MessageDigest objects are stateful, and that state must not be * modified while calls to the Encoder are active. * @param sha256 A configured SHA-256 hash algorithm, or null (in which case a new MessageDigest * will be constructed). Note: MessageDigest objects are stateful, and that state must not * be modified while calls to the Encoder are active. * @param userSecret Stable secret randomly selected for this user. UserSecret must be at least * 32 bytes of high-quality entropy. Changing the user secret clears the memoized cohort * assignments and permanent randomized responses. Be aware that resetting these memoizations * has significant privacy risks -- consult documentation at go/rappor for more details. * @param encoderId Uniquely identifies this encoder. Used to differentiate momoized * cohort assignments and permanent randomized responses. * @param numBits The number of bits in the RAPPOR-encoded report. * @param probabilityF The RAPPOR "f" probability, on the range [0.0, 1.0]. This will be * quantized to the nearest 1/128. * @param probabilityP The RAPPOR "p" probability, on the range [0.0, 1.0]. * @param probabilityQ The RAPPOR "1" probability, on the range [0.0, 1.0]. * @param numCohorts Number of cohorts into which the user pool is randomly segmented. * @param numBloomHashes The number of hash functions used forming the Bloom filter encoding of a * string. */ public Encoder( // BEGIN android-changed: Remove javax // @Nullable SecureRandom random, // @Nullable MessageDigest md5, // @Nullable MessageDigest sha256, // SecureRandom random, Random random, MessageDigest md5, MessageDigest sha256, // END android-changed byte[] userSecret, String encoderId, int numBits, double probabilityF, double probabilityP, double probabilityQ, int numCohorts, int numBloomHashes) { if (md5 != null) { this.md5 = md5; } else { try { this.md5 = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException impossible) { // This should never happen. Every implementation of the Java platform // is required to support MD5. throw new AssertionError(impossible); } } this.md5.reset(); if (sha256 != null) { this.sha256 = sha256; } else { try { this.sha256 = MessageDigest.getInstance("SHA-256"); } catch (NoSuchAlgorithmException impossible) { // This should never happen. Every implementation of the Java platform // is required to support 256. throw new AssertionError(impossible); } } this.sha256.reset(); this.encoderIdBytes = encoderId.getBytes(StandardCharsets.UTF_8); if (random != null) { this.random = random; } else { this.random = new SecureRandom(); } // android-changed: Removed guava dependency // checkArgument( // userSecret.length >= MIN_USER_SECRET_BYTES, // "userSecret must be at least %s bytes of high-quality entropy.", // MIN_USER_SECRET_BYTES); checkArgument( userSecret.length >= MIN_USER_SECRET_BYTES, "userSecret must be at least " + MIN_USER_SECRET_BYTES + " bytes of high-quality entropy."); this.userSecret = userSecret; checkArgument( probabilityF >= 0 && probabilityF <= 1, "probabilityF must be on range [0.0, 1.0]"); this.probabilityF = Math.round(probabilityF * 128) / 128.0; checkArgument( probabilityP >= 0 && probabilityP <= 1, "probabilityP must be on range [0.0, 1.0]"); this.probabilityP = probabilityP; checkArgument( probabilityQ >= 0 && probabilityQ <= 1, "probabilityQ must be on range [0.0, 1.0]"); this.probabilityQ = probabilityQ; checkArgument( numBits >= 1 && numBits <= MAX_BITS, "numBits must be on range [1, " + MAX_BITS + "]."); this.numBits = numBits; // Make a bitmask with the lowest-order numBits set to 1. this.inputMask = new BitSet(numBits); this.inputMask.set(0, numBits, true); checkArgument( numBloomHashes >= 1 && numBloomHashes <= numBits, "numBloomHashes must be on range [1, numBits)."); this.numBloomHashes = numBloomHashes; checkArgument( numCohorts >= 1 && numCohorts <= MAX_COHORTS, "numCohorts must be on range [1, " + MAX_COHORTS + "]."); // Assuming numCohorts >= 1: // // If numCohorts is a power of 2, then numCohorts has one bit set and numCohorts - 1 has only // the bits to the right of numCohorts's bit set. The bitwise-and of these is 0. // // If numCohorts is not a power of 2, then numCohorts has at least two bits set. It follows // subtracting one from numCohorts keeps the most significant bit set to 1, and thus the // bitwise-and has at least this non-zero bit. boolean numCohortsIsPowerOfTwo = (numCohorts & (numCohorts - 1)) == 0; checkArgument(numCohortsIsPowerOfTwo, "numCohorts must be a power of 2."); this.numCohorts = numCohorts; // cohortMasterAssignment depends only on the userSecret. HmacDrbg cohortDrbg = new HmacDrbg(userSecret, new byte[] {HMAC_DRBG_TYPE_COHORT}); ByteBuffer cohortDrbgBytes = ByteBuffer.wrap(cohortDrbg.nextBytes(4)); int cohortMasterAssignment = Math.abs(cohortDrbgBytes.getInt()) % MAX_COHORTS; this.cohort = cohortMasterAssignment & (numCohorts - 1); } public double getProbabilityF() { return probabilityF; } public double getProbabilityP() { return probabilityP; } public double getProbabilityQ() { return probabilityQ; } public int getNumBits() { return numBits; } public int getNumBloomHashes() { return numBloomHashes; } public int getNumCohorts() { return numCohorts; } public int getCohort() { return cohort; } /** * Returns the Encoder ID as a UTF-8 formatted string. */ public String getEncoderId() { return new String(encoderIdBytes, StandardCharsets.UTF_8); } /** * Encodes a boolean into a RAPPOR report. * *

The boolean is 0 or 1, then encoded using permanent and instantaneous randomized response. * *

In most cases, numBits should be 1 when using this method. */ public byte[] encodeBoolean(boolean bool) { BitSet input = new BitSet(numBits); input.set(0, bool); return encodeBits(input); } /** * Encodes an ordinal into a RAPPOR report. * *

The ordinal is represented using a 1-hot binary representation, then encoded using permanent * and instantaneous randomized response. * * @param ordinal A value on the range [0, numBits). */ public byte[] encodeOrdinal(int ordinal) { checkArgument( ordinal >= 0 && ordinal < numBits, "Ordinal value must be in range [0, numBits)."); BitSet input = new BitSet(numBits); input.set(ordinal, true); return encodeBits(input); } /** * Encodes a string into a RAPPOR report. * *

The string is represented using a Bloom filter with numBloomHashes hash functions, then * encoded using permanent and instantaneous randomized response. * * @param string An arbitrary string. */ public byte[] encodeString(String string) { // Implements a Bloom filter by slicing a single MD5 hash into numBloomHashes bit indices. byte[] stringInUtf8 = string.getBytes(StandardCharsets.UTF_8); byte[] message = ByteBuffer.allocate(4 + stringInUtf8.length) .putInt(cohort) .put(stringInUtf8) .array(); byte[] digest; synchronized (this) { md5.reset(); digest = md5.digest(message); } // android-changed: Removed guava dependency // Verify.verify(digest.length == 16); // Verify.verify(numBloomHashes <= digest.length / 2); verify(digest.length == 16); verify(numBloomHashes <= digest.length / 2); BitSet input = new BitSet(numBits); for (int i = 0; i < numBloomHashes; i++) { // Convert byte pairs to ints on [0, 65535]. // Anding with 0xFF converts signed byte to unsigned int. int digestWord = (digest[i * 2] & 0xFF) * 256 + (digest[i * 2 + 1] & 0xFF); int chosenBit = digestWord % numBits; input.set(chosenBit, true); } return encodeBits(input); } /** * Encodes an arbitrary bitstring into a RAPPOR report. * * @param bits A bitstring in which only the least significant numBits bits may be 1. */ public byte[] encodeBits(byte[] bits) { return encodeBits(BitSet.valueOf(bits)); } /** * Encodes an arbitrary bitstring into a RAPPOR report. * * @param bits A bitstring in which only the least significant numBits bits may be 1. */ private byte[] encodeBits(BitSet bits) { BitSet permanentRandomizedResponse = computePermanentRandomizedResponse(bits); BitSet encodedBitSet = computeInstantaneousRandomizedResponse(permanentRandomizedResponse); // BitSet.toByteArray only returns enough bytes to capture the most significant bit // that is set. For example, a BitSet with no bits set could return a length-0 array. // In contrast, we guarantee that our output is sized according to numBits. byte[] encodedBytes = encodedBitSet.toByteArray(); byte[] output = new byte[(numBits + 7) / 8]; // android-changed: Removed guava dependency // Verify.verify(encodedBytes.length <= output.length); verify(encodedBytes.length <= output.length); System.arraycopy( encodedBytes, // src 0, // srcPos output, // dest 0, // destPos encodedBytes.length); // length return output; } /** * Returns the permanent randomized response for the given bits. * *

The response for a particular bits input is guaranteed to always be the same for any encoder * constructed with the same parameters (including the encoderId and the userSecret). */ private BitSet computePermanentRandomizedResponse(BitSet bits) { // Ensures that the input only has bits set in the lowest BitSet masked = new BitSet(); masked.or(bits); masked.andNot(inputMask); checkArgument(masked.isEmpty(), "Input bits had bits set past Encoder's numBits limit."); if (probabilityF == 0.0) { return bits; } // Builds a personalization string that contains both the encoderId and input value (bits), // and is no longer than HmacDrbg.MAX_PERSONALIZATION_STRING_LENGTH_BYTES. The first byte // of the personalization string is always HMAC_DRBG_TYPE_PRR, to avoid collisions with the // cohort-generation personalization string. byte[] personalizationString; synchronized (this) { int personalizationStringLength = Math.min(HmacDrbg.MAX_PERSONALIZATION_STRING_LENGTH_BYTES, 1 + sha256.getDigestLength()); personalizationString = new byte[personalizationStringLength]; personalizationString[0] = HMAC_DRBG_TYPE_PRR; sha256.reset(); sha256.update(encoderIdBytes); sha256.update(new byte[] {0}); sha256.update(bits.toByteArray()); byte[] digest = sha256.digest(personalizationString); System.arraycopy(digest, 0, personalizationString, 1, personalizationString.length - 1); } HmacDrbg drbg = new HmacDrbg(userSecret, personalizationString); byte[] pseudorandomStream = drbg.nextBytes(numBits); // android-changed: Removed guava dependency // Verify.verify(numBits <= pseudorandomStream.length); verify(numBits <= pseudorandomStream.length); int probabilityFTimes128 = (int) Math.round(probabilityF * 128); BitSet result = new BitSet(numBits); for (int i = 0; i < numBits; i++) { // Grabs a single byte from the pseudorandom stream. // Anding with 0xFF converts a signed byte to an unsigned integer. int pseudorandomByte = pseudorandomStream[i] & 0xFF; // Uses bits 1-7 to get a random number between 0 and 127. int uniform0to127 = pseudorandomByte >> 1; boolean shouldUseNoise = uniform0to127 < probabilityFTimes128; if (shouldUseNoise) { // Uses bit 0 as a flip of a fair coin. result.set(i, (pseudorandomByte & 0x01) > 0); } else { result.set(i, bits.get(i)); } } return result; } /** * Returns the instantaneous randomized response for the given bits. * *

The instantaneous response is NOT memoized -- it is sampled randomly on * every invocation. */ private BitSet computeInstantaneousRandomizedResponse(BitSet bits) { // Ensures that the input only has bits set in the lowest BitSet masked = new BitSet(); masked.or(bits); masked.andNot(inputMask); checkArgument(masked.isEmpty(), "Input bits had bits set past Encoder's numBits limit."); if (probabilityP == 0.0 && probabilityQ == 1.0) { return bits; } BitSet response = new BitSet(numBits); for (int i = 0; i < numBits; i++) { boolean bit = bits.get(i); double probability = bit ? probabilityQ : probabilityP; boolean responseBit = random.nextFloat() < probability; response.set(i, responseBit); } return response; } // BEGIN android-changed: Added guava methods private static void checkArgument(boolean expression, Object errorMessage) { if (!expression) { throw new IllegalArgumentException(String.valueOf(errorMessage)); } } private static void verify(boolean expression) { if (!expression) { throw new java.lang.IllegalStateException(); } } // END android-changed }