1f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Copyright 2014 The Chromium Authors. All rights reserved. 2f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// found in the LICENSE file. 4f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 5f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "components/metrics/compression_utils.h" 6f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 7f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include <vector> 8f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 9f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "base/basictypes.h" 10f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "base/logging.h" 11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "base/sys_byteorder.h" 12f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "third_party/zlib/zlib.h" 13f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace { 15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 16f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// The difference in bytes between a zlib header and a gzip header. 17f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)const size_t kGzipZlibHeaderDifferenceBytes = 16; 18f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 19f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Pass an integer greater than the following get a gzip header instead of a 20f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// zlib header when calling deflateInit2() and inflateInit2(). 
const int kWindowBitsToGetGzipHeader = 16;

// This describes the amount of memory zlib uses to compress data. It can go
// from 1 to 9, with 8 being the default. For details, see:
// http://www.zlib.net/manual.html (search for memLevel).
const int kZlibMemoryLevel = 8;

// This code is taken almost verbatim from third_party/zlib/compress.c. The only
// difference is deflateInit2() is called which sets the window bits to be > 16.
// That causes a gzip header to be emitted rather than a zlib header.
31f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)int GzipCompressHelper(Bytef* dest, 32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uLongf* dest_length, 33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const Bytef* source, 34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uLong source_length) { 35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) z_stream stream; 36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.next_in = bit_cast<Bytef*>(source); 38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.avail_in = static_cast<uInt>(source_length); 39f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.next_out = dest; 40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.avail_out = static_cast<uInt>(*dest_length); 41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (static_cast<uLong>(stream.avail_out) != *dest_length) 42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return Z_BUF_ERROR; 43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.zalloc = static_cast<alloc_func>(0); 45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.zfree = static_cast<free_func>(0); 46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.opaque = static_cast<voidpf>(0); 47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 48f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) gz_header gzip_header; 49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) memset(&gzip_header, 0, sizeof(gzip_header)); 50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) int err = deflateInit2(&stream, 51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) Z_DEFAULT_COMPRESSION, 
52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) Z_DEFLATED, 53f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) MAX_WBITS + kWindowBitsToGetGzipHeader, 54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) kZlibMemoryLevel, 55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) Z_DEFAULT_STRATEGY); 56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (err != Z_OK) 57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err; 58f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) err = deflateSetHeader(&stream, &gzip_header); 60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (err != Z_OK) 61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err; 62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) err = deflate(&stream, Z_FINISH); 64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (err != Z_STREAM_END) { 65f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) deflateEnd(&stream); 66f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err == Z_OK ? Z_BUF_ERROR : err; 67f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 68f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) *dest_length = stream.total_out; 69f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 70f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) err = deflateEnd(&stream); 71f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err; 72f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} 73f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 74f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// This code is taken almost verbatim from third_party/zlib/uncompr.c. 
The only 75f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// difference is inflateInit2() is called which sets the window bits to be > 16. 76f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// That causes a gzip header to be parsed rather than a zlib header. 77f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)int GzipUncompressHelper(Bytef* dest, 78f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uLongf* dest_length, 79f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const Bytef* source, 80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uLong source_length) { 81f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) z_stream stream; 82f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 83f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.next_in = bit_cast<Bytef*>(source); 84f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.avail_in = static_cast<uInt>(source_length); 85f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (static_cast<uLong>(stream.avail_in) != source_length) 86f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return Z_BUF_ERROR; 87f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 88f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.next_out = dest; 89f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.avail_out = static_cast<uInt>(*dest_length); 90f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (static_cast<uLong>(stream.avail_out) != *dest_length) 91f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return Z_BUF_ERROR; 92f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 93f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.zalloc = static_cast<alloc_func>(0); 94f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) stream.zfree = static_cast<free_func>(0); 
95f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 96f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) int err = inflateInit2(&stream, MAX_WBITS + kWindowBitsToGetGzipHeader); 97f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (err != Z_OK) 98f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err; 99f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 100f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) err = inflate(&stream, Z_FINISH); 101f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (err != Z_STREAM_END) { 102f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) inflateEnd(&stream); 103f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0)) 104f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return Z_DATA_ERROR; 105f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err; 106f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 107f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) *dest_length = stream.total_out; 108f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) err = inflateEnd(&stream); 110f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return err; 111f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} 112f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 113f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Returns the uncompressed size from GZIP-compressed |compressed_data|. 114f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)uint32 GetUncompressedSize(const std::string& compressed_data) { 115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) // The uncompressed size is stored in the last 4 bytes of |input| in LE. 
116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uint32 size; 117f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (compressed_data.length() < sizeof(size)) 118f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return 0; 119f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) memcpy(&size, &compressed_data[compressed_data.length() - sizeof(size)], 120f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) sizeof(size)); 121f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return base::ByteSwapToLE32(size); 122f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} 123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} // namespace 125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 126f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace metrics { 127f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool GzipCompress(const std::string& input, std::string* output) { 129f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) const uLongf input_size = static_cast<uLongf>(input.size()); 130f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) std::vector<Bytef> compressed_data(kGzipZlibHeaderDifferenceBytes + 131f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) compressBound(input_size)); 132f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 133f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uLongf compressed_size = static_cast<uLongf>(compressed_data.size()); 134f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) if (GzipCompressHelper(&compressed_data.front(), 135f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) &compressed_size, 136f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) bit_cast<const Bytef*>(input.data()), 
137f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) input_size) != Z_OK) { 138f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return false; 139f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) } 140f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) compressed_data.resize(compressed_size); 142f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) output->assign(compressed_data.begin(), compressed_data.end()); 143f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) DCHECK_EQ(input.size(), GetUncompressedSize(*output)); 144f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return true; 145f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} 146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 147f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool GzipUncompress(const std::string& input, std::string* output) { 148f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) output->resize(GetUncompressedSize(input)); 149f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) uLongf uncompressed_size = static_cast<uLongf>(output->length()); 150f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) return GzipUncompressHelper(bit_cast<Bytef*>(output->data()), 151f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) &uncompressed_size, 152f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) bit_cast<const Bytef*>(input.data()), 153f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) static_cast<uLongf>(input.length())) == Z_OK; 154f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} 155f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles) 156f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)} // namespace metrics 157