// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
5f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "components/metrics/compression_utils.h"
6f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
7f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include <vector>
8f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
9f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "base/basictypes.h"
10f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "base/logging.h"
11f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "base/sys_byteorder.h"
12f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)#include "third_party/zlib/zlib.h"
13f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
14f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace {
15f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
// Extra room, in bytes, reserved in the compression output buffer on top of
// compressBound() to allow for a gzip header being larger than a zlib header.
const size_t kGzipZlibHeaderDifferenceBytes = 16;

// Add this value to the window bits passed to deflateInit2() and
// inflateInit2() to get a gzip header instead of a zlib header.
const int kWindowBitsToGetGzipHeader = 16;

// The amount of memory zlib uses to compress data. Valid values range from 1
// to 9, with 8 being the default. For details, see:
// http://www.zlib.net/manual.html (search for memLevel).
const int kZlibMemoryLevel = 8;
27f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
28f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// This code is taken almost verbatim from third_party/zlib/compress.c. The only
29f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// difference is deflateInit2() is called which sets the window bits to be > 16.
30f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// That causes a gzip header to be emitted rather than a zlib header.
31f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)int GzipCompressHelper(Bytef* dest,
32f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                       uLongf* dest_length,
33f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                       const Bytef* source,
34f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                       uLong source_length) {
35f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  z_stream stream;
36f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
37f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.next_in = bit_cast<Bytef*>(source);
38f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.avail_in = static_cast<uInt>(source_length);
39f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.next_out = dest;
40f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.avail_out = static_cast<uInt>(*dest_length);
41f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (static_cast<uLong>(stream.avail_out) != *dest_length)
42f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return Z_BUF_ERROR;
43f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
44f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.zalloc = static_cast<alloc_func>(0);
45f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.zfree = static_cast<free_func>(0);
46f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.opaque = static_cast<voidpf>(0);
47f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
48f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  gz_header gzip_header;
49f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  memset(&gzip_header, 0, sizeof(gzip_header));
50f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  int err = deflateInit2(&stream,
51f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         Z_DEFAULT_COMPRESSION,
52f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         Z_DEFLATED,
53f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         MAX_WBITS + kWindowBitsToGetGzipHeader,
54f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         kZlibMemoryLevel,
55f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         Z_DEFAULT_STRATEGY);
56f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (err != Z_OK)
57f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return err;
58f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
59f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  err = deflateSetHeader(&stream, &gzip_header);
60f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (err != Z_OK)
61f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return err;
62f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
63f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  err = deflate(&stream, Z_FINISH);
64f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (err != Z_STREAM_END) {
65f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    deflateEnd(&stream);
66f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return err == Z_OK ? Z_BUF_ERROR : err;
67f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
68f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  *dest_length = stream.total_out;
69f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
70f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  err = deflateEnd(&stream);
71f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return err;
72f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
73f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
74f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// This code is taken almost verbatim from third_party/zlib/uncompr.c. The only
75f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// difference is inflateInit2() is called which sets the window bits to be > 16.
76f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// That causes a gzip header to be parsed rather than a zlib header.
77f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)int GzipUncompressHelper(Bytef* dest,
78f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         uLongf* dest_length,
79f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         const Bytef* source,
80f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         uLong source_length) {
81f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  z_stream stream;
82f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
83f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.next_in = bit_cast<Bytef*>(source);
84f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.avail_in = static_cast<uInt>(source_length);
85f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (static_cast<uLong>(stream.avail_in) != source_length)
86f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return Z_BUF_ERROR;
87f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
88f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.next_out = dest;
89f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.avail_out = static_cast<uInt>(*dest_length);
90f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (static_cast<uLong>(stream.avail_out) != *dest_length)
91f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return Z_BUF_ERROR;
92f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
93f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.zalloc = static_cast<alloc_func>(0);
94f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  stream.zfree = static_cast<free_func>(0);
95f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
96f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  int err = inflateInit2(&stream, MAX_WBITS + kWindowBitsToGetGzipHeader);
97f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (err != Z_OK)
98f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return err;
99f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
100f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  err = inflate(&stream, Z_FINISH);
101f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (err != Z_STREAM_END) {
102f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    inflateEnd(&stream);
103f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0))
104f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)      return Z_DATA_ERROR;
105f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return err;
106f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
107f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  *dest_length = stream.total_out;
108f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
109f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  err = inflateEnd(&stream);
110f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return err;
111f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
112f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
113f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)// Returns the uncompressed size from GZIP-compressed |compressed_data|.
114f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)uint32 GetUncompressedSize(const std::string& compressed_data) {
115f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  // The uncompressed size is stored in the last 4 bytes of |input| in LE.
116f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uint32 size;
117f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (compressed_data.length() < sizeof(size))
118f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return 0;
119f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  memcpy(&size, &compressed_data[compressed_data.length() - sizeof(size)],
120f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)         sizeof(size));
121f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return base::ByteSwapToLE32(size);
122f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
123f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
124f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}  // namespace
125f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
126f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)namespace metrics {
127f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
128f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool GzipCompress(const std::string& input, std::string* output) {
129f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  const uLongf input_size = static_cast<uLongf>(input.size());
130f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  std::vector<Bytef> compressed_data(kGzipZlibHeaderDifferenceBytes +
131f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                                     compressBound(input_size));
132f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
133f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uLongf compressed_size = static_cast<uLongf>(compressed_data.size());
134f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  if (GzipCompressHelper(&compressed_data.front(),
135f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         &compressed_size,
136f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         bit_cast<const Bytef*>(input.data()),
137f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                         input_size) != Z_OK) {
138f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)    return false;
139f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  }
140f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
141f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  compressed_data.resize(compressed_size);
142f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  output->assign(compressed_data.begin(), compressed_data.end());
143f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  DCHECK_EQ(input.size(), GetUncompressedSize(*output));
144f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return true;
145f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
146f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
147f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)bool GzipUncompress(const std::string& input, std::string* output) {
148f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  output->resize(GetUncompressedSize(input));
149f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  uLongf uncompressed_size = static_cast<uLongf>(output->length());
150f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)  return GzipUncompressHelper(bit_cast<Bytef*>(output->data()),
151f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                              &uncompressed_size,
152f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                              bit_cast<const Bytef*>(input.data()),
153f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)                              static_cast<uLongf>(input.length())) == Z_OK;
154f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}
155f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)
156f8ee788a64d60abd8f2d742a5fdedde054ecd910Torne (Richard Coles)}  // namespace metrics
157