1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/base/gzip_filter.h"
6
7#if defined(USE_SYSTEM_ZLIB)
8#include <zlib.h>
9#else
10#include "third_party/zlib/zlib.h"
11#endif
12
13#include "base/logging.h"
14#include "net/base/gzip_header.h"
15
16namespace net {
17
18GZipFilter::GZipFilter()
19    : decoding_status_(DECODING_UNINITIALIZED),
20      decoding_mode_(DECODE_MODE_UNKNOWN),
21      gzip_header_status_(GZIP_CHECK_HEADER_IN_PROGRESS),
22      zlib_header_added_(false),
23      gzip_footer_bytes_(0),
24      possible_sdch_pass_through_(false) {
25}
26
27GZipFilter::~GZipFilter() {
28  if (decoding_status_ != DECODING_UNINITIALIZED) {
29    inflateEnd(zlib_stream_.get());
30  }
31}
32
33bool GZipFilter::InitDecoding(Filter::FilterType filter_type) {
34  if (decoding_status_ != DECODING_UNINITIALIZED)
35    return false;
36
37  // Initialize zlib control block
38  zlib_stream_.reset(new z_stream);
39  if (!zlib_stream_.get())
40    return false;
41  memset(zlib_stream_.get(), 0, sizeof(z_stream));
42
43  // Set decoding mode
44  switch (filter_type) {
45    case Filter::FILTER_TYPE_DEFLATE: {
46      if (inflateInit(zlib_stream_.get()) != Z_OK)
47        return false;
48      decoding_mode_ = DECODE_MODE_DEFLATE;
49      break;
50    }
51    case Filter::FILTER_TYPE_GZIP_HELPING_SDCH:
52      possible_sdch_pass_through_ =  true;  // Needed to optionally help sdch.
53      // Fall through to GZIP case.
54    case Filter::FILTER_TYPE_GZIP: {
55      gzip_header_.reset(new GZipHeader());
56      if (!gzip_header_.get())
57        return false;
58      if (inflateInit2(zlib_stream_.get(), -MAX_WBITS) != Z_OK)
59        return false;
60      decoding_mode_ = DECODE_MODE_GZIP;
61      break;
62    }
63    default: {
64      return false;
65    }
66  }
67
68  decoding_status_ = DECODING_IN_PROGRESS;
69  return true;
70}
71
72Filter::FilterStatus GZipFilter::ReadFilteredData(char* dest_buffer,
73                                                  int* dest_len) {
74  if (!dest_buffer || !dest_len || *dest_len <= 0)
75    return Filter::FILTER_ERROR;
76
77  if (decoding_status_ == DECODING_DONE) {
78    if (GZIP_GET_INVALID_HEADER != gzip_header_status_)
79      SkipGZipFooter();
80    // Some server might send extra data after the gzip footer. We just copy
81    // them out. Mozilla does this too.
82    return CopyOut(dest_buffer, dest_len);
83  }
84
85  if (decoding_status_ != DECODING_IN_PROGRESS)
86    return Filter::FILTER_ERROR;
87
88  Filter::FilterStatus status;
89
90  if (decoding_mode_ == DECODE_MODE_GZIP &&
91      gzip_header_status_ == GZIP_CHECK_HEADER_IN_PROGRESS) {
92    // With gzip encoding the content is wrapped with a gzip header.
93    // We need to parse and verify the header first.
94    status = CheckGZipHeader();
95    switch (status) {
96      case Filter::FILTER_NEED_MORE_DATA: {
97        // We have consumed all input data, either getting a complete header or
98        // a partial header. Return now to get more data.
99        *dest_len = 0;
100        // Partial header means it can't be an SDCH header.
101        // Reason: SDCH *always* starts with 8 printable characters [a-zA-Z/_].
102        // Gzip always starts with two non-printable characters.  Hence even a
103        // single character (partial header) means that this can't be an SDCH
104        // encoded body masquerading as a GZIP body.
105        possible_sdch_pass_through_ = false;
106        return status;
107      }
108      case Filter::FILTER_OK: {
109        // The header checking succeeds, and there are more data in the input.
110        // We must have got a complete header here.
111        DCHECK_EQ(gzip_header_status_, GZIP_GET_COMPLETE_HEADER);
112        break;
113      }
114      case Filter::FILTER_ERROR: {
115        if (possible_sdch_pass_through_ &&
116            GZIP_GET_INVALID_HEADER == gzip_header_status_) {
117          decoding_status_ = DECODING_DONE;  // Become a pass through filter.
118          return CopyOut(dest_buffer, dest_len);
119        }
120        decoding_status_ = DECODING_ERROR;
121        return status;
122      }
123      default: {
124        status = Filter::FILTER_ERROR;    // Unexpected.
125        decoding_status_ = DECODING_ERROR;
126        return status;
127      }
128    }
129  }
130
131  int dest_orig_size = *dest_len;
132  status = DoInflate(dest_buffer, dest_len);
133
134  if (decoding_mode_ == DECODE_MODE_DEFLATE && status == Filter::FILTER_ERROR) {
135    // As noted in Mozilla implementation, some servers such as Apache with
136    // mod_deflate don't generate zlib headers.
137    // See 677409 for instances where this work around is needed.
138    // Insert a dummy zlib header and try again.
139    if (InsertZlibHeader()) {
140      *dest_len = dest_orig_size;
141      status = DoInflate(dest_buffer, dest_len);
142    }
143  }
144
145  if (status == Filter::FILTER_DONE) {
146    decoding_status_ = DECODING_DONE;
147  } else if (status == Filter::FILTER_ERROR) {
148    decoding_status_ = DECODING_ERROR;
149  }
150
151  return status;
152}
153
154Filter::FilterStatus GZipFilter::CheckGZipHeader() {
155  DCHECK_EQ(gzip_header_status_, GZIP_CHECK_HEADER_IN_PROGRESS);
156
157  // Check input data in pre-filter buffer.
158  if (!next_stream_data_ || stream_data_len_ <= 0)
159    return Filter::FILTER_ERROR;
160
161  const char* header_end = NULL;
162  GZipHeader::Status header_status;
163  header_status = gzip_header_->ReadMore(next_stream_data_, stream_data_len_,
164                                         &header_end);
165
166  switch (header_status) {
167    case GZipHeader::INCOMPLETE_HEADER: {
168      // We read all the data but only got a partial header.
169      next_stream_data_ = NULL;
170      stream_data_len_ = 0;
171      return Filter::FILTER_NEED_MORE_DATA;
172    }
173    case GZipHeader::COMPLETE_HEADER: {
174      // We have a complete header. Check whether there are more data.
175      int num_chars_left = static_cast<int>(stream_data_len_ -
176                                            (header_end - next_stream_data_));
177      gzip_header_status_ = GZIP_GET_COMPLETE_HEADER;
178
179      if (num_chars_left > 0) {
180        next_stream_data_ = const_cast<char*>(header_end);
181        stream_data_len_ = num_chars_left;
182        return Filter::FILTER_OK;
183      } else {
184        next_stream_data_ = NULL;
185        stream_data_len_ = 0;
186        return Filter::FILTER_NEED_MORE_DATA;
187      }
188    }
189    case GZipHeader::INVALID_HEADER: {
190      gzip_header_status_ = GZIP_GET_INVALID_HEADER;
191      return Filter::FILTER_ERROR;
192    }
193    default: {
194      break;
195    }
196  }
197
198  return Filter::FILTER_ERROR;
199}
200
201Filter::FilterStatus GZipFilter::DoInflate(char* dest_buffer, int* dest_len) {
202  // Make sure we have both valid input data and output buffer.
203  if (!dest_buffer || !dest_len || *dest_len <= 0)  // output
204    return Filter::FILTER_ERROR;
205
206  if (!next_stream_data_ || stream_data_len_ <= 0) {  // input
207    *dest_len = 0;
208    return Filter::FILTER_NEED_MORE_DATA;
209  }
210
211  // Fill in zlib control block
212  zlib_stream_.get()->next_in = bit_cast<Bytef*>(next_stream_data_);
213  zlib_stream_.get()->avail_in = stream_data_len_;
214  zlib_stream_.get()->next_out = bit_cast<Bytef*>(dest_buffer);
215  zlib_stream_.get()->avail_out = *dest_len;
216
217  int inflate_code = inflate(zlib_stream_.get(), Z_NO_FLUSH);
218  int bytesWritten = *dest_len - zlib_stream_.get()->avail_out;
219
220  Filter::FilterStatus status;
221
222  switch (inflate_code) {
223    case Z_STREAM_END: {
224      *dest_len = bytesWritten;
225
226      stream_data_len_ = zlib_stream_.get()->avail_in;
227      next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in);
228
229      SkipGZipFooter();
230
231      status = Filter::FILTER_DONE;
232      break;
233    }
234    case Z_BUF_ERROR: {
235      // According to zlib documentation, when calling inflate with Z_NO_FLUSH,
236      // getting Z_BUF_ERROR means no progress is possible. Neither processing
237      // more input nor producing more output can be done.
238      // Since we have checked both input data and output buffer before calling
239      // inflate, this result is unexpected.
240      status = Filter::FILTER_ERROR;
241      break;
242    }
243    case Z_OK: {
244      // Some progress has been made (more input processed or more output
245      // produced).
246      *dest_len = bytesWritten;
247
248      // Check whether we have consumed all input data.
249      stream_data_len_ = zlib_stream_.get()->avail_in;
250      if (stream_data_len_ == 0) {
251        next_stream_data_ = NULL;
252        status = Filter::FILTER_NEED_MORE_DATA;
253      } else {
254        next_stream_data_ = bit_cast<char*>(zlib_stream_.get()->next_in);
255        status = Filter::FILTER_OK;
256      }
257      break;
258    }
259    default: {
260      status = Filter::FILTER_ERROR;
261      break;
262    }
263  }
264
265  return status;
266}
267
268bool GZipFilter::InsertZlibHeader() {
269  static char dummy_head[2] = { 0x78, 0x1 };
270
271  char dummy_output[4];
272
273  // We only try add additional header once.
274  if (zlib_header_added_)
275    return false;
276
277  inflateReset(zlib_stream_.get());
278  zlib_stream_.get()->next_in = bit_cast<Bytef*>(&dummy_head[0]);
279  zlib_stream_.get()->avail_in = sizeof(dummy_head);
280  zlib_stream_.get()->next_out = bit_cast<Bytef*>(&dummy_output[0]);
281  zlib_stream_.get()->avail_out = sizeof(dummy_output);
282
283  int code = inflate(zlib_stream_.get(), Z_NO_FLUSH);
284  zlib_header_added_ = true;
285
286  return (code == Z_OK);
287}
288
289
290void GZipFilter::SkipGZipFooter() {
291  int footer_bytes_expected = kGZipFooterSize - gzip_footer_bytes_;
292  if (footer_bytes_expected > 0) {
293    int footer_byte_avail = std::min(footer_bytes_expected, stream_data_len_);
294    stream_data_len_ -= footer_byte_avail;
295    next_stream_data_ += footer_byte_avail;
296    gzip_footer_bytes_ += footer_byte_avail;
297
298    if (stream_data_len_ == 0)
299      next_stream_data_ = NULL;
300  }
301}
302
303}  // namespace net
304