1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Derived from:
6//   mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
7// The license block is:
8/* ***** BEGIN LICENSE BLOCK *****
9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
10 *
11 * The contents of this file are subject to the Mozilla Public License Version
12 * 1.1 (the "License"); you may not use this file except in compliance with
13 * the License. You may obtain a copy of the License at
14 * http://www.mozilla.org/MPL/
15 *
16 * Software distributed under the License is distributed on an "AS IS" basis,
17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
18 * for the specific language governing rights and limitations under the
19 * License.
20 *
21 * The Original Code is Mozilla.
22 *
23 * The Initial Developer of the Original Code is
24 * Netscape Communications.
25 * Portions created by the Initial Developer are Copyright (C) 2001
26 * the Initial Developer. All Rights Reserved.
27 *
28 * Contributor(s):
29 *   Darin Fisher <darin@netscape.com> (original author)
30 *
31 * Alternatively, the contents of this file may be used under the terms of
32 * either the GNU General Public License Version 2 or later (the "GPL"), or
33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
34 * in which case the provisions of the GPL or the LGPL are applicable instead
35 * of those above. If you wish to allow use of your version of this file only
36 * under the terms of either the GPL or the LGPL, and not to allow others to
37 * use your version of this file under the terms of the MPL, indicate your
38 * decision by deleting the provisions above and replace them with the notice
39 * and other provisions required by the GPL or the LGPL. If you do not delete
40 * the provisions above, a recipient may use your version of this file under
41 * the terms of any one of the MPL, the GPL or the LGPL.
42 *
43 * ***** END LICENSE BLOCK ***** */
44
45#include "net/http/http_chunked_decoder.h"
46
47#include <algorithm>
48
49#include "base/logging.h"
50#include "base/strings/string_number_conversions.h"
51#include "base/strings/string_piece.h"
52#include "base/strings/string_util.h"
53#include "net/base/net_errors.h"
54
55namespace net {
56
57// Absurdly long size to avoid imposing a constraint on chunked encoding
58// extensions.
59const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
60
61HttpChunkedDecoder::HttpChunkedDecoder()
62    : chunk_remaining_(0),
63      chunk_terminator_remaining_(false),
64      reached_last_chunk_(false),
65      reached_eof_(false),
66      bytes_after_eof_(0) {
67}
68
69int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
70  int result = 0;
71
72  while (buf_len) {
73    if (chunk_remaining_) {
74      int num = std::min(chunk_remaining_, buf_len);
75
76      buf_len -= num;
77      chunk_remaining_ -= num;
78
79      result += num;
80      buf += num;
81
82      // After each chunk's data there should be a CRLF
83      if (!chunk_remaining_)
84        chunk_terminator_remaining_ = true;
85      continue;
86    } else if (reached_eof_) {
87      bytes_after_eof_ += buf_len;
88      break;  // Done!
89    }
90
91    int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
92    if (bytes_consumed < 0)
93      return bytes_consumed; // Error
94
95    buf_len -= bytes_consumed;
96    if (buf_len)
97      memmove(buf, buf + bytes_consumed, buf_len);
98  }
99
100  return result;
101}
102
103int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
104  DCHECK_EQ(0, chunk_remaining_);
105  DCHECK_GT(buf_len, 0);
106
107  int bytes_consumed = 0;
108
109  size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
110  if (index_of_lf != base::StringPiece::npos) {
111    buf_len = static_cast<int>(index_of_lf);
112    if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
113      buf_len--;
114    bytes_consumed = static_cast<int>(index_of_lf) + 1;
115
116    // Make buf point to the full line buffer to parse.
117    if (!line_buf_.empty()) {
118      line_buf_.append(buf, buf_len);
119      buf = line_buf_.data();
120      buf_len = static_cast<int>(line_buf_.size());
121    }
122
123    if (reached_last_chunk_) {
124      if (buf_len)
125        DVLOG(1) << "ignoring http trailer";
126      else
127        reached_eof_ = true;
128    } else if (chunk_terminator_remaining_) {
129      if (buf_len) {
130        DLOG(ERROR) << "chunk data not terminated properly";
131        return ERR_INVALID_CHUNKED_ENCODING;
132      }
133      chunk_terminator_remaining_ = false;
134    } else if (buf_len) {
135      // Ignore any chunk-extensions.
136      size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
137      if (index_of_semicolon != base::StringPiece::npos)
138        buf_len = static_cast<int>(index_of_semicolon);
139
140      if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
141        DLOG(ERROR) << "Failed parsing HEX from: " <<
142            std::string(buf, buf_len);
143        return ERR_INVALID_CHUNKED_ENCODING;
144      }
145
146      if (chunk_remaining_ == 0)
147        reached_last_chunk_ = true;
148    } else {
149      DLOG(ERROR) << "missing chunk-size";
150      return ERR_INVALID_CHUNKED_ENCODING;
151    }
152    line_buf_.clear();
153  } else {
154    // Save the partial line; wait for more data.
155    bytes_consumed = buf_len;
156
157    // Ignore a trailing CR
158    if (buf[buf_len - 1] == '\r')
159      buf_len--;
160
161    if (line_buf_.length() + buf_len > kMaxLineBufLen) {
162      DLOG(ERROR) << "Chunked line length too long";
163      return ERR_INVALID_CHUNKED_ENCODING;
164    }
165
166    line_buf_.append(buf, buf_len);
167  }
168  return bytes_consumed;
169}
170
171
172// While the HTTP 1.1 specification defines chunk-size as 1*HEX
173// some sites rely on more lenient parsing.
174// http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
175// (0x20) to be 7 characters long, such as "819b   ".
176//
177// A comparison of browsers running on WindowsXP shows that
178// they will parse the following inputs (egrep syntax):
179//
180// Let \X be the character class for a hex digit: [0-9a-fA-F]
181//
182//   RFC 2616: ^\X+$
183//        IE7: ^\X+[^\X]*$
184// Safari 3.1: ^[\t\r ]*\X+[\t ]*$
185//  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
186// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
187//
188// Our strategy is to be as strict as possible, while not breaking
189// known sites.
190//
191//         Us: ^\X+[ ]*$
192bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
193  DCHECK_GE(len, 0);
194
195  // Strip trailing spaces
196  while (len && start[len - 1] == ' ')
197    len--;
198
199  // Be more restrictive than HexStringToInt;
200  // don't allow inputs with leading "-", "+", "0x", "0X"
201  base::StringPiece chunk_size(start, len);
202  if (chunk_size.find_first_not_of("0123456789abcdefABCDEF")
203      != base::StringPiece::npos) {
204    return false;
205  }
206
207  int parsed_number;
208  bool ok = base::HexStringToInt(chunk_size, &parsed_number);
209  if (ok && parsed_number >= 0) {
210    *out = parsed_number;
211    return true;
212  }
213  return false;
214}
215
216}  // namespace net
217