1/* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is Mozilla.
15 *
16 * The Initial Developer of the Original Code is
17 * Netscape Communications.
18 * Portions created by the Initial Developer are Copyright (C) 2001
19 * the Initial Developer. All Rights Reserved.
20 *
21 * Contributor(s):
22 *   Darin Fisher <darin@netscape.com> (original author)
23 *
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
35 *
36 * ***** END LICENSE BLOCK ***** */
37
38// Derived from:
39// mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp
40
41#include "net/http/http_chunked_decoder.h"
42
43#include "base/logging.h"
44#include "base/string_piece.h"
45#include "base/string_util.h"
46#include "net/base/net_errors.h"
47
48namespace net {
49
// Creates a decoder in its initial state: no chunk payload is pending, no
// framing has been seen yet, and the end of the body has not been reached.
HttpChunkedDecoder::HttpChunkedDecoder()
    // Payload bytes of the current chunk that FilterBuf() has not yet seen.
    : chunk_remaining_(0),
      // Set by FilterBuf() once a chunk's payload is fully consumed; the next
      // line scanned must then be the empty line terminating that chunk.
      chunk_terminator_remaining_(false),
      // Set when a chunk-size of zero has been parsed.
      reached_last_chunk_(false),
      // Set when the empty line following the last chunk has been read.
      reached_eof_(false),
      // Running count of input bytes found after the end of the chunked body.
      bytes_after_eof_(0) {
}
57
58int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) {
59  int result = 0;
60
61  while (buf_len) {
62    if (chunk_remaining_) {
63      int num = std::min(chunk_remaining_, buf_len);
64
65      buf_len -= num;
66      chunk_remaining_ -= num;
67
68      result += num;
69      buf += num;
70
71      // After each chunk's data there should be a CRLF
72      if (!chunk_remaining_)
73        chunk_terminator_remaining_ = true;
74      continue;
75    } else if (reached_eof_) {
76      bytes_after_eof_ += buf_len;
77      break;  // Done!
78    }
79
80    int bytes_consumed = ScanForChunkRemaining(buf, buf_len);
81    if (bytes_consumed < 0)
82      return bytes_consumed; // Error
83
84    buf_len -= bytes_consumed;
85    if (buf_len)
86      memmove(buf, buf + bytes_consumed, buf_len);
87  }
88
89  return result;
90}
91
92int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) {
93  DCHECK(chunk_remaining_ == 0);
94  DCHECK(buf_len > 0);
95
96  int bytes_consumed = 0;
97
98  size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n');
99  if (index_of_lf != base::StringPiece::npos) {
100    buf_len = static_cast<int>(index_of_lf);
101    if (buf_len && buf[buf_len - 1] == '\r')  // Eliminate a preceding CR.
102      buf_len--;
103    bytes_consumed = static_cast<int>(index_of_lf) + 1;
104
105    // Make buf point to the full line buffer to parse.
106    if (!line_buf_.empty()) {
107      line_buf_.append(buf, buf_len);
108      buf = line_buf_.data();
109      buf_len = static_cast<int>(line_buf_.size());
110    }
111
112    if (reached_last_chunk_) {
113      if (buf_len) {
114        DLOG(INFO) << "ignoring http trailer";
115      } else {
116        reached_eof_ = true;
117      }
118    } else if (chunk_terminator_remaining_) {
119       if (buf_len) {
120         DLOG(ERROR) << "chunk data not terminated properly";
121         return ERR_INVALID_CHUNKED_ENCODING;
122       }
123       chunk_terminator_remaining_ = false;
124    } else if (buf_len) {
125      // Ignore any chunk-extensions.
126      size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';');
127      if (index_of_semicolon != base::StringPiece::npos)
128        buf_len = static_cast<int>(index_of_semicolon);
129
130      if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) {
131        DLOG(ERROR) << "Failed parsing HEX from: " <<
132            std::string(buf, buf_len);
133        return ERR_INVALID_CHUNKED_ENCODING;
134      }
135
136      if (chunk_remaining_ == 0)
137        reached_last_chunk_ = true;
138    } else {
139      DLOG(ERROR) << "missing chunk-size";
140      return ERR_INVALID_CHUNKED_ENCODING;
141    }
142    line_buf_.clear();
143  } else {
144    // Save the partial line; wait for more data.
145    bytes_consumed = buf_len;
146
147    // Ignore a trailing CR
148    if (buf[buf_len - 1] == '\r')
149      buf_len--;
150
151    line_buf_.append(buf, buf_len);
152  }
153  return bytes_consumed;
154}
155
156
157// While the HTTP 1.1 specification defines chunk-size as 1*HEX
158// some sites rely on more lenient parsing.
159// http://www.yahoo.com/, for example, pads chunk-size with trailing spaces
160// (0x20) to be 7 characters long, such as "819b   ".
161//
162// A comparison of browsers running on WindowsXP shows that
163// they will parse the following inputs (egrep syntax):
164//
165// Let \X be the character class for a hex digit: [0-9a-fA-F]
166//
167//   RFC 2616: ^\X+$
168//        IE7: ^\X+[^\X]*$
169// Safari 3.1: ^[\t\r ]*\X+[\t ]*$
170//  Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$
171// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$
172//
173// Our strategy is to be as strict as possible, while not breaking
174// known sites.
175//
176//         Us: ^\X+[ ]*$
177bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) {
178  DCHECK(len >= 0);
179
180  // Strip trailing spaces
181  while (len && start[len - 1] == ' ')
182    len--;
183
184  // Be more restrictive than HexStringToInt;
185  // don't allow inputs with leading "-", "+", "0x", "0X"
186  if (base::StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF")
187      != base::StringPiece::npos)
188    return false;
189
190  int parsed_number;
191  bool ok = HexStringToInt(std::string(start, len), &parsed_number);
192  if (ok && parsed_number >= 0) {
193    *out = parsed_number;
194    return true;
195  }
196  return false;
197}
198
199}  // namespace net
200