1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// Derived from: 6// mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp 7// The license block is: 8/* ***** BEGIN LICENSE BLOCK ***** 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 10 * 11 * The contents of this file are subject to the Mozilla Public License Version 12 * 1.1 (the "License"); you may not use this file except in compliance with 13 * the License. You may obtain a copy of the License at 14 * http://www.mozilla.org/MPL/ 15 * 16 * Software distributed under the License is distributed on an "AS IS" basis, 17 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 18 * for the specific language governing rights and limitations under the 19 * License. 20 * 21 * The Original Code is Mozilla. 22 * 23 * The Initial Developer of the Original Code is 24 * Netscape Communications. 25 * Portions created by the Initial Developer are Copyright (C) 2001 26 * the Initial Developer. All Rights Reserved. 27 * 28 * Contributor(s): 29 * Darin Fisher <darin@netscape.com> (original author) 30 * 31 * Alternatively, the contents of this file may be used under the terms of 32 * either the GNU General Public License Version 2 or later (the "GPL"), or 33 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 34 * in which case the provisions of the GPL or the LGPL are applicable instead 35 * of those above. If you wish to allow use of your version of this file only 36 * under the terms of either the GPL or the LGPL, and not to allow others to 37 * use your version of this file under the terms of the MPL, indicate your 38 * decision by deleting the provisions above and replace them with the notice 39 * and other provisions required by the GPL or the LGPL. If you do not delete 40 * the provisions above, a recipient may use your version of this file under 41 * the terms of any one of the MPL, the GPL or the LGPL. 42 * 43 * ***** END LICENSE BLOCK ***** */ 44 45#include "net/http/http_chunked_decoder.h" 46 47#include "base/logging.h" 48#include "base/string_number_conversions.h" 49#include "base/string_piece.h" 50#include "base/string_util.h" 51#include "net/base/net_errors.h" 52 53namespace net { 54 55HttpChunkedDecoder::HttpChunkedDecoder() 56 : chunk_remaining_(0), 57 chunk_terminator_remaining_(false), 58 reached_last_chunk_(false), 59 reached_eof_(false), 60 bytes_after_eof_(0) { 61} 62 63int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { 64 int result = 0; 65 66 while (buf_len) { 67 if (chunk_remaining_) { 68 int num = std::min(chunk_remaining_, buf_len); 69 70 buf_len -= num; 71 chunk_remaining_ -= num; 72 73 result += num; 74 buf += num; 75 76 // After each chunk's data there should be a CRLF 77 if (!chunk_remaining_) 78 chunk_terminator_remaining_ = true; 79 continue; 80 } else if (reached_eof_) { 81 bytes_after_eof_ += buf_len; 82 break; // Done! 83 } 84 85 int bytes_consumed = ScanForChunkRemaining(buf, buf_len); 86 if (bytes_consumed < 0) 87 return bytes_consumed; // Error 88 89 buf_len -= bytes_consumed; 90 if (buf_len) 91 memmove(buf, buf + bytes_consumed, buf_len); 92 } 93 94 return result; 95} 96 97int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { 98 DCHECK(chunk_remaining_ == 0); 99 DCHECK(buf_len > 0); 100 101 int bytes_consumed = 0; 102 103 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); 104 if (index_of_lf != base::StringPiece::npos) { 105 buf_len = static_cast<int>(index_of_lf); 106 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. 107 buf_len--; 108 bytes_consumed = static_cast<int>(index_of_lf) + 1; 109 110 // Make buf point to the full line buffer to parse. 111 if (!line_buf_.empty()) { 112 line_buf_.append(buf, buf_len); 113 buf = line_buf_.data(); 114 buf_len = static_cast<int>(line_buf_.size()); 115 } 116 117 if (reached_last_chunk_) { 118 if (buf_len) 119 DVLOG(1) << "ignoring http trailer"; 120 else 121 reached_eof_ = true; 122 } else if (chunk_terminator_remaining_) { 123 if (buf_len) { 124 DLOG(ERROR) << "chunk data not terminated properly"; 125 return ERR_INVALID_CHUNKED_ENCODING; 126 } 127 chunk_terminator_remaining_ = false; 128 } else if (buf_len) { 129 // Ignore any chunk-extensions. 130 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); 131 if (index_of_semicolon != base::StringPiece::npos) 132 buf_len = static_cast<int>(index_of_semicolon); 133 134 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { 135 DLOG(ERROR) << "Failed parsing HEX from: " << 136 std::string(buf, buf_len); 137 return ERR_INVALID_CHUNKED_ENCODING; 138 } 139 140 if (chunk_remaining_ == 0) 141 reached_last_chunk_ = true; 142 } else { 143 DLOG(ERROR) << "missing chunk-size"; 144 return ERR_INVALID_CHUNKED_ENCODING; 145 } 146 line_buf_.clear(); 147 } else { 148 // Save the partial line; wait for more data. 149 bytes_consumed = buf_len; 150 151 // Ignore a trailing CR 152 if (buf[buf_len - 1] == '\r') 153 buf_len--; 154 155 line_buf_.append(buf, buf_len); 156 } 157 return bytes_consumed; 158} 159 160 161// While the HTTP 1.1 specification defines chunk-size as 1*HEX 162// some sites rely on more lenient parsing. 163// http://www.yahoo.com/, for example, pads chunk-size with trailing spaces 164// (0x20) to be 7 characters long, such as "819b ". 165// 166// A comparison of browsers running on WindowsXP shows that 167// they will parse the following inputs (egrep syntax): 168// 169// Let \X be the character class for a hex digit: [0-9a-fA-F] 170// 171// RFC 2616: ^\X+$ 172// IE7: ^\X+[^\X]*$ 173// Safari 3.1: ^[\t\r ]*\X+[\t ]*$ 174// Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ 175// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ 176// 177// Our strategy is to be as strict as possible, while not breaking 178// known sites. 179// 180// Us: ^\X+[ ]*$ 181bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { 182 DCHECK(len >= 0); 183 184 // Strip trailing spaces 185 while (len && start[len - 1] == ' ') 186 len--; 187 188 // Be more restrictive than HexStringToInt; 189 // don't allow inputs with leading "-", "+", "0x", "0X" 190 if (base::StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF") 191 != base::StringPiece::npos) 192 return false; 193 194 int parsed_number; 195 bool ok = base::HexStringToInt(start, start + len, &parsed_number); 196 if (ok && parsed_number >= 0) { 197 *out = parsed_number; 198 return true; 199 } 200 return false; 201} 202 203} // namespace net 204