1/* ***** BEGIN LICENSE BLOCK ***** 2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 3 * 4 * The contents of this file are subject to the Mozilla Public License Version 5 * 1.1 (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * http://www.mozilla.org/MPL/ 8 * 9 * Software distributed under the License is distributed on an "AS IS" basis, 10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 11 * for the specific language governing rights and limitations under the 12 * License. 13 * 14 * The Original Code is Mozilla. 15 * 16 * The Initial Developer of the Original Code is 17 * Netscape Communications. 18 * Portions created by the Initial Developer are Copyright (C) 2001 19 * the Initial Developer. All Rights Reserved. 20 * 21 * Contributor(s): 22 * Darin Fisher <darin@netscape.com> (original author) 23 * 24 * Alternatively, the contents of this file may be used under the terms of 25 * either the GNU General Public License Version 2 or later (the "GPL"), or 26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 27 * in which case the provisions of the GPL or the LGPL are applicable instead 28 * of those above. If you wish to allow use of your version of this file only 29 * under the terms of either the GPL or the LGPL, and not to allow others to 30 * use your version of this file under the terms of the MPL, indicate your 31 * decision by deleting the provisions above and replace them with the notice 32 * and other provisions required by the GPL or the LGPL. If you do not delete 33 * the provisions above, a recipient may use your version of this file under 34 * the terms of any one of the MPL, the GPL or the LGPL. 35 * 36 * ***** END LICENSE BLOCK ***** */ 37 38// Derived from: 39// mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp 40 41#include "net/http/http_chunked_decoder.h" 42 43#include "base/logging.h" 44#include "base/string_piece.h" 45#include "base/string_util.h" 46#include "net/base/net_errors.h" 47 48namespace net { 49 50HttpChunkedDecoder::HttpChunkedDecoder() 51 : chunk_remaining_(0), 52 chunk_terminator_remaining_(false), 53 reached_last_chunk_(false), 54 reached_eof_(false), 55 bytes_after_eof_(0) { 56} 57 58int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { 59 int result = 0; 60 61 while (buf_len) { 62 if (chunk_remaining_) { 63 int num = std::min(chunk_remaining_, buf_len); 64 65 buf_len -= num; 66 chunk_remaining_ -= num; 67 68 result += num; 69 buf += num; 70 71 // After each chunk's data there should be a CRLF 72 if (!chunk_remaining_) 73 chunk_terminator_remaining_ = true; 74 continue; 75 } else if (reached_eof_) { 76 bytes_after_eof_ += buf_len; 77 break; // Done! 78 } 79 80 int bytes_consumed = ScanForChunkRemaining(buf, buf_len); 81 if (bytes_consumed < 0) 82 return bytes_consumed; // Error 83 84 buf_len -= bytes_consumed; 85 if (buf_len) 86 memmove(buf, buf + bytes_consumed, buf_len); 87 } 88 89 return result; 90} 91 92int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { 93 DCHECK(chunk_remaining_ == 0); 94 DCHECK(buf_len > 0); 95 96 int bytes_consumed = 0; 97 98 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); 99 if (index_of_lf != base::StringPiece::npos) { 100 buf_len = static_cast<int>(index_of_lf); 101 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. 102 buf_len--; 103 bytes_consumed = static_cast<int>(index_of_lf) + 1; 104 105 // Make buf point to the full line buffer to parse. 106 if (!line_buf_.empty()) { 107 line_buf_.append(buf, buf_len); 108 buf = line_buf_.data(); 109 buf_len = static_cast<int>(line_buf_.size()); 110 } 111 112 if (reached_last_chunk_) { 113 if (buf_len) { 114 DLOG(INFO) << "ignoring http trailer"; 115 } else { 116 reached_eof_ = true; 117 } 118 } else if (chunk_terminator_remaining_) { 119 if (buf_len) { 120 DLOG(ERROR) << "chunk data not terminated properly"; 121 return ERR_INVALID_CHUNKED_ENCODING; 122 } 123 chunk_terminator_remaining_ = false; 124 } else if (buf_len) { 125 // Ignore any chunk-extensions. 126 size_t index_of_semicolon = base::StringPiece(buf, buf_len).find(';'); 127 if (index_of_semicolon != base::StringPiece::npos) 128 buf_len = static_cast<int>(index_of_semicolon); 129 130 if (!ParseChunkSize(buf, buf_len, &chunk_remaining_)) { 131 DLOG(ERROR) << "Failed parsing HEX from: " << 132 std::string(buf, buf_len); 133 return ERR_INVALID_CHUNKED_ENCODING; 134 } 135 136 if (chunk_remaining_ == 0) 137 reached_last_chunk_ = true; 138 } else { 139 DLOG(ERROR) << "missing chunk-size"; 140 return ERR_INVALID_CHUNKED_ENCODING; 141 } 142 line_buf_.clear(); 143 } else { 144 // Save the partial line; wait for more data. 145 bytes_consumed = buf_len; 146 147 // Ignore a trailing CR 148 if (buf[buf_len - 1] == '\r') 149 buf_len--; 150 151 line_buf_.append(buf, buf_len); 152 } 153 return bytes_consumed; 154} 155 156 157// While the HTTP 1.1 specification defines chunk-size as 1*HEX 158// some sites rely on more lenient parsing. 159// http://www.yahoo.com/, for example, pads chunk-size with trailing spaces 160// (0x20) to be 7 characters long, such as "819b ". 161// 162// A comparison of browsers running on WindowsXP shows that 163// they will parse the following inputs (egrep syntax): 164// 165// Let \X be the character class for a hex digit: [0-9a-fA-F] 166// 167// RFC 2616: ^\X+$ 168// IE7: ^\X+[^\X]*$ 169// Safari 3.1: ^[\t\r ]*\X+[\t ]*$ 170// Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ 171// Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ 172// 173// Our strategy is to be as strict as possible, while not breaking 174// known sites. 175// 176// Us: ^\X+[ ]*$ 177bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { 178 DCHECK(len >= 0); 179 180 // Strip trailing spaces 181 while (len && start[len - 1] == ' ') 182 len--; 183 184 // Be more restrictive than HexStringToInt; 185 // don't allow inputs with leading "-", "+", "0x", "0X" 186 if (base::StringPiece(start, len).find_first_not_of("0123456789abcdefABCDEF") 187 != base::StringPiece::npos) 188 return false; 189 190 int parsed_number; 191 bool ok = HexStringToInt(std::string(start, len), &parsed_number); 192 if (ok && parsed_number >= 0) { 193 *out = parsed_number; 194 return true; 195 } 196 return false; 197} 198 199} // namespace net 200