// balsa_frame.cc revision f2477e01787aa58f445919b809d89e252beef54f
1d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis// Copyright 2013 The Chromium Authors. All rights reserved. 264924859b6b09d1cfb62fecf5954ec6c27cb58feTed Kremenek// Use of this source code is governed by a BSD-style license that can be 34af84313df0d2710fd57af89132e680294225cadTed Kremenek// found in the LICENSE file. 4d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek 5d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include "net/tools/balsa/balsa_frame.h" 6d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek 7d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <assert.h> 8d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#if __SSE2__ 9d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <emmintrin.h> 1077349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#endif // __SSE2__ 1177349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include <strings.h> 1277349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek 13d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <limits> 14d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <string> 15a7af5ea88a6c5bdf87497cca6c20831e8c546751Argyrios Kyrtzidis#include <utility> 16c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks#include <vector> 17c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks 1843dee220252ef0b42c5f8a3bb1eca97f84f2565fArgyrios Kyrtzidis#include "base/logging.h" 199b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "base/port.h" 209b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "base/strings/string_piece.h" 219b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "net/tools/balsa/balsa_enums.h" 22d1e5a89226da79f7e6f43d40facc46abda9e5245Jordy Rose#include "net/tools/balsa/balsa_headers.h" 23199c3d6cd16aebbb9c7f0d42af9d922c9628bf70Ken Dyck#include "net/tools/balsa/balsa_visitor_interface.h" 2416f0049415ec596504891259e2a83e19871c0d52Chris Lattner#include "net/tools/balsa/buffer_interface.h" 2516f0049415ec596504891259e2a83e19871c0d52Chris 
Lattner#include "net/tools/balsa/simple_buffer.h" 26337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek#include "net/tools/balsa/split.h" 27c506357c3778092c2a3251243f12524e8eb89274Zhongxing Xu#include "net/tools/balsa/string_piece_utils.h" 281b63e4f732dbc73d90abf886b4d21f8e3a165f6dChris Lattner 2916f0049415ec596504891259e2a83e19871c0d52Chris Lattnernamespace net { 300bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek 31a95d3750441ac8ad03e36af8e6e74039c9a3109dTed Kremenek// Constants holding some header names for headers which can affect the way the 326cb7c1a43b0c8f739d1f54b7fdae5ede86033496Benjamin Kramer// HTTP message is framed, and so must be processed specially: 33c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaksstatic const char kContentLength[] = "content-length"; 344323a57627e796dcfdfdb7d47672dc09ed308edaTed Kremenekstatic const size_t kContentLengthSize = sizeof(kContentLength) - 1; 350f5f0595d6a038843a7051c5a65fca7bce2915a0Ted Kremenekstatic const char kTransferEncoding[] = "transfer-encoding"; 360f5f0595d6a038843a7051c5a65fca7bce2915a0Ted Kremenekstatic const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1; 370f5f0595d6a038843a7051c5a65fca7bce2915a0Ted Kremenek 380f5f0595d6a038843a7051c5a65fca7bce2915a0Ted KremenekBalsaFrame::BalsaFrame() 39b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek : last_char_was_slash_r_(false), 409ef6537a894c33003359b1f9b9676e9178e028b7Ted Kremenek saw_non_newline_char_(false), 41b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek start_was_space_(true), 42ab2b8c54bca82866876f91e756788916d3fa20c3Ted Kremenek chunk_length_character_extracted_(false), 43c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks is_request_(true), 44c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks request_was_head_(false), 45749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks max_header_length_(16 * 1024), 46749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks max_request_uri_length_(2048), 47749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks 
visitor_(&do_nothing_visitor_), 48749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks chunk_length_remaining_(0), 49749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks content_length_remaining_(0), 50749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks last_slash_n_loc_(NULL), 515903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks last_recorded_slash_n_loc_(NULL), 525903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks last_slash_n_idx_(0), 535903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks term_chars_(0), 54e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE), 5532c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek last_error_(BalsaFrameEnums::NO_ERROR), 5632c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek headers_(NULL) { 5732c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek} 589c378f705405d37f49795d5e915989de774fe11fTed Kremenek 5932c4995826c76f282fc05fbbc3241d2dded4fb57Ted KremenekBalsaFrame::~BalsaFrame() {} 6032c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek 6132c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenekvoid BalsaFrame::Reset() { 6232c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek last_char_was_slash_r_ = false; 6332c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek saw_non_newline_char_ = false; 64bdb435ddaafd5069becd543d638112f68825b89dTed Kremenek start_was_space_ = true; 65bdb435ddaafd5069becd543d638112f68825b89dTed Kremenek chunk_length_character_extracted_ = false; 66bdb435ddaafd5069becd543d638112f68825b89dTed Kremenek // is_request_ = true; // not reset between messages. 673fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks // request_was_head_ = false; // not reset between messages. 686a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks // max_header_length_ = 4096; // not reset between messages. 693bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks // max_request_uri_length_ = 2048; // not reset between messages. 
7025e695b2d574d919cc1bbddf3a2efe073d449b1cZhongxing Xu // visitor_ = &do_nothing_visitor_; // not reset between messages. 711d26f48dc2eea1c07431ca1519d7034a21b9bcffTed Kremenek chunk_length_remaining_ = 0; 72e62f048960645b79363408fdead53fec2a063c52Anna Zaks content_length_remaining_ = 0; 73d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis last_slash_n_loc_ = NULL; 74c77a55126fcad66fb086f8e100a494caa2496a2dZhongxing Xu last_recorded_slash_n_loc_ = NULL; 7532a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek last_slash_n_idx_ = 0; 7632a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek term_chars_ = 0; 7750a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE; 78c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek last_error_ = BalsaFrameEnums::NO_ERROR; 798ad8c546372fe602708cb7ceeaf0ebbb866735c6Anna Zaks lines_.clear(); 808ad8c546372fe602708cb7ceeaf0ebbb866735c6Anna Zaks if (headers_ != NULL) { 81a5a4166f8cb04490b7b27355874edfb98837cd45Zhongxing Xu headers_->Clear(); 82c77a55126fcad66fb086f8e100a494caa2496a2dZhongxing Xu } 83e62f048960645b79363408fdead53fec2a063c52Anna Zaks} 84d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek 85d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenekconst char* BalsaFrameEnums::ParseStateToString( 86d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek BalsaFrameEnums::ParseState error_code) { 87d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek switch (error_code) { 88d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek case PARSE_ERROR: 89c80135ba857da48173578b9c528fce6777e18168Ted Kremenek return "PARSE_ERROR"; 9050a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek case READING_HEADER_AND_FIRSTLINE: 91d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis return "READING_HEADER_AND_FIRSTLINE"; 92cf118d41f7930a18dce97416ef7834a62642f587Ted Kremenek case READING_CHUNK_LENGTH: 93e448ab4f9dd162802f5d7cfea60f7830cc61c654Ted Kremenek return "READING_CHUNK_LENGTH"; 
9450a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek case READING_CHUNK_EXTENSION: 9550a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek return "READING_CHUNK_EXTENSION"; 96e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek case READING_CHUNK_DATA: 97e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek return "READING_CHUNK_DATA"; 98e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek case READING_CHUNK_TERM: 99e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek return "READING_CHUNK_TERM"; 1008bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek case READING_LAST_CHUNK_TERM: 1018bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek return "READING_LAST_CHUNK_TERM"; 102a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case READING_TRAILER: 1031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump return "READING_TRAILER"; 104cfcd7fd0de701c5ce05e96de1ed2d0bf8c7035d9Ted Kremenek case READING_UNTIL_CLOSE: 10552e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek return "READING_UNTIL_CLOSE"; 10652e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek case READING_CONTENT: 1075974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "READING_CONTENT"; 108a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case MESSAGE_FULLY_READ: 1095974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "MESSAGE_FULLY_READ"; 1105974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case NUM_STATES: 1115974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "UNKNOWN_STATE"; 1125974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek } 1135974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "UNKNOWN_STATE"; 1145974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek} 1155974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek 1165974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenekconst char* BalsaFrameEnums::ErrorCodeToString( 1175974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek BalsaFrameEnums::ErrorCode error_code) { 1185974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek switch 
(error_code) { 1195974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case NO_ERROR: 120b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek return "NO_ERROR"; 1215974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case NO_STATUS_LINE_IN_RESPONSE: 1225974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "NO_STATUS_LINE_IN_RESPONSE"; 1235974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case NO_REQUEST_LINE_IN_REQUEST: 124b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek return "NO_REQUEST_LINE_IN_REQUEST"; 1251397663af9dbcc24dbf0e11de43931b3dc08fdbbTed Kremenek case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION: 1269c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION"; 127c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD: 1285974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD"; 1295974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE: 1305974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE"; 1315974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI: 132b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI"; 1335974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE: 1345974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE"; 135b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION: 1368bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION"; 1375974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek case FAILED_CONVERTING_STATUS_CODE_TO_INT: 13852e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek return 
"FAILED_CONVERTING_STATUS_CODE_TO_INT"; 139a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case REQUEST_URI_TOO_LONG: 140a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "REQUEST_URI_TOO_LONG"; 141a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case HEADERS_TOO_LONG: 142a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "HEADERS_TOO_LONG"; 143a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case UNPARSABLE_CONTENT_LENGTH: 144a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "UNPARSABLE_CONTENT_LENGTH"; 145a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case MAYBE_BODY_BUT_NO_CONTENT_LENGTH: 146a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH"; 147a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH: 148a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH"; 149a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case HEADER_MISSING_COLON: 150a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "HEADER_MISSING_COLON"; 151a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case INVALID_CHUNK_LENGTH: 152a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "INVALID_CHUNK_LENGTH"; 153a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case CHUNK_LENGTH_OVERFLOW: 154a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "CHUNK_LENGTH_OVERFLOW"; 155a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO: 1565974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO"; 157a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT: 158a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT"; 159a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case MULTIPLE_CONTENT_LENGTH_KEYS: 
160a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "MULTIPLE_CONTENT_LENGTH_KEYS"; 161a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case MULTIPLE_TRANSFER_ENCODING_KEYS: 162a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "MULTIPLE_TRANSFER_ENCODING_KEYS"; 163a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case UNKNOWN_TRANSFER_ENCODING: 164a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "UNKNOWN_TRANSFER_ENCODING"; 165a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case INVALID_HEADER_FORMAT: 166a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "INVALID_HEADER_FORMAT"; 167a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case INTERNAL_LOGIC_ERROR: 168a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "INTERNAL_LOGIC_ERROR"; 169a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek case NUM_ERROR_CODES: 1705974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek return "UNKNOWN_ERROR"; 171cfcd7fd0de701c5ce05e96de1ed2d0bf8c7035d9Ted Kremenek } 172a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek return "UNKNOWN_ERROR"; 173a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek} 17452e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek 175e070a1df66aab6d4168fb28f7559fdf996df3567Ted Kremenek// Summary: 176e070a1df66aab6d4168fb28f7559fdf996df3567Ted Kremenek// Parses the first line of either a request or response. 177e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek// Note that in the case of a detected warning, error_code will be set 178e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek// but the function will not return false. 179e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek// Exactly zero or one warning or error (but not both) may be detected 180e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek// by this function. 
1819c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek// Note that this function will not write the data of the first-line 18232a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek// into the header's buffer (that should already have been done elsewhere). 1838bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek// 184fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose// Pre-conditions: 185fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose// begin != end 18632a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek// *begin should be a character which is > ' '. This implies that there 18732a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek// is at least one non-whitespace characters between [begin, end). 1888bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek// headers is a valid pointer to a BalsaHeaders class. 189183ff98f425d470c2a0276880aaf43496c9dad14Argyrios Kyrtzidis// error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value. 190c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose// Entire first line must exist between [begin, end) 191c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose// Exactly zero or one newlines -may- exist between [begin, end) 1928bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek// [begin, end) should exist in the header's buffer. 1938bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek// 19435bdbf40624beba3fc00cb72ab444659939c1a6bTed Kremenek// Side-effects: 195537716ad8dd10f984b6cfe6985afade1185c5e3cJordy Rose// headers will be modified 19666c40400e7d6272b0cd675ada18dd62c1f0362c7Anna Zaks// error_code may be modified if either a warning or error is detected 19766c40400e7d6272b0cd675ada18dd62c1f0362c7Anna Zaks// 19835bdbf40624beba3fc00cb72ab444659939c1a6bTed Kremenek// Returns: 19966c40400e7d6272b0cd675ada18dd62c1f0362c7Anna Zaks// True if no error (as opposed to warning) is detected. 200c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose// False if an error (as opposed to warning) is detected. 
201c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose 2028bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek// 203dbd658e139b3e0bf084f75feaea8d844af9e319fJordy Rose// If there is indeed non-whitespace in the line, then the following 204dbd658e139b3e0bf084f75feaea8d844af9e319fJordy Rose// will take care of this for you: 205dbd658e139b3e0bf084f75feaea8d844af9e319fJordy Rose// while (*begin <= ' ') ++begin; 206dbd658e139b3e0bf084f75feaea8d844af9e319fJordy Rose// ProcessFirstLine(begin, end, is_request, &headers, &error_code); 207e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenek// 20830726c6baee1417307236e854f1474fdb3cedb98Argyrios Kyrtzidisbool ParseHTTPFirstLine(const char* begin, 209ccc263b44c62ce3a02f797a3ddb3d6017cf0e5e4Ted Kremenek const char* end, 210ccc263b44c62ce3a02f797a3ddb3d6017cf0e5e4Ted Kremenek bool is_request, 211ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks size_t max_request_uri_length, 212ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks BalsaHeaders* headers, 213ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks BalsaFrameEnums::ErrorCode* error_code) { 214ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks const char* current = begin; 215ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // HTTP firstlines all have the following structure: 2169c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu // LWS NONWS LWS NONWS LWS NONWS NOTCRLF CRLF 2173c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n" 2183c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // ws1 nws1 ws2 nws2 ws3 nws3 ws4 2193c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // | [-------) [-------) [----------------) 220ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // REQ: method request_uri version 2213c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // RESP: version statuscode reason 2223c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // 223ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // The 
first NONWS->LWS component we'll call firstline_a. 2243c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // The second firstline_b, and the third firstline_c. 2253c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // 2263c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // firstline_a goes from nws1 to (but not including) ws2 2273c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // firstline_b goes from nws2 to (but not including) ws3 2283c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // firstline_c goes from nws3 to (but not including) ws4 229ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // 2303c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek // In the code: 2319c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu // ws1 == whitespace_1_idx_ 2320b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // nws1 == non_whitespace_1_idx_ 2339c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu // ws2 == whitespace_2_idx_ 2349c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu // nws2 == non_whitespace_2_idx_ 235ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek // ws3 == whitespace_3_idx_ 236ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek // nws3 == non_whitespace_3_idx_ 237ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek // ws4 == whitespace_4_idx_ 238ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek 239ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek // Kill all whitespace (including '\r\n') at the end of the line. 
240ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek --end; 241ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek if (*end != '\n') { 242ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; 243ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" 244ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek << headers->OriginalHeadersForDebugging(); 2459e9a3e612d57b583800d5f0e48bb28d4afbd8b84Ted Kremenek return false; 246ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek } 247ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek while (begin < end && *end <= ' ') { 248ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek --end; 249ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek } 250ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek DCHECK(*end != '\n'); 251ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks if (*end == '\n') { 252ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; 253ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" 254ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks << headers->OriginalHeadersForDebugging(); 255ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks return false; 256ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek } 257ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek ++end; 258ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek 259ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek // The two following statements should not be possible. 
260ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek if (end == begin) { 261ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR; 2620b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n" 2630b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks << headers->OriginalHeadersForDebugging(); 2640b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks return false; 2650b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks } 2660b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks 2670b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // whitespace_1_idx_ 2680b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->whitespace_1_idx_ = current - begin; 2690b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // This loop is commented out as it is never used in current code. This is 2700b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // true only because we don't begin parsing the headers at all until we've 2710b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // encountered a non whitespace character at the beginning of the stream, at 2720b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // which point we begin our demarcation of header-start. If we did -not- do 2730b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop 2740b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // would be necessary for the proper functioning of this parsing. 2750b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // This is left here as this function may (in the future) be refactored out 2760b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // of the BalsaFrame class so that it may be shared between code in 2770b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // BalsaFrame and BalsaHeaders (where it would be used in some variant of the 2780b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // set_first_line() function (at which point it would be necessary). 
2790b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks#if 0 2800b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks while (*current <= ' ') { 281241677a13cc46647a8f5098b3e3239bd9480dca2Ted Kremenek ++current; 28277d7ef8d8a80ccb2ab3d25c80810571e3ab14ee4Ted Kremenek } 283f185cc1ac77a84139c603eee3473b88dcb839c68Anna Zaks#endif 284f185cc1ac77a84139c603eee3473b88dcb839c68Anna Zaks // non_whitespace_1_idx_ 2856bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks headers->non_whitespace_1_idx_ = current - begin; 2866bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks do { 2876bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks // The first time through, we're guaranteed that the current character 2886bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks // won't be a whitespace (else the loop above wouldn't have terminated). 2890b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // That implies that we're guaranteed to get at least one non-whitespace 2900b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // character if we get into this loop at all. 
2911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump ++current; 2926bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks if (current == end) { 2936bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks headers->whitespace_2_idx_ = current - begin; 2946bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks headers->non_whitespace_2_idx_ = current - begin; 295fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose headers->whitespace_3_idx_ = current - begin; 2960b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->non_whitespace_3_idx_ = current - begin; 2970b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->whitespace_4_idx_ = current - begin; 298183ff98f425d470c2a0276880aaf43496c9dad14Argyrios Kyrtzidis // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD for request 299fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response 300fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose *error_code = 301fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose static_cast<BalsaFrameEnums::ErrorCode>( 3020b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION + 303fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose is_request); 304fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose if (!is_request) { // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION 3058bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek return false; 3066bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks } 3076bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks goto output_exhausted; 3086bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks } 3096bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks } while (*current > ' '); 3106bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks // whitespace_2_idx_ 3110b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->whitespace_2_idx_ = current - begin; 3126bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks do { 3136bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks ++current; 
3140b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // Note that due to the loop which consumes all of the whitespace 3156bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks // at the end of the line, current can never == end while in this function. 3166bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks } while (*current <= ' '); 3176bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks // non_whitespace_2_idx_ 3186bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks headers->non_whitespace_2_idx_ = current - begin; 3196bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks do { 3208bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek ++current; 3216bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks if (current == end) { 3220b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->whitespace_3_idx_ = current - begin; 3230b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->non_whitespace_3_idx_ = current - begin; 3246bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks headers->whitespace_4_idx_ = current - begin; 32577d7ef8d8a80ccb2ab3d25c80810571e3ab14ee4Ted Kremenek // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request 3260b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response 3270b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks *error_code = 3280b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks static_cast<BalsaFrameEnums::ErrorCode>( 3290b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE 3300b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks + is_request); 3310b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks goto output_exhausted; 3320b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks } 3330b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks } while (*current > ' '); 3340b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // whitespace_3_idx_ 3350b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->whitespace_3_idx_ = current - begin; 
3360b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks do { 3371eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump ++current; 3380b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // Note that due to the loop which consumes all of the whitespace 3390b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // at the end of the line, current can never == end while in this function. 3400b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks } while (*current <= ' '); 3410b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // non_whitespace_3_idx_ 3420b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->non_whitespace_3_idx_ = current - begin; 3430b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks headers->whitespace_4_idx_ = end - begin; 3440b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks 3450b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks output_exhausted: 3460b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // Note that we don't fail the parse immediately when parsing of the 3470b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // firstline fails. Depending on the protocol type, we may want to accept 348dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks // a firstline with only one or two elements, e.g., for HTTP/0.9: 3490b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // GET\r\n 3500b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks // or 351dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks // GET /\r\n 3521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // should be parsed without issue (though the visitor should know that 353dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks // parsing the entire line was not exactly as it should be). 354dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks // 355ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Eventually, these errors may be removed alltogether, as the visitor can 356ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // detect them on its own by examining the size of the various fields. 
357dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks // headers->set_first_line(non_whitespace_1_idx_, current); 358dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks 3591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump if (is_request) { 360e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) > 361846d4e923bf11bcdc2816758aafa331795f29230Ted Kremenek max_request_uri_length) { 362846d4e923bf11bcdc2816758aafa331795f29230Ted Kremenek // For requests, we need at least the method. We could assume that a 3630a3ed3143b00f237decb1288c1ff574ae09eba4eTed Kremenek // blank URI means "/". If version isn't stated, it should be assumed 364e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // to be HTTP/0.9 by the visitor. 365e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG; 366d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis return false; 367056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks } 368ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } else { 369ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks headers->parsed_response_code_ = 0; 3700a3ed3143b00f237decb1288c1ff574ae09eba4eTed Kremenek { 371cbb67480094b3bcb5b715acd827cbad55e2a204cSean Hunt const char* parsed_response_code_current = 372056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks begin + headers->non_whitespace_2_idx_; 373056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks const char* parsed_response_code_end = begin + headers->whitespace_3_idx_; 374056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10; 375056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks 376d074441e027471a914cbb909a7aad1d43224950fZhongxing Xu // Convert a string of [0-9]* into an int. 
377d074441e027471a914cbb909a7aad1d43224950fZhongxing Xu // Note that this allows for the conversion of response codes which 378056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks // are outside the bounds of normal HTTP response codes (no checking 3799dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu // is done to ensure that these are valid-- they're merely parsed)! 38000eb3f9c5b33e3d99aee1f8b75dd9c9678fdd66bFrancois Pichet while (parsed_response_code_current < parsed_response_code_end) { 3819dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu if (*parsed_response_code_current < '0' || 3829dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu *parsed_response_code_current > '9') { 383e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT; 384e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen return false; 3859dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu } 386e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen size_t status_code_x_10 = headers->parsed_response_code_ * 10; 3879dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu uint8 c = *parsed_response_code_current - '0'; 388e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen if ((headers->parsed_response_code_ > kMaxDiv10) || 389e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen (std::numeric_limits<size_t>::max() - status_code_x_10) < c) { 390e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen // overflow. 
3919dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT; 392e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen return false; 393e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen } 394e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen headers->parsed_response_code_ = status_code_x_10 + c; 395e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen ++parsed_response_code_current; 396e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen } 397056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks } 398056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks } 399056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks return true; 400056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks} 401056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks 402056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// begin - beginning of the firstline 403056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// end - end of the firstline 404056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// 405056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// A precondition for this function is that there is non-whitespace between 406056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// [begin, end). If this precondition is not met, the function will not perform 407056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// as expected (and bad things may happen, and it will eat your first, second, 408d074441e027471a914cbb909a7aad1d43224950fZhongxing Xu// and third unborn children!). 409056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// 410056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// Another precondition for this function is that [begin, end) includes 411dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks// at most one newline, which must be at the end of the line. 
412dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaksvoid BalsaFrame::ProcessFirstLine(const char* begin, const char* end) { 413dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks BalsaFrameEnums::ErrorCode previous_error = last_error_; 4149c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu if (!ParseHTTPFirstLine(begin, 4159c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu end, 416d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis is_request_, 417ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks max_request_uri_length_, 418ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks headers_, 4193c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek &last_error_)) { 4204ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu parse_state_ = BalsaFrameEnums::PARSE_ERROR; 421056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks visitor_->HandleHeaderError(this); 4224ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu return; 4234ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu } 424056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks if (previous_error != last_error_) { 4254ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu visitor_->HandleHeaderWarning(this); 4264ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu } 427056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks 4284ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu if (is_request_) { 4294ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu int version_length = 430056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_; 4314ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu visitor_->ProcessRequestFirstLine( 4324ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu begin + headers_->non_whitespace_1_idx_, 4334ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_, 4344ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu begin + headers_->non_whitespace_1_idx_, 435ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna 
Zaks headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_, 436dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks begin + headers_->non_whitespace_2_idx_, 437dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_, 4384ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu begin + headers_->non_whitespace_3_idx_, 4394ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu version_length); 440056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks if (version_length == 0) 441056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 442056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks } else { 4438bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek visitor_->ProcessResponseFirstLine( 444056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks begin + headers_->non_whitespace_1_idx_, 4452210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_, 4462210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu begin + headers_->non_whitespace_1_idx_, 4472210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_, 4482210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu begin + headers_->non_whitespace_2_idx_, 4492210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_, 4502210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu begin + headers_->non_whitespace_3_idx_, 4512210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_); 4522210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu } 4532210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu} 454b13453bd8a91f331d0910ca95ad52aa41b52f648Zhongxing Xu 455056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// 'stream_begin' points to the first character of the headers buffer. 
456b13453bd8a91f331d0910ca95ad52aa41b52f648Zhongxing Xu// 'line_begin' points to the first character of the line. 4572210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu// 'current' points to a char which is ':'. 458056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// 'line_end' points to the position of '\n' + 1. 4594ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu// 'line_begin' points to the position of first character of line. 4604ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xuvoid BalsaFrame::CleanUpKeyValueWhitespace( 461d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis const char* stream_begin, 462056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks const char* line_begin, 4634ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu const char* current, 464d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis const char* line_end, 465056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks HeaderLineDescription* current_header_line) { 4664ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu const char* colon_loc = current; 467d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis DCHECK_LT(colon_loc, line_end); 468056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks DCHECK_EQ(':', *colon_loc); 469056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks DCHECK_EQ(':', *current); 4709c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu DCHECK_GE(' ', *line_end) 4719c378f705405d37f49795d5e915989de774fe11fTed Kremenek << "\"" << std::string(line_begin, line_end) << "\""; 4726889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks 4730bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek // TODO(fenix): Investigate whether or not the bounds tests in the 4740bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek // while loops here are redundant, and if so, remove them. 
4750bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek --current; 4766889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks while (current > line_begin && *current <= ' ') --current; 4776889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks current += (current != colon_loc); 4780bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek current_header_line->key_end_idx = current - stream_begin; 479f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall 480f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall current = colon_loc; 481892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek DCHECK_EQ(':', *current); 482f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall ++current; 483e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek while (current < line_end && *current <= ' ') ++current; 484e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek current_header_line->value_begin_idx = current - stream_begin; 485e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek 4861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump DCHECK_GE(current_header_line->key_end_idx, 487ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks current_header_line->first_char_idx); 488e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek DCHECK_GE(current_header_line->value_begin_idx, 4891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump current_header_line->key_end_idx); 490e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek DCHECK_GE(current_header_line->last_char_idx, 491f85e193739c953358c865005855253af4f68a497John McCall current_header_line->value_begin_idx); 492f85e193739c953358c865005855253af4f68a497John McCall} 4931b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek 4941b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenekinline void BalsaFrame::FindColonsAndParseIntoKeyValue() { 4951b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek DCHECK(!lines_.empty()); 4961b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 
4979be88403e965cc49af76c9d33d818781d44b333eFrancois Pichet // The last line is always just a newline (and is uninteresting). 498c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek const Lines::size_type lines_size_m1 = lines_.size() - 1; 499ed8abf18329df67b0abcbb3a10458bd8c1d2a595Douglas Gregor#if __SSE2__ 5001b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':', 5011b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek ':', ':', ':', ':', ':', ':', ':', ':'}; 5026ad6f2848d7652ab2991286eb48be440d3493b28Francois Pichet const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16; 5034ca8ac2e61c37ddadf37024af86f3e1019af8532Douglas Gregor#endif // __SSE2__ 50421ff2e516b0e0bc8c1dbf965cb3d44bac3c64330John Wiegley const char* current = stream_begin + lines_[1].first; 505552622067dc45013d240f73952fece703f5e63bdJohn Wiegley // This code is a bit more subtle than it may appear at first glance. 5061b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // This code looks for a colon in the current line... but it also looks 507c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek // beyond the current line. If there is no colon in the current line, then 5086b219d082434394c1ac401390ec1d1967727815aSebastian Redl // for each subsequent line (until the colon which -has- been found is 509be230c36e32142cbdcdbe9c97511d097beeecbabDouglas Gregor // associated with a line), no searching for a colon will be performed. In 510c7793c73ba8a343de3f2552d984851985a46f159Douglas Gregor // this way, we minimize the amount of bytes we have scanned for a colon. 
51128bbe4b8acc338476fe0825769b41fb32b423c72John Wiegley for (Lines::size_type i = 1; i < lines_size_m1;) { 51228bbe4b8acc338476fe0825769b41fb32b423c72John Wiegley const char* line_begin = stream_begin + lines_[i].first; 51301d08018b7cf5ce1601707cfd7a84d22015fc04eDouglas Gregor 514ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Here we handle possible continuations. Note that we do not replace 515337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek // the '\n' in the line before a continuation (at least, as of now), 516337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek // which implies that any code which looks for a value must deal with 517ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // "\r\n", etc -within- the line (and not just at the end of it). 518c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek for (++i; i < lines_size_m1; ++i) { 519c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek const char c = *(stream_begin + lines_[i].first); 5205fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek if (c > ' ') { 5215fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek // Not a continuation, so stop. Note that if the 'original' i = 1, 5225fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek // and the next line is not a continuation, we'll end up with i = 2 52391a5755ad73c5dc1dfb167e448fdd74e75a6df56John McCall // when we break. This handles the incrementing of i for the outer 524ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // loop. 
5255fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek break; 5265fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek } 527f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall } 528f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall const char* line_end = stream_begin + lines_[i - 1].second; 529f111d935722ed488144600cea5ed03a6b5069e8fPeter Collingbourne DCHECK_LT(line_begin - stream_begin, line_end - stream_begin); 530f111d935722ed488144600cea5ed03a6b5069e8fPeter Collingbourne 5311b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // We cleanup the whitespace at the end of the line before doing anything 5321b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // else of interest as it allows us to do nothing when irregularly formatted 5331b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // headers are parsed (e.g. those with only keys, only values, or no colon). 5341b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // 5351b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // We're guaranteed to have *line_end > ' ' while line_end >= line_begin. 
5361b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek --line_end; 53746eaf7789a1059a7b42b7dbd183150c72df5738fTed Kremenek DCHECK_EQ('\n', *line_end) 5381b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek << "\"" << std::string(line_begin, line_end) << "\""; 5391b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek while (*line_end <= ' ' && line_end > line_begin) { 540d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek --line_end; 5411b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek } 542d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek ++line_end; 5431b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek DCHECK_GE(' ', *line_end); 5441b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek DCHECK_LT(line_begin, line_end); 545534986f2b21e6050bf00163cd6423fd92155a6edRichard Smith 5461b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // We use '0' for the block idx, because we're always writing to the first 5471b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // block from the framer (we do this because the framer requires that the 548d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek // entire header sequence be in a contiguous buffer). 
549d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek headers_->header_lines_.push_back( 550ba0513de93d2fab6db5ab30b6927209fcc883078Douglas Gregor HeaderLineDescription(line_begin - stream_begin, 5511b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek line_end - stream_begin, 5521b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek line_end - stream_begin, 5538ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek line_end - stream_begin, 5548f08426e6f54ed20b959018f24dbea106a00b4adJordy Rose 0)); 5558bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek if (current >= line_end) { 5565eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; 5575eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek visitor_->HandleHeaderWarning(this); 558ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Then the next colon will not be found within this header line-- time 5598ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek // to try again with another header-line. 5608ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek continue; 5618ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek } else if (current < line_begin) { 5624beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek // When this condition is true, the last detected colon was part of a 563ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // previous line. We reset to the beginning of the line as we don't care 5644beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek // about the presence of any colon before the beginning of the current 565ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // line. 
5664beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek current = line_begin; 5674beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek } 5681a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if __SSE2__ 5691a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek while (current < header_lines_end_m16) { 5701a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek __m128i header_bytes = 5711a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek _mm_loadu_si128(reinterpret_cast<const __m128i *>(current)); 57214429b918bd2f4cb52abc75546a7fe37142054caArgyrios Kyrtzidis __m128i colon_cmp = 573f8b5aae41e46f94fe90ed5f1ee98f36f0aa59dc9Ted Kremenek _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons)); 57414429b918bd2f4cb52abc75546a7fe37142054caArgyrios Kyrtzidis int colon_msk = _mm_movemask_epi8(colon_cmp); 57514429b918bd2f4cb52abc75546a7fe37142054caArgyrios Kyrtzidis if (colon_msk == 0) { 576f85e193739c953358c865005855253af4f68a497John McCall current += 16; 5778bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek continue; 578f85e193739c953358c865005855253af4f68a497John McCall } 579f85e193739c953358c865005855253af4f68a497John McCall current += (ffs(colon_msk) - 1); 5805eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek if (current > line_end) { 5815eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek break; 582f85e193739c953358c865005855253af4f68a497John McCall } 583f85e193739c953358c865005855253af4f68a497John McCall goto found_colon; 584f85e193739c953358c865005855253af4f68a497John McCall } 585ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks#endif // __SSE2__ 586e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen for (; current < line_end; ++current) { 587f85e193739c953358c865005855253af4f68a497John McCall if (*current != ':') { 588f85e193739c953358c865005855253af4f68a497John McCall continue; 5891b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek } 5901b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek goto found_colon; 5911b49d762e9658b6b6d1b677dca005324a7b1126dTed 
Kremenek } 5921b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // If we've gotten to here, then there was no colon 5931b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // in the line. The arguments we passed into the construction 5941b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // for the HeaderLineDescription object should be OK-- it assumes 5951b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // that the entire content is 'key' by default (which is true, as 596f85e193739c953358c865005855253af4f68a497John McCall // there was no colon, there can be no value). Note that this is a 5971b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek // construct which is technically not allowed by the spec. 5981b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON; 5991b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek visitor_->HandleHeaderWarning(this); 6001b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek continue; 601eb382ec1507cf2c8c12d7443d0b67c076223aec6Patrick Beard found_colon: 6021b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek DCHECK_EQ(*current, ':'); 6031b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek DCHECK_LE(current - stream_begin, line_end - stream_begin); 6041b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek DCHECK_LE(stream_begin - stream_begin, current - stream_begin); 6051b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek 606e08ce650a2b02410eddd1f60a4aa6b3d4be71e73Peter Collingbourne HeaderLineDescription& current_header_line = headers_->header_lines_.back(); 60756ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall current_header_line.key_end_idx = current - stream_begin; 60861eee0ca33b29e102f11bab77c8b74cc00e2392bTanya Lattner current_header_line.value_begin_idx = current_header_line.key_end_idx; 609276b061970939293f1abaf694bd3ef05b2cbda79Eli Friedman if (current < line_end) { 610337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek ++current_header_line.key_end_idx; 
6111b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek 612337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek CleanUpKeyValueWhitespace(stream_begin, 613337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek line_begin, 614337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek current, 615337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek line_end, 616337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek ¤t_header_line); 6171b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek } 6181b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek } 619bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xu} 620bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xu 621bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xuvoid BalsaFrame::ProcessContentLengthLine( 622477323d58a0de352c6a61e08b5a83127c4adc904Zhongxing Xu HeaderLines::size_type line_idx, 6231a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek BalsaHeadersEnums::ContentLengthStatus* status, 624bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xu size_t* length) { 625f901a7de97f46ba2b1ff153f9fb83d00dc37cfcfDouglas Gregor const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; 626e739a29c62c67eaec0af5c4d5c75f9e8f11228bdTed Kremenek const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 627e739a29c62c67eaec0af5c4d5c75f9e8f11228bdTed Kremenek const char* line_end = stream_begin + header_line.last_char_idx; 628cc2c4b293d8590346f26b7ecc16d299226b8794fTed Kremenek const char* value_begin = (stream_begin + header_line.value_begin_idx); 629bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek 630bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek if (value_begin >= line_end) { 631bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek // There is no non-whitespace value data. 
632bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek#if DEBUGFRAMER 633bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek LOG(INFO) << "invalid content-length -- no non-whitespace value data"; 634bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek#endif 635e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; 636bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek return; 6371eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 6381a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek 6391a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek *length = 0; 6401a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek while (value_begin < line_end) { 6411a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek if (*value_begin < '0' || *value_begin > '9') { 6421a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek // bad! content-length found, and couldn't parse all of it! 6431a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH; 6441a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if DEBUGFRAMER 6451a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek LOG(INFO) << "invalid content-length - non numeric character detected"; 6461a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#endif // DEBUGFRAMER 6471a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek return; 6481a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek } 6491a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10; 6501a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek size_t length_x_10 = *length * 10; 6511a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek const unsigned char c = *value_begin - '0'; 6521a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek if (*length > kMaxDiv10 || 6531a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek (std::numeric_limits<size_t>::max() - length_x_10) < c) { 6541a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek 
*status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW; 6551a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if DEBUGFRAMER 6561a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek LOG(INFO) << "content-length overflow"; 6571a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#endif // DEBUGFRAMER 6581a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek return; 6591a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek } 6601a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek *length = length_x_10 + c; 6611a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek ++value_begin; 6621a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek } 6631a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if DEBUGFRAMER 6641a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek LOG(INFO) << "content_length parsed: " << *length; 6651a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#endif // DEBUGFRAMER 6661a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH; 6671a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek} 668540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek 669ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksvoid BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) { 670892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek const HeaderLineDescription& header_line = headers_->header_lines_[line_idx]; 671ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 672540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek const char* line_end = stream_begin + header_line.last_char_idx; 6731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump const char* value_begin = stream_begin + header_line.value_begin_idx; 674e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek size_t value_length = line_end - value_begin; 675ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 676e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek if ((value_length == 7) && 
677ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks !strncasecmp(value_begin, "chunked", 7)) { 678e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek headers_->transfer_encoding_is_chunked_ = true; 679b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek } else if ((value_length == 8) && 680c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek !strncasecmp(value_begin, "identity", 8)) { 681ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks headers_->transfer_encoding_is_chunked_ = false; 682c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek } else { 683ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING; 684c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 685c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek visitor_->HandleHeaderError(this); 686e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek return; 68703509aea098772644bf4662dc1c88634818ceeccZhongxing Xu } 688e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek} 689ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 690e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremeneknamespace { 691ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksbool SplitStringPiece(base::StringPiece original, char delim, 692e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek base::StringPiece* before, base::StringPiece* after) { 693e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek const char* p = original.data(); 6942de56d1d0c3a504ad1529de2677628bdfbb95cd4John McCall const char* end = p + original.size(); 6958bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek 696ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks while (p != end) { 6975eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek if (*p == delim) { 6985eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek ++p; 6995eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } else { 700e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek const char* start = p; 
701e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek while (++p != end && *p != delim) { 70206fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek // Skip to the next occurence of the delimiter. 703ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } 704ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks *before = base::StringPiece(start, p - start); 705b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek if (p != end) 706bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu *after = base::StringPiece(p + 1, end - (p + 1)); 707031ccc0555a82afc2e8afe29e19dd57ff204e2deZhongxing Xu else 708892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek *after = base::StringPiece(""); 7099c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek StringPieceUtils::RemoveWhitespaceContext(before); 71048af2a9c1ed3259512f2d1431720add1fbe8fb5fTed Kremenek StringPieceUtils::RemoveWhitespaceContext(after); 71148af2a9c1ed3259512f2d1431720add1fbe8fb5fTed Kremenek return true; 712892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek } 71348af2a9c1ed3259512f2d1431720add1fbe8fb5fTed Kremenek } 714ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 715e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek *before = original; 716e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek *after = ""; 71706fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek return false; 718b277159055933e610bbc80262b600d3ad7e0595cTed Kremenek} 719b277159055933e610bbc80262b600d3ad7e0595cTed Kremenek 7209fcce65e7e1307b5b8da9be13e4092d6bb94dc1dRichard Smith// TODO(phython): Fix this function to properly deal with quoted values. 7219fcce65e7e1307b5b8da9be13e4092d6bb94dc1dRichard Smith// E.g. ";;foo", "\";;\"", or \"aa; 722ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks// The last example, the semi-colon is a separator between extensions. 
723b277159055933e610bbc80262b600d3ad7e0595cTed Kremenekvoid ProcessChunkExtensionsManual(base::StringPiece all_extensions, 724ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks BalsaHeaders* extensions) { 72506fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek base::StringPiece extension; 726e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek base::StringPiece remaining; 727337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek StringPieceUtils::RemoveWhitespaceContext(&all_extensions); 728337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek SplitStringPiece(all_extensions, ';', &extension, &remaining); 729337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek while (!extension.empty()) { 730337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek base::StringPiece key; 731337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek base::StringPiece value; 732337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek SplitStringPiece(extension, '=', &key, &value); 733337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek if (!value.empty()) { 73406fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek // Strip quotation marks if they exist. 
735744f1cd66bb6747ea71fbf1172698e7bf35ec88dTed Kremenek if (!value.empty() && value[0] == '"') 7367ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu value.remove_prefix(1); 7377ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu if (!value.empty() && value[value.length() - 1] == '"') 7387ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu value.remove_suffix(1); 7397ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu } 740ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 741892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek extensions->AppendHeader(key, value); 742ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 7437ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu StringPieceUtils::RemoveWhitespaceContext(&remaining); 7447ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu SplitStringPiece(remaining, ';', &extension, &remaining); 7457ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu } 746856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu} 747ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 74803509aea098772644bf4662dc1c88634818ceeccZhongxing Xu} // anonymous namespace 749856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu 750ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksvoid BalsaFrame::ProcessChunkExtensions(const char* input, size_t size, 751856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu BalsaHeaders* extensions) { 752856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions); 753856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu} 7546b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu 755ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksvoid BalsaFrame::ProcessHeaderLines() { 75603509aea098772644bf4662dc1c88634818ceeccZhongxing Xu HeaderLines::size_type content_length_idx = 0; 7576b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu HeaderLines::size_type transfer_encoding_idx = 0; 758ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 
7596b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu DCHECK(!lines_.empty()); 7606b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu#if DEBUGFRAMER 761e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n"; 762e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek#endif // DEBUGFRAMER 7631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 764e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // There is no need to attempt to process headers if no header lines exist. 765ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // There are at least two lines in the message which are not header lines. 7669c378f705405d37f49795d5e915989de774fe11fTed Kremenek // These two non-header lines are the first line of the message, and the 767e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // last line of the message (which is an empty line). 768ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Thus, we test to see if we have more than two lines total before attempting 769e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // to parse any header lines. 770e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek if (lines_.size() > 2) { 7711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump const char* stream_begin = headers_->OriginalHeaderStreamBegin(); 772e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek 773ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Then, for the rest of the header data, we parse these into key-value 774892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // pairs. 775ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks FindColonsAndParseIntoKeyValue(); 776e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // At this point, we've parsed all of the headers. Time to look for those 777f22679e3e5d5f5754931952e58112b4c863a4137Zhongxing Xu // headers which we require for framing. 
778f22679e3e5d5f5754931952e58112b4c863a4137Zhongxing Xu const HeaderLines::size_type 779ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks header_lines_size = headers_->header_lines_.size(); 780892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) { 781ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks const HeaderLineDescription& current_header_line = 782f22679e3e5d5f5754931952e58112b4c863a4137Zhongxing Xu headers_->header_lines_[i]; 7831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump const char* key_begin = 78456ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall (stream_begin + current_header_line.first_char_idx); 785e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek const char* key_end = (stream_begin + current_header_line.key_end_idx); 786ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks const size_t key_len = key_end - key_begin; 78756ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall const char c = *key_begin; 78856ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall#if DEBUGFRAMER 78956ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len) 790ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks << " c: '" << c << "' key_len: " << key_len; 791e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek#endif // DEBUGFRAMER 792e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // If a header begins with either lowercase or uppercase 'c' or 't', then 7931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // the header may be one of content-length, connection, content-encoding 794bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu // or transfer-encoding. These headers are special, as they change the way 795ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // that the message is framed, and so the framer is required to search 796bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu // for them. 
797ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 798bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu 799bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu if (c == 'c' || c == 'C') { 800892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek if ((key_len == kContentLengthSize) && 801ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) { 802892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek BalsaHeadersEnums::ContentLengthStatus content_length_status = 803892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek BalsaHeadersEnums::NO_CONTENT_LENGTH; 804ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks size_t length = 0; 805e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek ProcessContentLengthLine(i, &content_length_status, &length); 806892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek if (content_length_idx != 0) { // then we've already seen one! 8071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump if ((headers_->content_length_status_ != content_length_status) || 808e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek ((headers_->content_length_status_ == 809ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks BalsaHeadersEnums::VALID_CONTENT_LENGTH) && 810e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek length != headers_->content_length_)) { 811ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS; 812e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 8131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump visitor_->HandleHeaderError(this); 8140835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis return; 8150d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu } 8160d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu continue; 8170d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu } else { 8180d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu content_length_idx = i + 1; 
8190d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu headers_->content_length_status_ = content_length_status; 820f85e193739c953358c865005855253af4f68a497John McCall headers_->content_length_ = length; 821f85e193739c953358c865005855253af4f68a497John McCall content_length_remaining_ = length; 822ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } 8239c378f705405d37f49795d5e915989de774fe11fTed Kremenek 824f85e193739c953358c865005855253af4f68a497John McCall } 825f85e193739c953358c865005855253af4f68a497John McCall } else if (c == 't' || c == 'T') { 826f85e193739c953358c865005855253af4f68a497John McCall if ((key_len == kTransferEncodingSize) && 827f85e193739c953358c865005855253af4f68a497John McCall 0 == strncasecmp(key_begin, kTransferEncoding, 828f85e193739c953358c865005855253af4f68a497John McCall kTransferEncodingSize)) { 829f85e193739c953358c865005855253af4f68a497John McCall if (transfer_encoding_idx != 0) { 830f85e193739c953358c865005855253af4f68a497John McCall last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS; 831f85e193739c953358c865005855253af4f68a497John McCall parse_state_ = BalsaFrameEnums::PARSE_ERROR; 832f85e193739c953358c865005855253af4f68a497John McCall visitor_->HandleHeaderError(this); 833f85e193739c953358c865005855253af4f68a497John McCall return; 834f85e193739c953358c865005855253af4f68a497John McCall } 835f85e193739c953358c865005855253af4f68a497John McCall transfer_encoding_idx = i + 1; 836f85e193739c953358c865005855253af4f68a497John McCall } 837ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } else if (i == 0 && (key_len == 0 || c == ' ')) { 838e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT; 839e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 840b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek visitor_->HandleHeaderError(this); 84103e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor return; 
842ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } 84303e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor } 84403e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor if (headers_->transfer_encoding_is_chunked_) { 845e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen headers_->content_length_ = 0; 846e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH; 84703e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor content_length_remaining_ = 0; 848e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen } 849ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks if (transfer_encoding_idx != 0) { 85003e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor ProcessTransferEncodingLine(transfer_encoding_idx - 1); 85103e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor } 85203e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor } 853c4f8706b6539e06a5de153bd72850bb2e0a71456Zhongxing Xu} 854ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 855c4f8706b6539e06a5de153bd72850bb2e0a71456Zhongxing Xuvoid BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() { 856ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // For responses, can't have a body if the request was a HEAD, or if it is 857c4f8706b6539e06a5de153bd72850bb2e0a71456Zhongxing Xu // one of these response-codes. 
rfc2616 section 4.3 8581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 85997ed4f68f5dba3e21e7a490ef0f9ffd3bfead7f8Ted Kremenek if (is_request_ || 860ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks !(request_was_head_ || 861892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek (headers_->parsed_response_code_ >= 100 && 862ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks headers_->parsed_response_code_ < 200) || 863469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek (headers_->parsed_response_code_ == 204) || 864ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks (headers_->parsed_response_code_ == 304))) { 86597ed4f68f5dba3e21e7a490ef0f9ffd3bfead7f8Ted Kremenek // Then we can have a body. 866ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks if (headers_->transfer_encoding_is_chunked_) { 867892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // Note that 868ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // if ( Transfer-Encoding: chunked && Content-length: ) 86997ed4f68f5dba3e21e7a490ef0f9ffd3bfead7f8Ted Kremenek // then Transfer-Encoding: chunked trumps. 870af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek // This is as specified in the spec. 871af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek // rfc2616 section 4.4.3 872ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; 873af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek } else { 874ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Errors parsing content-length definitely can cause 875af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek // protocol errors/warnings 8761eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump switch (headers_->content_length_status_) { 877b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // If we have a content-length, and it is parsed 878ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // properly, there are two options. 
879b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // 1) zero content, in which case the message is done, and 880b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // 2) nonzero content, in which case we have to 881b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // consume the body. 882b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek case BalsaHeadersEnums::VALID_CONTENT_LENGTH: 883b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek if (headers_->content_length_ == 0) { 884b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 885b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek } else { 886b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek parse_state_ = BalsaFrameEnums::READING_CONTENT; 887b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek } 888b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek break; 889b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW: 890b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek case BalsaHeadersEnums::INVALID_CONTENT_LENGTH: 891b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // If there were characters left-over after parsing the 892b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // content length, we should flag an error and stop. 893b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 894b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH; 895b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek visitor_->HandleHeaderError(this); 896b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek break; 897b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // We can have: no transfer-encoding, no content length, and no 898b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // connection: close... 
899b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // Unfortunately, this case doesn't seem to be covered in the spec. 900b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek // We'll assume that the safest thing to do here is what the google 901ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // binaries before 2008 already do, which is to assume that 902e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek // everything until the connection is closed is body. 903b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek case BalsaHeadersEnums::NO_CONTENT_LENGTH: 9041eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump if (is_request_) { 905bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek base::StringPiece method = headers_->request_method(); 906bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek // POSTs and PUTs should have a detectable body length. If they 907bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek // do not we consider it an error. 908ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks if ((method.size() == 4 && 909bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek strncmp(method.data(), "POST", 4) == 0) || 910bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek (method.size() == 3 && 9111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump strncmp(method.data(), "PUT", 3) == 0)) { 9121b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 913ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks last_error_ = 9141b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH; 915ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks visitor_->HandleHeaderError(this); 9161b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek break; 9171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 9188ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 919ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } else { 
9208ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE; 921ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH; 9228ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor visitor_->HandleHeaderWarning(this); 9238ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor } 924f4e3cfbe8abd124be6341ef5d714819b4fbd9082Peter Collingbourne break; 925ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // The COV_NF_... statements here provide hints to the apparatus 926f4e3cfbe8abd124be6341ef5d714819b4fbd9082Peter Collingbourne // which computes coverage reports/ratios that this code is never 927f4e3cfbe8abd124be6341ef5d714819b4fbd9082Peter Collingbourne // intended to be executed, and should technically be impossible. 928ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // COV_NF_START 929e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek default: 9301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump LOG(FATAL) << "Saw a content_length_status: " 931e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek << headers_->content_length_status_ << " which is unknown."; 9329c378f705405d37f49795d5e915989de774fe11fTed Kremenek // COV_NF_END 933a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek } 934a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek } 935a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek } 936a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek} 937a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek 938a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremeneksize_t BalsaFrame::ProcessHeaders(const char* message_start, 939a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek size_t message_length) { 9401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump const char* const original_message_start = message_start; 9419c378f705405d37f49795d5e915989de774fe11fTed Kremenek const char* const message_end = message_start + message_length; 
9428bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek const char* message_current = message_start; 943ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks const char* checkpoint = message_start; 9445eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek 9455eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek if (message_length == 0) { 9465eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek goto bottom; 947a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek } 948e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek 949e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek while (message_current < message_end) { 9506987c7b74146b9658b1925c5981f8b0cd0672b55Zhongxing Xu size_t base_idx = headers_->GetReadableBytesFromHeaderStream(); 95172374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek 952ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // Yes, we could use strchr (assuming null termination), or 95303509aea098772644bf4662dc1c88634818ceeccZhongxing Xu // memchr, but as it turns out that is slower than this tight loop 954ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // for the input that we see. 
955031ccc0555a82afc2e8afe29e19dd57ff204e2deZhongxing Xu if (!saw_non_newline_char_) { 956892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek do { 9579c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek const char c = *message_current; 95872374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek if (c != '\r' && c != '\n') { 95972374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek if (c <= ' ') { 960892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 961ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST; 9621b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek visitor_->HandleHeaderError(this); 96372374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek goto bottom; 9644b9c2d235fb9449e249d74f48ecfec601650de93John McCall } else { 9654b9c2d235fb9449e249d74f48ecfec601650de93John McCall saw_non_newline_char_ = true; 9664b9c2d235fb9449e249d74f48ecfec601650de93John McCall checkpoint = message_start = message_current; 9678bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek goto read_real_message; 9684b9c2d235fb9449e249d74f48ecfec601650de93John McCall } 9694b9c2d235fb9449e249d74f48ecfec601650de93John McCall } 9705eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek ++message_current; 9715eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } while (message_current < message_end); 9725eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek goto bottom; // this is necessary to skip 'last_char_was_slash_r' checks 9734b9c2d235fb9449e249d74f48ecfec601650de93John McCall } else { 9744b9c2d235fb9449e249d74f48ecfec601650de93John McCall read_real_message: 9755eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // Note that SSE2 can be enabled on certain piii platforms. 
9765eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek#if __SSE2__ 9775eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek { 9784b9c2d235fb9449e249d74f48ecfec601650de93John McCall const char* const message_end_m16 = message_end - 16; 9794b9c2d235fb9449e249d74f48ecfec601650de93John McCall __v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', 9804b9c2d235fb9449e249d74f48ecfec601650de93John McCall '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' }; 9814b9c2d235fb9449e249d74f48ecfec601650de93John McCall while (message_current < message_end_m16) { 9821b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek // What this does (using compiler intrinsics): 9831b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek // 9841b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek // Load 16 '\n's into an xmm register 9855903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // Load 16 bytes of currennt message into an xmm register 9865903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // Do byte-wise equals on those two xmm registers 9875903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // Take the first bit of each byte, and put that into the first 9885903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // 16 bits of a mask 9895903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // If the mask is zero, no '\n' found. increment by 16 and try again 9905903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // Else scan forward to find the first set bit. 
9915903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // Increment current by the index of the first set bit 9925903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // (ffs returns index of first set bit + 1) 9935903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks __m128i msg_bytes = 9945903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks _mm_loadu_si128(const_cast<__m128i *>( 9955903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks reinterpret_cast<const __m128i *>(message_current))); 9965903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks __m128i newline_cmp = 9975903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines)); 9985903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks int newline_msk = _mm_movemask_epi8(newline_cmp); 9995903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks if (newline_msk == 0) { 10005903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks message_current += 16; 10015903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks continue; 10025903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks } 10030b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks message_current += (ffs(newline_msk) - 1); 10045903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks const size_t relative_idx = message_current - message_start; 10055903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks const size_t message_current_idx = 1 + base_idx + relative_idx; 10065903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks lines_.push_back(std::make_pair(last_slash_n_idx_, 10075903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks message_current_idx)); 10085903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks if (lines_.size() == 1) { 10095903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks headers_->WriteFromFramer(checkpoint, 10105903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks 1 + message_current - checkpoint); 1011253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks checkpoint = message_current + 1; 10125903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks const char* begin = 
headers_->OriginalHeaderStreamBegin(); 10135903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks#if DEBUGFRAMER 10145903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks LOG(INFO) << "First line " << std::string(begin, lines_[0].second); 10155903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks LOG(INFO) << "is_request_: " << is_request_; 10165903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks#endif 10175903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks ProcessFirstLine(begin, begin + lines_[0].second); 10185903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) 10195903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks goto process_lines; 10205903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) 10215903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks goto bottom; 10225903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks } 10235903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks const size_t chars_since_last_slash_n = (message_current_idx - 10245903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks last_slash_n_idx_); 10255903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks last_slash_n_idx_ = message_current_idx; 10265903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks if (chars_since_last_slash_n > 2) { 10275903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks // We have a slash-n, but the last slash n was 1028253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks // more than 2 characters away from this. Thus, we know 1029253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks // that this cannot be an end-of-header. 
1030253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks ++message_current; 1031253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks continue; 1032253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks } 10335903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks if ((chars_since_last_slash_n == 1) || 10345903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks (((message_current > message_start) && 10355903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks (*(message_current - 1) == '\r')) || 10365903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks (last_char_was_slash_r_))) { 10375903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks goto process_lines; 10385903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks } 10395903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks ++message_current; 10405903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks } 10415903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks } 1042c03a39e16762627b421247b12a2658be630a3300Anna Zaks#endif // __SSE2__ 1043253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks while (message_current < message_end) { 1044253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks if (*message_current != '\n') { 104527c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek ++message_current; 104627c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek continue; 1047c03a39e16762627b421247b12a2658be630a3300Anna Zaks } 104827c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek const size_t relative_idx = message_current - message_start; 1049c03a39e16762627b421247b12a2658be630a3300Anna Zaks const size_t message_current_idx = 1 + base_idx + relative_idx; 1050ca804539d908d3a0e8c72a0df5f1f571d29490bbTed Kremenek lines_.push_back(std::make_pair(last_slash_n_idx_, 1051253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks message_current_idx)); 1052253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks if (lines_.size() == 1) { 1053749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks headers_->WriteFromFramer(checkpoint, 1054749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks 1 + message_current - 
checkpoint); 1055749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks checkpoint = message_current + 1; 10565903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks const char* begin = headers_->OriginalHeaderStreamBegin(); 10573bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks#if DEBUGFRAMER 10585903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks LOG(INFO) << "First line " << std::string(begin, lines_[0].second); 10595903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks LOG(INFO) << "is_request_: " << is_request_; 10603bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks#endif 1061e62f048960645b79363408fdead53fec2a063c52Anna Zaks ProcessFirstLine(begin, begin + lines_[0].second); 10623bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) 10635903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks goto process_lines; 10645903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) 10655903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks goto bottom; 10665903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks } 1067b47dbcbc12430fdf3e5a5b9f59cdec5480e89e75Anna Zaks const size_t chars_since_last_slash_n = (message_current_idx - 1068253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks last_slash_n_idx_); 1069253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks last_slash_n_idx_ = message_current_idx; 10705903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks if (chars_since_last_slash_n > 2) { 1071749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks // false positive. 
1072253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks ++message_current; 1073253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks continue; 1074253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks } 107527c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek if ((chars_since_last_slash_n == 1) || 1076e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek (((message_current > message_start) && 1077e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek (*(message_current - 1) == '\r')) || 1078e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek (last_char_was_slash_r_))) { 1079e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek goto process_lines; 1080e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek } 1081e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek ++message_current; 10828bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek } 10835eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } 10845eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek continue; 10855eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek process_lines: 10861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump ++message_current; 108705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek DCHECK(message_current >= message_start); 108805a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek if (message_current > message_start) { 1089a8538d902fce9cfec20f39b34492268b51643819Ted Kremenek headers_->WriteFromFramer(checkpoint, message_current - checkpoint); 10901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 109105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek 10921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // Check if we have exceeded maximum headers length 109303509aea098772644bf4662dc1c88634818ceeccZhongxing Xu // Although we check for this limit before and after we call this function 109405a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // we check it here as well to make sure that in case the visitor changed 10951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // the max_header_length_ (for 
example after processing the first line) 10962de56d1d0c3a504ad1529de2677628bdfbb95cd4John McCall // we handle it gracefully. 10971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) { 109805a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 109905a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; 110005a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek visitor_->HandleHeaderError(this); 110105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek goto bottom; 110205a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek } 11031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 11049c378f705405d37f49795d5e915989de774fe11fTed Kremenek // Since we know that we won't be writing any more bytes of the header, 11052de56d1d0c3a504ad1529de2677628bdfbb95cd4John McCall // we tell that to the headers object. The headers object may make 110603509aea098772644bf4662dc1c88634818ceeccZhongxing Xu // more efficient allocation decisions when this is signaled. 
11071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump headers_->DoneWritingFromFramer(); 11085eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek { 110905a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek const char* readable_ptr = NULL; 11101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump size_t readable_size = 0; 111156ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size); 111205a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek visitor_->ProcessHeaderInput(readable_ptr, readable_size); 111356ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall } 111456ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall 11151eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // Ok, now that we've written everything into our header buffer, it is 111605a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // time to process the header lines (extract proper values for headers 111705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // which are important for framing). 
11181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump ProcessHeaderLines(); 11199c378f705405d37f49795d5e915989de774fe11fTed Kremenek if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) { 11201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump goto bottom; 112105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek } 112256ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall AssignParseStateAfterHeadersHaveBeenParsed(); 112305a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) { 112456ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall goto bottom; 11251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 11265eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek visitor_->ProcessHeaders(*headers_); 112705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek visitor_->HeaderDone(); 11281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) { 112905a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek visitor_->MessageDone(); 11301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 11319c378f705405d37f49795d5e915989de774fe11fTed Kremenek goto bottom; 11321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 11339c378f705405d37f49795d5e915989de774fe11fTed Kremenek // If we've gotten to here, it means that we've consumed all of the 11345eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // available input. We need to record whether or not the last character we 113505a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // saw was a '\r' so that a subsequent call to ProcessInput correctly finds 113605a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek // a header framing that is split across the two calls. 
113705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek last_char_was_slash_r_ = (*(message_end - 1) == '\r'); 113805a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek DCHECK(message_current >= message_start); 11396ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek if (message_current > message_start) { 11406ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek headers_->WriteFromFramer(checkpoint, message_current - checkpoint); 11416ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek } 11426ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek bottom: 11436ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek return message_current - original_message_start; 1144294fd0a62b95f512637910bf85c7efa6c2354b50Ted Kremenek} 11458bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek 1146294fd0a62b95f512637910bf85c7efa6c2354b50Ted Kremenek 11475eca482fe895ea57bc82410222e6426c09e63284Ted Kremeneksize_t BalsaFrame::BytesSafeToSplice() const { 1148294fd0a62b95f512637910bf85c7efa6c2354b50Ted Kremenek switch (parse_state_) { 11496ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek case BalsaFrameEnums::READING_CHUNK_DATA: 115003509aea098772644bf4662dc1c88634818ceeccZhongxing Xu return chunk_length_remaining_; 11516ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek case BalsaFrameEnums::READING_UNTIL_CLOSE: 11526ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek return std::numeric_limits<size_t>::max(); 11536ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek case BalsaFrameEnums::READING_CONTENT: 11546ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek return content_length_remaining_; 11556ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek default: 11561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump return 0; 115703509aea098772644bf4662dc1c88634818ceeccZhongxing Xu } 11586ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek} 11596ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek 11606ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenekvoid BalsaFrame::BytesSpliced(size_t 
bytes_spliced) { 11616ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek switch (parse_state_) { 11621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump case BalsaFrameEnums::READING_CHUNK_DATA: 11636ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek if (chunk_length_remaining_ >= bytes_spliced) { 11646ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek chunk_length_remaining_ -= bytes_spliced; 11656ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek if (chunk_length_remaining_ == 0) { 11666ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; 11676ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek } 11681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump return; 11696ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek } else { 11706ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek last_error_ = 11716ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; 11726ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek goto error_exit; 11736ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek } 11746ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek 11756ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek case BalsaFrameEnums::READING_UNTIL_CLOSE: 11766ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek return; 11771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 11785eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek case BalsaFrameEnums::READING_CONTENT: 11796ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek if (content_length_remaining_ >= bytes_spliced) { 11806ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek content_length_remaining_ -= bytes_spliced; 11819c378f705405d37f49795d5e915989de774fe11fTed Kremenek if (content_length_remaining_ == 0) { 1182a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 1183ad62deeb70e97da6bd514dd390ea1ce6af6ad81dAnna Zaks visitor_->MessageDone(); 
11841aae01a8308d2f8e31adab3f4d7ac35543aac680Anna Zaks } 1185a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks return; 1186a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks } else { 1187f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks last_error_ = 1188f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT; 1189b2331834a0515c80862ee51325c758a053829f15Ted Kremenek goto error_exit; 11901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 11911aae01a8308d2f8e31adab3f4d7ac35543aac680Anna Zaks 1192a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks default: 11934e82d3cf6fd4c907265e3fa3aac0a835c35dc759Anna Zaks last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO; 1194b2331834a0515c80862ee51325c758a053829f15Ted Kremenek goto error_exit; 1195b2331834a0515c80862ee51325c758a053829f15Ted Kremenek } 11961eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 119721028dd8850c64a414f7a82dfddcc291351203d6Ted Kremenek error_exit: 119821028dd8850c64a414f7a82dfddcc291351203d6Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 119921028dd8850c64a414f7a82dfddcc291351203d6Ted Kremenek visitor_->HandleBodyError(this); 12000fb0bc4067d6c9d7c0e655300ef309b05d3adfc9Ted Kremenek}; 1201f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks 1202f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks// You may note that the state-machine contained within this function has both 12034e82d3cf6fd4c907265e3fa3aac0a835c35dc759Anna Zaks// switch and goto labels for nearly the same thing. 
// For instance, the following two labels refer to the same code block:
//   label_reading_chunk_data:
//   case BalsaFrameEnums::READING_CHUNK_DATA:
// The 'case' statement is required for the switch statement which occurs when
// ProcessInput is invoked. The goto label is required as the state-machine
// does not use a computed goto in any subsequent operations.
//
// Since several states exit the state machine for various reasons, there is
// also one label at the bottom of the function. When it is appropriate to
// return from the function, that part of the state machine instead issues a
// "goto bottom;". This results in less code duplication, and makes debugging
// easier (as you can add a statement to a section of code which is guaranteed
// to be invoked when the function is exiting).
12175eca482fe895ea57bc82410222e6426c09e63284Ted Kremeneksize_t BalsaFrame::ProcessInput(const char* input, size_t size) { 1218cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks const char* current = input; 1219cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks const char* on_entry = current; 1220cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks const char* end = current + size; 1221cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks#if DEBUGFRAMER 1222cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks LOG(INFO) << "\n==============" 1223cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks << BalsaFrameEnums::ParseStateToString(parse_state_) 1224cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks << "===============\n"; 1225cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks#endif // DEBUGFRAMER 1226cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks 1227cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks DCHECK(headers_ != NULL); 1228cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks if (headers_ == NULL) return 0; 12295eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek 1230cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { 1231cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks const size_t header_length = headers_->GetReadableBytesFromHeaderStream(); 1232cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // Yes, we still have to check this here as the user can change the 1233cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // max_header_length amount! 1234cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // Also it is possible that we have reached the maximum allowed header size, 12350835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu // and we have more to consume (remember we are still inside 1236b38911f16b4943548db6a3695fc6ae23070b25d2Ted Kremenek // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error. 
12370835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu if (header_length > max_header_length_ || 12385eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek (header_length == max_header_length_ && size > 0)) { 12395eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 12405eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; 12410835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu visitor_->HandleHeaderError(this); 124273c498a08f4968b6987d1453c7b77929dcc6d5f7Argyrios Kyrtzidis goto bottom; 12435eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } 12445eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek size_t bytes_to_process = max_header_length_ - header_length; 12455eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek if (bytes_to_process > size) { 12465eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek bytes_to_process = size; 1247cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks } 12481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump current += ProcessHeaders(input, bytes_to_process); 12491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // If we are still reading headers check if we have crossed the headers 1250cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // limit. Note that we check for >= as opposed to >. This is because if 12510835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu // header_length_after equals max_header_length_ and we are still in the 1252cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // parse_state_ BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for 1253cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // sure that the headers limit will be crossed later on 12548bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) { 12555eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // Note that headers_ is valid only if we are still reading headers. 
12565eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek const size_t header_length_after = 1257cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks headers_->GetReadableBytesFromHeaderStream(); 1258cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks if (header_length_after >= max_header_length_) { 1259cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks parse_state_ = BalsaFrameEnums::PARSE_ERROR; 12601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG; 1261cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks visitor_->HandleHeaderError(this); 1262cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks } 12638bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek } 12645eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek goto bottom; 12655eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ || 1266cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks parse_state_ == BalsaFrameEnums::PARSE_ERROR) { 1267cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks // Can do nothing more 'till we're reset. 1268cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks goto bottom; 1269cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks } 1270f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks 1271f233d48cfc513b045e2c2cfca5c175220fbd0a82Ted Kremenek while (current < end) { 1272f233d48cfc513b045e2c2cfca5c175220fbd0a82Ted Kremenek switch (parse_state_) { 1273e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenek label_reading_chunk_length: 1274754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek case BalsaFrameEnums::READING_CHUNK_LENGTH: 1275ad8dcf4a9df0e24051dc31bf9e6f3cd138a34298Chris Lattner // In this state we read the chunk length. 1276754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // Note that once we hit a character which is not in: 12778bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek // [0-9;A-Fa-f\n], we transition to a different state. 
12785eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // 12791eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump { 1280754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // If we used strtol, etc, we'd have to buffer this line. 1281754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // This is more annoying than simply doing the conversion 1282754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // here. This code accounts for overflow. 12834a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek static const signed char buf[] = { 1284754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // %0 %1 %2 %3 %4 %5 %6 %7 %8 \t \n %b %c \r %e %f 1285754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1, 12861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f 1287d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1288754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f 12891c96b24285d05c0eac455ae96d7c9ff43d42bc96Zhongxing Xu -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1290ad8dcf4a9df0e24051dc31bf9e6f3cd138a34298Chris Lattner // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f 12911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -2, -1, -1, -1, -1, 1292ad8dcf4a9df0e24051dc31bf9e6f3cd138a34298Chris Lattner // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f 129324f1a967741ff9f8025ee23be12ba6feacc31f77Ted Kremenek -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1294a8538d902fce9cfec20f39b34492268b51643819Ted Kremenek // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f 1295754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
1296754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f 1297754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f 1299b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikie -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1300754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek }; 1301754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // valid cases: 13021c96b24285d05c0eac455ae96d7c9ff43d42bc96Zhongxing Xu // "09123\n" // -> 09123 1303754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // "09123\r\n" // -> 09123 13042055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu // "09123 \n" // -> 09123 13052055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu // "09123 \r\n" // -> 09123 13062055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu // "09123 12312\n" // -> 09123 1307754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // "09123 12312\r\n" // -> 09123 1308754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // "09123; foo=bar\n" // -> 09123 13091eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // "09123; foo=bar\r\n" // -> 09123 1310754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // "FFFFFFFFFFFFFFFF\r\n" // -> FFFFFFFFFFFFFFFF 1311b3cfd58c9b13325d994e5f9b5065e6a22d91911dTed Kremenek // "FFFFFFFFFFFFFFFF 22\r\n" // -> FFFFFFFFFFFFFFFF 13121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // invalid cases: 1313754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // "[ \t]+[^\n]*\n" 1314a8538d902fce9cfec20f39b34492268b51643819Ted Kremenek // "FFFFFFFFFFFFFFFFF\r\n" (would overflow) 1315754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek // "\r\n" 1316f233d48cfc513b045e2c2cfca5c175220fbd0a82Ted Kremenek // "\n" 1317d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis while (current < end) 
{ 131873099bfea9f5d4ec05265170bbefec3d76fb6b5eTed Kremenek const char c = *current; 1319af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks ++current; 1320af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks const signed char addition = buf[static_cast<int>(c)]; 1321af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks if (addition >= 0) { 1322af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks chunk_length_character_extracted_ = true; 13234d2ae4a70336dc2aa11389b34946be152bb454c9Anna Zaks size_t length_x_16 = chunk_length_remaining_ * 16; 132473099bfea9f5d4ec05265170bbefec3d76fb6b5eTed Kremenek const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16; 132573099bfea9f5d4ec05265170bbefec3d76fb6b5eTed Kremenek if ((chunk_length_remaining_ > kMaxDiv16) || 1326d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis ((std::numeric_limits<size_t>::max() - length_x_16) < 1327daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek static_cast<size_t>(addition))) { 1328e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenek // overflow -- asked for a chunk-length greater than 2^64 - 1!! 
1329d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis parse_state_ = BalsaFrameEnums::PARSE_ERROR; 13308bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW; 13319c378f705405d37f49795d5e915989de774fe11fTed Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 13325eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek visitor_->HandleChunkingError(this); 1333daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek goto bottom; 13345b9bd2137ebef350af803c634e3fdf5d74678100Ted Kremenek } 13352055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu chunk_length_remaining_ = length_x_16 + addition; 1336b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek continue; 13372055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu } 13382055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu 1339daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek if (!chunk_length_character_extracted_ || addition == -1) { 1340daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no 13415b9bd2137ebef350af803c634e3fdf5d74678100Ted Kremenek // characters were converted, or an unexpected character was 1342692416c214a3b234236dedcf875735a9cc29e90bTed Kremenek // seen. 
13438bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek parse_state_ = BalsaFrameEnums::PARSE_ERROR; 134434feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH; 134534feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 134634feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek visitor_->HandleChunkingError(this); 13471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump goto bottom; 134834feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek } 1349e71f3d587844110d836c82250830b27b1651afdbTed Kremenek 1350e71f3d587844110d836c82250830b27b1651afdbTed Kremenek --current; 1351e71f3d587844110d836c82250830b27b1651afdbTed Kremenek parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION; 1352e71f3d587844110d836c82250830b27b1651afdbTed Kremenek visitor_->ProcessChunkLength(chunk_length_remaining_); 13539c378f705405d37f49795d5e915989de774fe11fTed Kremenek goto label_reading_chunk_extension; 135472afb3739da0da02158242ae41a50cfe0bea78b4Ted Kremenek } 135572afb3739da0da02158242ae41a50cfe0bea78b4Ted Kremenek } 135685df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith visitor_->ProcessBodyInput(on_entry, current - on_entry); 135785df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith goto bottom; // case BalsaFrameEnums::READING_CHUNK_LENGTH 13581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump 1359daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek label_reading_chunk_extension: 136085df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith case BalsaFrameEnums::READING_CHUNK_EXTENSION: 136185df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith { 136285df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith // TODO(phython): Convert this scanning to be 16 bytes at a time if 136314a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek // there is data to be read. 
136414a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek const char* extensions_start = current; 13651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump size_t extensions_length = 0; 1366daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek while (current < end) { 1367daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek const char c = *current; 1368aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek if (c == '\r' || c == '\n') { 13691eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump extensions_length = 137014a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek (extensions_start == current) ? 137185df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith 0 : 13729c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek current - extensions_start - 1; 137348569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek } 1374b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek 13751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump ++current; 13768bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek if (c == '\n') { 1377a591bc04d21fa62ebffcb2c7814d738ca8f5e2f9Ted Kremenek chunk_length_character_extracted_ = false; 13781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump visitor_->ProcessChunkExtensions( 1379daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek extensions_start, extensions_length); 1380daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek if (chunk_length_remaining_ != 0) { 1381daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA; 13821c96b24285d05c0eac455ae96d7c9ff43d42bc96Zhongxing Xu goto label_reading_chunk_data; 1383daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek } 1384daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek HeaderFramingFound('\n'); 13851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM; 1386daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek goto label_reading_last_chunk_term; 1387daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek } 
138848569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek } 13898bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek visitor_->ProcessChunkExtensions( 139048569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek extensions_start, extensions_length); 139148569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek } 139248569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek 139348569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 139448569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek goto bottom; // case BalsaFrameEnums::READING_CHUNK_EXTENSION 139548569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek 139648569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek label_reading_chunk_data: 13975014ab113eb211b8320ae30b173d7020352663c6Ted Kremenek case BalsaFrameEnums::READING_CHUNK_DATA: 1398b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek while (current < end) { 139914a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek if (chunk_length_remaining_ == 0) { 140085df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith break; 140114a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek } 14021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // read in the chunk 140385df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith size_t bytes_remaining = end - current; 140485df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ? 
14051eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump chunk_length_remaining_ : bytes_remaining; 140614a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek const char* tmp_current = current + consumed_bytes; 1407daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry); 14081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump visitor_->ProcessBodyData(current, consumed_bytes); 14094d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek on_entry = current = tmp_current; 14104d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek chunk_length_remaining_ -= consumed_bytes; 14114d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek } 14124d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek if (chunk_length_remaining_ == 0) { 14134d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM; 14144d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek goto label_reading_chunk_term; 14154d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek } 14164d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 14174d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek goto bottom; // case BalsaFrameEnums::READING_CHUNK_DATA 14184d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek 14194d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek label_reading_chunk_term: 14204d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek case BalsaFrameEnums::READING_CHUNK_TERM: 14214d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek while (current < end) { 14224d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek const char c = *current; 14234d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek ++current; 14244d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek 14254d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek if (c == '\n') { 14264d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH; 
1427daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek goto label_reading_chunk_length; 1428daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek } 1429e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek } 1430ec96a2d52d16e150baaf629cd35e3fabff5d8915Ted Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 1431e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek goto bottom; // case BalsaFrameEnums::READING_CHUNK_TERM 1432d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek 1433d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis label_reading_last_chunk_term: 1434d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis case BalsaFrameEnums::READING_LAST_CHUNK_TERM: 1435d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis while (current < end) { 1436aa0aeb1cbe117db68d35700cb3a34aace0f99b99Anna Zaks const char c = *current; 1437ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks 14388bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek if (!HeaderFramingFound(c)) { 14395eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // If not, however, since the spec only suggests that the 14406d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu // client SHOULD indicate the presence of trailers, we get to 14419c378f705405d37f49795d5e915989de774fe11fTed Kremenek // *test* that they did or didn't. 1442892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // If all of the bytes we've seen since: 1443d17da2b99f323fa91b01e1dd119cc32e0ee8197dTed Kremenek // OPTIONAL_WS 0 OPTIONAL_STUFF CRLF 1444a7581731b1453b51b26154d2409d42a5b6395079Zhongxing Xu // are either '\r', or '\n', then we can assume that we don't yet 1445892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // know if we need to parse headers, or if the next byte will make 1446892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // the HeaderFramingFound condition (above) true. 
1447892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek if (HeaderFramingMayBeFound()) { 1448892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // If true, then we have seen only characters '\r' or '\n'. 1449892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek ++current; 1450892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek 1451892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // Lets try again! There is no state change here. 1452852274d4257134906995cb252fb3dfd2d71deae8Ted Kremenek continue; 14536d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu } else { 14545eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // If (!HeaderFramingMayBeFound()), then we know that we must be 1455ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks // reading the first non CRLF character of a trailer. 1456852274d4257134906995cb252fb3dfd2d71deae8Ted Kremenek parse_state_ = BalsaFrameEnums::READING_TRAILER; 1457892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 14589c378f705405d37f49795d5e915989de774fe11fTed Kremenek on_entry = current; 1459892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek goto label_reading_trailer; 1460c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek } 14615eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } else { 14626d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu // If we've found a "\r\n\r\n", then the message 1463892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // is done. 
14649c378f705405d37f49795d5e915989de774fe11fTed Kremenek ++current; 1465c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 14665eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 1467ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks visitor_->MessageDone(); 14686d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu goto bottom; 14691b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek } 14705aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek break; // from while loop 14715aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek } 14725aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek visitor_->ProcessBodyInput(on_entry, current - on_entry); 14735aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek goto bottom; // case BalsaFrameEnums::READING_LAST_CHUNK_TERM 14745aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek 14755aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek label_reading_trailer: 14765aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek case BalsaFrameEnums::READING_TRAILER: 14776d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu while (current < end) { 14786d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu const char c = *current; 14793271f8d315712885ac87747369bb1d9f4b1ea81fTed Kremenek ++current; 14803271f8d315712885ac87747369bb1d9f4b1ea81fTed Kremenek // TODO(fenix): If we ever care about trailers as part of framing, 1481540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek // deal with them here (see below for part of the 'solution') 14829c378f705405d37f49795d5e915989de774fe11fTed Kremenek // if (LineFramingFound(c)) { 14839c378f705405d37f49795d5e915989de774fe11fTed Kremenek // trailer_lines_.push_back(make_pair(start_of_line_, 14849c378f705405d37f49795d5e915989de774fe11fTed Kremenek // trailer_length_ - 1)); 14851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump // start_of_line_ = trailer_length_; 
14869c378f705405d37f49795d5e915989de774fe11fTed Kremenek // } 14879c378f705405d37f49795d5e915989de774fe11fTed Kremenek if (HeaderFramingFound(c)) { 1488892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek // ProcessTrailers(visitor_, &trailers_); 14891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 14908f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek visitor_->ProcessTrailerInput(on_entry, current - on_entry); 14918f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek visitor_->MessageDone(); 14928f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek goto bottom; 1493aa0aeb1cbe117db68d35700cb3a34aace0f99b99Anna Zaks } 1494ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } 14958f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek visitor_->ProcessTrailerInput(on_entry, current - on_entry); 14968f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek break; // case BalsaFrameEnums::READING_TRAILER 14975eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek 14988bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek // Note that there is no label: 14995eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // 'label_reading_until_close' 15005eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // here. This is because the state-machine exists immediately after 15015eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // reading the headers instead of transitioning here (as it would 15028f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek // do if it was consuming all the data it could, all the time). 
15035eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek case BalsaFrameEnums::READING_UNTIL_CLOSE: 1504ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks { 15051b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek const size_t bytes_remaining = end - current; 1506540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek if (bytes_remaining > 0) { 1507540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek visitor_->ProcessBodyInput(current, bytes_remaining); 1508469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek visitor_->ProcessBodyData(current, bytes_remaining); 15099c378f705405d37f49795d5e915989de774fe11fTed Kremenek current += bytes_remaining; 15106889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks } 15111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump } 15126889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks goto bottom; // case BalsaFrameEnums::READING_UNTIL_CLOSE 15136889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks 15145bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek // label_reading_content: 15155bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek case BalsaFrameEnums::READING_CONTENT: 15165bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek#if DEBUGFRAMER 1517ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks LOG(INFO) << "ReadingContent: " << content_length_remaining_; 15185bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek#endif // DEBUGFRAMER 1519ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks while (content_length_remaining_ && current < end) { 15205bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek // read in the content 15215bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek const size_t bytes_remaining = end - current; 15225bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek const size_t consumed_bytes = 15235bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek (content_length_remaining_ < bytes_remaining) ? 
1524892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek content_length_remaining_ : bytes_remaining; 152586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor visitor_->ProcessBodyInput(current, consumed_bytes); 152686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor visitor_->ProcessBodyData(current, consumed_bytes); 1527e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek current += consumed_bytes; 15288bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek content_length_remaining_ -= consumed_bytes; 15295eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek } 15305eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek if (content_length_remaining_ == 0) { 1531e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ; 1532e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek visitor_->MessageDone(); 1533e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek } 1534e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek goto bottom; // case BalsaFrameEnums::READING_CONTENT 1535e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek 1536e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek default: 15375eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek // The state-machine should never be in a state that isn't handled 1538e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek // above. This is a glaring logic error, and we should do something 1539e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek // drastic to ensure that this gets looked-at and fixed. 
1540892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek LOG(FATAL) << "Unknown state: " << parse_state_ // COV_NF_LINE 1541e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek << " memory corruption?!"; // COV_NF_LINE 1542e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek } 1543e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek } 1544d9bc33efa195114d6f2a365c26e5b8dba4e1cc38Ted Kremenek bottom: 1545e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek#if DEBUGFRAMER 1546e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n" 1547e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek << std::string(input, current) 15485eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek << "\n$$$$$$$$$$$$$$" 1549ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks << BalsaFrameEnums::ParseStateToString(parse_state_) 1550ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks << "$$$$$$$$$$$$$$$" 1551ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks << " consumed: " << (current - input); 1552bd613137499b1d4c3b63dccd0aa21f6add243f4fTed Kremenek if (Error()) { 1553ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode()); 1554ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks } 1555469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek#endif // DEBUGFRAMER 1556469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek return current - input; 15579c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek} 15589c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek 15599c378f705405d37f49795d5e915989de774fe11fTed Kremenekconst uint32 BalsaFrame::kValidTerm1; 156093bd5ca766c4d7906878f4ffe76ce1b2080e540bJordy Roseconst uint32 BalsaFrame::kValidTerm1Mask; 15613d7c44e01d568e5d5c0fac9c6ccb3f080157ba19Anna Zaksconst uint32 BalsaFrame::kValidTerm2; 1562b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenekconst uint32 BalsaFrame::kValidTerm2Mask; 1563b107c4b7efb907d75620cd3c17f82fe27dc5b745Ted Kremenek 
1564fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose} // namespace net 1565fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose