balsa_frame.cc revision f2477e01787aa58f445919b809d89e252beef54f
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek
5d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include "net/tools/balsa/balsa_frame.h"
6d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek
7d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <assert.h>
8d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#if __SSE2__
9d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <emmintrin.h>
1077349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#endif  // __SSE2__
1177349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek#include <strings.h>
1277349cb20bfd7069d081f84c91975bfa8ef60a32Ted Kremenek
13d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <limits>
14d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek#include <string>
15a7af5ea88a6c5bdf87497cca6c20831e8c546751Argyrios Kyrtzidis#include <utility>
16c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks#include <vector>
17c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks
1843dee220252ef0b42c5f8a3bb1eca97f84f2565fArgyrios Kyrtzidis#include "base/logging.h"
199b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "base/port.h"
209b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "base/strings/string_piece.h"
219b663716449b618ba0390b1dbebc54fa8e971124Ted Kremenek#include "net/tools/balsa/balsa_enums.h"
22d1e5a89226da79f7e6f43d40facc46abda9e5245Jordy Rose#include "net/tools/balsa/balsa_headers.h"
23199c3d6cd16aebbb9c7f0d42af9d922c9628bf70Ken Dyck#include "net/tools/balsa/balsa_visitor_interface.h"
2416f0049415ec596504891259e2a83e19871c0d52Chris Lattner#include "net/tools/balsa/buffer_interface.h"
2516f0049415ec596504891259e2a83e19871c0d52Chris Lattner#include "net/tools/balsa/simple_buffer.h"
26337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek#include "net/tools/balsa/split.h"
27c506357c3778092c2a3251243f12524e8eb89274Zhongxing Xu#include "net/tools/balsa/string_piece_utils.h"
281b63e4f732dbc73d90abf886b4d21f8e3a165f6dChris Lattner
2916f0049415ec596504891259e2a83e19871c0d52Chris Lattnernamespace net {
300bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek
// Lower-case names of the headers that influence how an HTTP message is
// framed and therefore need special handling while parsing. Each *Size
// constant is the length of the corresponding name, not counting the
// terminating NUL of the string literal.
static const char kContentLength[] = "content-length";
static const char kTransferEncoding[] = "transfer-encoding";

static const size_t kContentLengthSize = sizeof(kContentLength) - 1;
static const size_t kTransferEncodingSize = sizeof(kTransferEncoding) - 1;
370f5f0595d6a038843a7051c5a65fca7bce2915a0Ted Kremenek
380f5f0595d6a038843a7051c5a65fca7bce2915a0Ted KremenekBalsaFrame::BalsaFrame()
39b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek    : last_char_was_slash_r_(false),
409ef6537a894c33003359b1f9b9676e9178e028b7Ted Kremenek      saw_non_newline_char_(false),
41b387a3f23e423d62c053be86294b703da1d1a222Ted Kremenek      start_was_space_(true),
42ab2b8c54bca82866876f91e756788916d3fa20c3Ted Kremenek      chunk_length_character_extracted_(false),
43c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks      is_request_(true),
44c2994283aa7538b7420c8e398cde7afa328d7042Anna Zaks      request_was_head_(false),
45749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks      max_header_length_(16 * 1024),
46749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks      max_request_uri_length_(2048),
47749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks      visitor_(&do_nothing_visitor_),
48749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks      chunk_length_remaining_(0),
49749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks      content_length_remaining_(0),
50749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks      last_slash_n_loc_(NULL),
515903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks      last_recorded_slash_n_loc_(NULL),
525903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks      last_slash_n_idx_(0),
535903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks      term_chars_(0),
54e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      parse_state_(BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE),
5532c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek      last_error_(BalsaFrameEnums::NO_ERROR),
5632c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek      headers_(NULL) {
5732c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek}
589c378f705405d37f49795d5e915989de774fe11fTed Kremenek
5932c4995826c76f282fc05fbbc3241d2dded4fb57Ted KremenekBalsaFrame::~BalsaFrame() {}
6032c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek
6132c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenekvoid BalsaFrame::Reset() {
6232c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek  last_char_was_slash_r_ = false;
6332c4995826c76f282fc05fbbc3241d2dded4fb57Ted Kremenek  saw_non_newline_char_ = false;
64bdb435ddaafd5069becd543d638112f68825b89dTed Kremenek  start_was_space_ = true;
65bdb435ddaafd5069becd543d638112f68825b89dTed Kremenek  chunk_length_character_extracted_ = false;
66bdb435ddaafd5069becd543d638112f68825b89dTed Kremenek  // is_request_ = true;               // not reset between messages.
673fd5f370a28552976c52e76c3035d79012d78ddaAnna Zaks  // request_was_head_ = false;        // not reset between messages.
686a86082f3a06a2dcceaaf63f78a0e52d64bcbaa3Anna Zaks  // max_header_length_ = 4096;        // not reset between messages.
693bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks  // max_request_uri_length_ = 2048;   // not reset between messages.
7025e695b2d574d919cc1bbddf3a2efe073d449b1cZhongxing Xu  // visitor_ = &do_nothing_visitor_;  // not reset between messages.
711d26f48dc2eea1c07431ca1519d7034a21b9bcffTed Kremenek  chunk_length_remaining_ = 0;
72e62f048960645b79363408fdead53fec2a063c52Anna Zaks  content_length_remaining_ = 0;
73d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  last_slash_n_loc_ = NULL;
74c77a55126fcad66fb086f8e100a494caa2496a2dZhongxing Xu  last_recorded_slash_n_loc_ = NULL;
7532a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek  last_slash_n_idx_ = 0;
7632a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek  term_chars_ = 0;
7750a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek  parse_state_ = BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE;
78c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek  last_error_ = BalsaFrameEnums::NO_ERROR;
798ad8c546372fe602708cb7ceeaf0ebbb866735c6Anna Zaks  lines_.clear();
808ad8c546372fe602708cb7ceeaf0ebbb866735c6Anna Zaks  if (headers_ != NULL) {
81a5a4166f8cb04490b7b27355874edfb98837cd45Zhongxing Xu    headers_->Clear();
82c77a55126fcad66fb086f8e100a494caa2496a2dZhongxing Xu  }
83e62f048960645b79363408fdead53fec2a063c52Anna Zaks}
84d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek
85d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenekconst char* BalsaFrameEnums::ParseStateToString(
86d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek    BalsaFrameEnums::ParseState error_code) {
87d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek  switch (error_code) {
88d767d81290288c030f3be0be1d3e62b9c8df51dcTed Kremenek    case PARSE_ERROR:
89c80135ba857da48173578b9c528fce6777e18168Ted Kremenek      return "PARSE_ERROR";
9050a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek    case READING_HEADER_AND_FIRSTLINE:
91d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis      return "READING_HEADER_AND_FIRSTLINE";
92cf118d41f7930a18dce97416ef7834a62642f587Ted Kremenek    case READING_CHUNK_LENGTH:
93e448ab4f9dd162802f5d7cfea60f7830cc61c654Ted Kremenek      return "READING_CHUNK_LENGTH";
9450a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek    case READING_CHUNK_EXTENSION:
9550a6d0ce344c02782e0207574005c3b2aaa5077cTed Kremenek      return "READING_CHUNK_EXTENSION";
96e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    case READING_CHUNK_DATA:
97e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      return "READING_CHUNK_DATA";
98e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    case READING_CHUNK_TERM:
99e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      return "READING_CHUNK_TERM";
1008bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    case READING_LAST_CHUNK_TERM:
1018bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek      return "READING_LAST_CHUNK_TERM";
102a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case READING_TRAILER:
1031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      return "READING_TRAILER";
104cfcd7fd0de701c5ce05e96de1ed2d0bf8c7035d9Ted Kremenek    case READING_UNTIL_CLOSE:
10552e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek      return "READING_UNTIL_CLOSE";
10652e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek    case READING_CONTENT:
1075974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "READING_CONTENT";
108a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case MESSAGE_FULLY_READ:
1095974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "MESSAGE_FULLY_READ";
1105974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case NUM_STATES:
1115974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "UNKNOWN_STATE";
1125974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek  }
1135974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek  return "UNKNOWN_STATE";
1145974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek}
1155974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek
1165974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenekconst char* BalsaFrameEnums::ErrorCodeToString(
1175974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    BalsaFrameEnums::ErrorCode error_code) {
1185974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek  switch (error_code) {
1195974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case NO_ERROR:
120b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek      return "NO_ERROR";
1215974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case NO_STATUS_LINE_IN_RESPONSE:
1225974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "NO_STATUS_LINE_IN_RESPONSE";
1235974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case NO_REQUEST_LINE_IN_REQUEST:
124b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek      return "NO_REQUEST_LINE_IN_REQUEST";
1251397663af9dbcc24dbf0e11de43931b3dc08fdbbTed Kremenek    case FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION:
1269c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek      return "FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION";
127c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek    case FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD:
1285974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD";
1295974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE:
1305974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE";
1315974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI:
132b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek      return "FAILED_TO_FIND_WS_AFTER_REQUEST_REQUEST_URI";
1335974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE:
1345974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "FAILED_TO_FIND_NL_AFTER_RESPONSE_REASON_PHRASE";
135b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek    case FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION:
1368bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek      return "FAILED_TO_FIND_NL_AFTER_REQUEST_HTTP_VERSION";
1375974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek    case FAILED_CONVERTING_STATUS_CODE_TO_INT:
13852e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek      return "FAILED_CONVERTING_STATUS_CODE_TO_INT";
139a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case REQUEST_URI_TOO_LONG:
140a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "REQUEST_URI_TOO_LONG";
141a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case HEADERS_TOO_LONG:
142a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "HEADERS_TOO_LONG";
143a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case UNPARSABLE_CONTENT_LENGTH:
144a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "UNPARSABLE_CONTENT_LENGTH";
145a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case MAYBE_BODY_BUT_NO_CONTENT_LENGTH:
146a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "MAYBE_BODY_BUT_NO_CONTENT_LENGTH";
147a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case REQUIRED_BODY_BUT_NO_CONTENT_LENGTH:
148a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "REQUIRED_BODY_BUT_NO_CONTENT_LENGTH";
149a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case HEADER_MISSING_COLON:
150a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "HEADER_MISSING_COLON";
151a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case INVALID_CHUNK_LENGTH:
152a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "INVALID_CHUNK_LENGTH";
153a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case CHUNK_LENGTH_OVERFLOW:
154a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "CHUNK_LENGTH_OVERFLOW";
155a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO:
1565974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO";
157a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT:
158a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT";
159a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case MULTIPLE_CONTENT_LENGTH_KEYS:
160a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "MULTIPLE_CONTENT_LENGTH_KEYS";
161a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case MULTIPLE_TRANSFER_ENCODING_KEYS:
162a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "MULTIPLE_TRANSFER_ENCODING_KEYS";
163a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case UNKNOWN_TRANSFER_ENCODING:
164a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "UNKNOWN_TRANSFER_ENCODING";
165a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case INVALID_HEADER_FORMAT:
166a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "INVALID_HEADER_FORMAT";
167a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case INTERNAL_LOGIC_ERROR:
168a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek      return "INTERNAL_LOGIC_ERROR";
169a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek    case NUM_ERROR_CODES:
1705974ec518289a719fdd18122060bbcfe49d439e3Ted Kremenek      return "UNKNOWN_ERROR";
171cfcd7fd0de701c5ce05e96de1ed2d0bf8c7035d9Ted Kremenek  }
172a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek  return "UNKNOWN_ERROR";
173a078ecf3484d62b01d9f8c01e0fecffd65c583e1Ted Kremenek}
17452e5602056e4cade24cbcca57767e94e1d430b03Ted Kremenek
175e070a1df66aab6d4168fb28f7559fdf996df3567Ted Kremenek// Summary:
176e070a1df66aab6d4168fb28f7559fdf996df3567Ted Kremenek//     Parses the first line of either a request or response.
177e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek//     Note that in the case of a detected warning, error_code will be set
178e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek//   but the function will not return false.
179e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek//     Exactly zero or one warning or error (but not both) may be detected
180e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek//   by this function.
1819c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek//     Note that this function will not write the data of the first-line
18232a58084a4c53e6938dd81bfce224db25a5976d1Ted Kremenek//   into the header's buffer (that should already have been done elsewhere).
1838bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek//
184fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose// Pre-conditions:
185fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose//     begin != end
//     *begin should be a character which is > ' '. This implies that there
//   is at least one non-whitespace character between [begin, end).
1888bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek//   headers is a valid pointer to a BalsaHeaders class.
189183ff98f425d470c2a0276880aaf43496c9dad14Argyrios Kyrtzidis//     error_code is a valid pointer to a BalsaFrameEnums::ErrorCode value.
190c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose//     Entire first line must exist between [begin, end)
191c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose//     Exactly zero or one newlines -may- exist between [begin, end)
1928bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek//     [begin, end) should exist in the header's buffer.
1938bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek//
19435bdbf40624beba3fc00cb72ab444659939c1a6bTed Kremenek// Side-effects:
195537716ad8dd10f984b6cfe6985afade1185c5e3cJordy Rose//   headers will be modified
19666c40400e7d6272b0cd675ada18dd62c1f0362c7Anna Zaks//   error_code may be modified if either a warning or error is detected
19766c40400e7d6272b0cd675ada18dd62c1f0362c7Anna Zaks//
19835bdbf40624beba3fc00cb72ab444659939c1a6bTed Kremenek// Returns:
19966c40400e7d6272b0cd675ada18dd62c1f0362c7Anna Zaks//   True if no error (as opposed to warning) is detected.
200c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose//   False if an error (as opposed to warning) is detected.
201c2b7dfaad674587cfd220ff447b3710d252130c3Jordy Rose
2028bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek//
// If there is indeed non-whitespace in the line, then the following
// will take care of this for you:
//  while (*begin <= ' ') ++begin;
//  ParseHTTPFirstLine(begin, end, is_request, max_request_uri_length,
//                     &headers, &error_code);
207e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenek//
20830726c6baee1417307236e854f1474fdb3cedb98Argyrios Kyrtzidisbool ParseHTTPFirstLine(const char* begin,
209ccc263b44c62ce3a02f797a3ddb3d6017cf0e5e4Ted Kremenek                        const char* end,
210ccc263b44c62ce3a02f797a3ddb3d6017cf0e5e4Ted Kremenek                        bool is_request,
211ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                        size_t max_request_uri_length,
212ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                        BalsaHeaders* headers,
213ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                        BalsaFrameEnums::ErrorCode* error_code) {
214ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  const char* current = begin;
215ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  // HTTP firstlines all have the following structure:
2169c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu  //  LWS         NONWS  LWS    NONWS   LWS    NONWS   NOTCRLF  CRLF
2173c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //  [\t \r\n]+ [^\t ]+ [\t ]+ [^\t ]+ [\t ]+ [^\t ]+ [^\r\n]+ "\r\n"
2183c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //  ws1        nws1    ws2    nws2    ws3    nws3             ws4
2193c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //  |          [-------)      [-------)      [----------------)
220ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  //    REQ:     method         request_uri    version
2213c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //   RESP:     version        statuscode     reason
2223c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //
223ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  //   The first NONWS->LWS component we'll call firstline_a.
2243c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //   The second firstline_b, and the third firstline_c.
2253c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //
2263c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //   firstline_a goes from nws1 to (but not including) ws2
2273c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //   firstline_b goes from nws2 to (but not including) ws3
2283c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  //   firstline_c goes from nws3 to (but not including) ws4
229ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  //
2303c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek  // In the code:
2319c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu  //    ws1 == whitespace_1_idx_
2320b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  //   nws1 == non_whitespace_1_idx_
2339c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu  //    ws2 == whitespace_2_idx_
2349c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu  //   nws2 == non_whitespace_2_idx_
235ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  //    ws3 == whitespace_3_idx_
236ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  //   nws3 == non_whitespace_3_idx_
237ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  //    ws4 == whitespace_4_idx_
238ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek
239ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  // Kill all whitespace (including '\r\n') at the end of the line.
240ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  --end;
241ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  if (*end != '\n') {
242ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek    *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
243ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek    LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
244ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek                << headers->OriginalHeadersForDebugging();
2459e9a3e612d57b583800d5f0e48bb28d4afbd8b84Ted Kremenek    return false;
246ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  }
247ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  while (begin < end && *end <= ' ') {
248ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek    --end;
249ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  }
250ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  DCHECK(*end != '\n');
251ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks  if (*end == '\n') {
252ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks    *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
253ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks    LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
254ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks                << headers->OriginalHeadersForDebugging();
255ff80afcfb2b00ccffcb6cb10528bec565fc59eddAnna Zaks    return false;
256ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  }
257ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  ++end;
258ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek
259ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  // The two following statements should not be possible.
260ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek  if (end == begin) {
261ce117a7d289f57f792e5cc3294280cfe070433deTed Kremenek    *error_code = BalsaFrameEnums::INTERNAL_LOGIC_ERROR;
2620b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    LOG(DFATAL) << "INTERNAL_LOGIC_ERROR Headers: \n"
2630b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks                << headers->OriginalHeadersForDebugging();
2640b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    return false;
2650b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  }
2660b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks
2670b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // whitespace_1_idx_
2680b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  headers->whitespace_1_idx_ = current - begin;
2690b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // This loop is commented out as it is never used in current code.  This is
2700b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // true only because we don't begin parsing the headers at all until we've
2710b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // encountered a non whitespace character at the beginning of the stream, at
2720b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // which point we begin our demarcation of header-start.  If we did -not- do
2730b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // this (for instance, only looked for [\r\n] instead of (< ' ')), this loop
2740b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // would be necessary for the proper functioning of this parsing.
2750b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // This is left here as this function may (in the future) be refactored out
2760b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // of the BalsaFrame class so that it may be shared between code in
2770b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // BalsaFrame and BalsaHeaders (where it would be used in some variant of the
2780b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // set_first_line() function (at which point it would be necessary).
2790b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks#if 0
2800b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  while (*current <= ' ') {
281241677a13cc46647a8f5098b3e3239bd9480dca2Ted Kremenek    ++current;
28277d7ef8d8a80ccb2ab3d25c80810571e3ab14ee4Ted Kremenek  }
283f185cc1ac77a84139c603eee3473b88dcb839c68Anna Zaks#endif
284f185cc1ac77a84139c603eee3473b88dcb839c68Anna Zaks  // non_whitespace_1_idx_
2856bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  headers->non_whitespace_1_idx_ = current - begin;
2866bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  do {
2876bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    // The first time through, we're guaranteed that the current character
2886bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    // won't be a whitespace (else the loop above wouldn't have terminated).
2890b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    // That implies that we're guaranteed to get at least one non-whitespace
2900b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    // character if we get into this loop at all.
2911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    ++current;
2926bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    if (current == end) {
2936bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks      headers->whitespace_2_idx_ = current - begin;
2946bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks      headers->non_whitespace_2_idx_ = current - begin;
295fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose      headers->whitespace_3_idx_ = current - begin;
2960b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      headers->non_whitespace_3_idx_ = current - begin;
2970b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      headers->whitespace_4_idx_ = current - begin;
298183ff98f425d470c2a0276880aaf43496c9dad14Argyrios Kyrtzidis      // FAILED_TO_FIND_WS_AFTER_REQUEST_METHOD   for request
299fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose      // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION for response
300fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose      *error_code =
301fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose        static_cast<BalsaFrameEnums::ErrorCode>(
3020b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks            BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION +
303fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose            is_request);
304fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose      if (!is_request) {  // FAILED_TO_FIND_WS_AFTER_RESPONSE_VERSION
3058bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        return false;
3066bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks      }
3076bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks      goto output_exhausted;
3086bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    }
3096bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  } while (*current > ' ');
3106bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  // whitespace_2_idx_
3110b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  headers->whitespace_2_idx_ = current - begin;
3126bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  do {
3136bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    ++current;
3140b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    // Note that due to the loop which consumes all of the whitespace
3156bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    // at the end of the line, current can never == end while in this function.
3166bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  } while (*current <= ' ');
3176bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  // non_whitespace_2_idx_
3186bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  headers->non_whitespace_2_idx_ = current - begin;
3196bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks  do {
3208bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    ++current;
3216bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks    if (current == end) {
3220b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      headers->whitespace_3_idx_ = current - begin;
3230b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      headers->non_whitespace_3_idx_ = current - begin;
3246bd528b9d703fdea51053719d9c53504a61a6bd7Anna Zaks      headers->whitespace_4_idx_ = current - begin;
32577d7ef8d8a80ccb2ab3d25c80810571e3ab14ee4Ted Kremenek      // FAILED_TO_FIND_START_OF_REQUEST_REQUEST_URI for request
3260b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      // FAILED_TO_FIND_START_OF_RESPONSE_STATUSCODE for response
3270b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      *error_code =
3280b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks        static_cast<BalsaFrameEnums::ErrorCode>(
3290b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks            BalsaFrameEnums::FAILED_TO_FIND_WS_AFTER_RESPONSE_STATUSCODE
3300b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks                                 + is_request);
3310b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks      goto output_exhausted;
3320b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    }
3330b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  } while (*current > ' ');
3340b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // whitespace_3_idx_
3350b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  headers->whitespace_3_idx_ = current - begin;
3360b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  do {
3371eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    ++current;
3380b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    // Note that due to the loop which consumes all of the whitespace
3390b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks    // at the end of the line, current can never == end while in this function.
3400b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  } while (*current <= ' ');
3410b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // non_whitespace_3_idx_
3420b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  headers->non_whitespace_3_idx_ = current - begin;
3430b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  headers->whitespace_4_idx_ = end - begin;
3440b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks
3450b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks output_exhausted:
3460b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // Note that we don't fail the parse immediately when parsing of the
3470b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // firstline fails.  Depending on the protocol type, we may want to accept
348dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks  // a firstline with only one or two elements, e.g., for HTTP/0.9:
3490b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  //   GET\r\n
3500b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks  // or
351dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks  //   GET /\r\n
3521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // should be parsed without issue (though the visitor should know that
353dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks  // parsing the entire line was not exactly as it should be).
354dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks  //
355ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  // Eventually, these errors may be removed altogether, as the visitor can
356ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  // detect them on its own by examining the size of the various fields.
357dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks  // headers->set_first_line(non_whitespace_1_idx_, current);
358dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks
3591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  if (is_request) {
360e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    if ((headers->whitespace_3_idx_ - headers->non_whitespace_2_idx_) >
361846d4e923bf11bcdc2816758aafa331795f29230Ted Kremenek        max_request_uri_length) {
362846d4e923bf11bcdc2816758aafa331795f29230Ted Kremenek      // For requests, we need at least the method.  We could assume that a
3630a3ed3143b00f237decb1288c1ff574ae09eba4eTed Kremenek      // blank URI means "/".  If version isn't stated, it should be assumed
364e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      // to be HTTP/0.9 by the visitor.
365e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      *error_code = BalsaFrameEnums::REQUEST_URI_TOO_LONG;
366d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis      return false;
367056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks    }
368ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  } else {
369ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    headers->parsed_response_code_ = 0;
3700a3ed3143b00f237decb1288c1ff574ae09eba4eTed Kremenek    {
371cbb67480094b3bcb5b715acd827cbad55e2a204cSean Hunt      const char* parsed_response_code_current =
372056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks        begin + headers->non_whitespace_2_idx_;
373056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks      const char* parsed_response_code_end = begin + headers->whitespace_3_idx_;
374056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks      const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
375056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks
376d074441e027471a914cbb909a7aad1d43224950fZhongxing Xu      // Convert a string of [0-9]* into an int.
377d074441e027471a914cbb909a7aad1d43224950fZhongxing Xu      // Note that this allows for the conversion of response codes which
378056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks      // are outside the bounds of normal HTTP response codes (no checking
3799dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu      // is done to ensure that these are valid-- they're merely parsed)!
38000eb3f9c5b33e3d99aee1f8b75dd9c9678fdd66bFrancois Pichet      while (parsed_response_code_current < parsed_response_code_end) {
3819dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu        if (*parsed_response_code_current < '0' ||
3829dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu            *parsed_response_code_current > '9') {
383e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen          *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
384e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen          return false;
3859dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu        }
386e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen        size_t status_code_x_10 = headers->parsed_response_code_ * 10;
3879dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu        uint8 c = *parsed_response_code_current - '0';
388e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen        if ((headers->parsed_response_code_ > kMaxDiv10) ||
389e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen            (std::numeric_limits<size_t>::max() - status_code_x_10) < c) {
390e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen          // overflow.
3919dc84c9455df2a77195147d0210c915dc1775a88Zhongxing Xu          *error_code = BalsaFrameEnums::FAILED_CONVERTING_STATUS_CODE_TO_INT;
392e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen          return false;
393e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen        }
394e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen        headers->parsed_response_code_ = status_code_x_10 + c;
395e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen        ++parsed_response_code_current;
396e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen      }
397056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks    }
398056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks  }
399056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks  return true;
400056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks}
401056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks
402056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// begin - beginning of the firstline
403056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// end - end of the firstline
404056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks//
405056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// A precondition for this function is that there is non-whitespace between
406056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// [begin, end). If this precondition is not met, the function will not perform
407056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// as expected (and bad things may happen, and it will eat your first, second,
408d074441e027471a914cbb909a7aad1d43224950fZhongxing Xu// and third unborn children!).
409056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks//
410056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// Another precondition for this function is that [begin, end) includes
411dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks// at most one newline, which must be at the end of the line.
412dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaksvoid BalsaFrame::ProcessFirstLine(const char* begin, const char* end) {
413dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks  BalsaFrameEnums::ErrorCode previous_error = last_error_;
4149c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu  if (!ParseHTTPFirstLine(begin,
4159c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu                          end,
416d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis                          is_request_,
417ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                          max_request_uri_length_,
418ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                          headers_,
4193c0349e87cdbd7316d06d2411d86ee1086e717a5Ted Kremenek                          &last_error_)) {
4204ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu    parse_state_ = BalsaFrameEnums::PARSE_ERROR;
421056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks    visitor_->HandleHeaderError(this);
4224ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu    return;
4234ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu  }
424056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks  if (previous_error != last_error_) {
4254ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu    visitor_->HandleHeaderWarning(this);
4264ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu  }
427056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks
4284ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu  if (is_request_) {
4294ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu    int version_length =
430056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks        headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_;
4314ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu    visitor_->ProcessRequestFirstLine(
4324ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu        begin + headers_->non_whitespace_1_idx_,
4334ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu        headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
4344ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu        begin + headers_->non_whitespace_1_idx_,
435ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
436dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks        begin + headers_->non_whitespace_2_idx_,
437dd7ddf2b2296f95e7591ca3f9791f0eb9a15ee42Anna Zaks        headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
4384ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu        begin + headers_->non_whitespace_3_idx_,
4394ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu        version_length);
440056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks    if (version_length == 0)
441056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks      parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
442056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks  } else {
4438bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    visitor_->ProcessResponseFirstLine(
444056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks        begin + headers_->non_whitespace_1_idx_,
4452210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        headers_->whitespace_4_idx_ - headers_->non_whitespace_1_idx_,
4462210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        begin + headers_->non_whitespace_1_idx_,
4472210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        headers_->whitespace_2_idx_ - headers_->non_whitespace_1_idx_,
4482210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        begin + headers_->non_whitespace_2_idx_,
4492210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        headers_->whitespace_3_idx_ - headers_->non_whitespace_2_idx_,
4502210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        begin + headers_->non_whitespace_3_idx_,
4512210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu        headers_->whitespace_4_idx_ - headers_->non_whitespace_3_idx_);
4522210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu  }
4532210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu}
454b13453bd8a91f331d0910ca95ad52aa41b52f648Zhongxing Xu
455056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// 'stream_begin' points to the first character of the headers buffer.
456b13453bd8a91f331d0910ca95ad52aa41b52f648Zhongxing Xu// 'line_begin' points to the first character of the line.
4572210490e6d099b7a5b4f68f44a136e4dcf3cdea2Zhongxing Xu// 'current' points to a char which is ':'.
458056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks// 'line_end' points to the position of '\n' + 1.
4594ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu// 'line_begin' points to the position of first character of line.
4604ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xuvoid BalsaFrame::CleanUpKeyValueWhitespace(
461d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis    const char* stream_begin,
462056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks    const char* line_begin,
4634ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu    const char* current,
464d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis    const char* line_end,
465056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks    HeaderLineDescription* current_header_line) {
4664ffcb9974c6b7142c4a1483abfcb1f88b6371c45Zhongxing Xu  const char* colon_loc = current;
467d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis  DCHECK_LT(colon_loc, line_end);
468056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks  DCHECK_EQ(':', *colon_loc);
469056c4b46335a3bd2612414735d5749ee159c0165Anna Zaks  DCHECK_EQ(':', *current);
4709c6cd67ea416bace666d614c84d5531124287653Zhongxing Xu  DCHECK_GE(' ', *line_end)
4719c378f705405d37f49795d5e915989de774fe11fTed Kremenek    << "\"" << std::string(line_begin, line_end) << "\"";
4726889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks
4730bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek  // TODO(fenix): Investigate whether or not the bounds tests in the
4740bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek  // while loops here are redundant, and if so, remove them.
4750bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek  --current;
4766889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks  while (current > line_begin && *current <= ' ') --current;
4776889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks  current += (current != colon_loc);
4780bed8a12f2878d3cd94fb8bdba55b593d92dd11aTed Kremenek  current_header_line->key_end_idx = current - stream_begin;
479f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall
480f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall  current = colon_loc;
481892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek  DCHECK_EQ(':', *current);
482f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall  ++current;
483e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  while (current < line_end && *current <= ' ') ++current;
484e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  current_header_line->value_begin_idx = current - stream_begin;
485e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek
4861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  DCHECK_GE(current_header_line->key_end_idx,
487ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks            current_header_line->first_char_idx);
488e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  DCHECK_GE(current_header_line->value_begin_idx,
4891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            current_header_line->key_end_idx);
490e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  DCHECK_GE(current_header_line->last_char_idx,
491f85e193739c953358c865005855253af4f68a497John McCall            current_header_line->value_begin_idx);
492f85e193739c953358c865005855253af4f68a497John McCall}
4931b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek
4941b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenekinline void BalsaFrame::FindColonsAndParseIntoKeyValue() {
4951b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek  DCHECK(!lines_.empty());
4961b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek  const char* stream_begin = headers_->OriginalHeaderStreamBegin();
4979be88403e965cc49af76c9d33d818781d44b333eFrancois Pichet  // The last line is always just a newline (and is uninteresting).
498c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek  const Lines::size_type lines_size_m1 = lines_.size() - 1;
499ed8abf18329df67b0abcbb3a10458bd8c1d2a595Douglas Gregor#if __SSE2__
5001b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek  const __v16qi colons = { ':', ':', ':', ':', ':', ':', ':', ':',
5011b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek                           ':', ':', ':', ':', ':', ':', ':', ':'};
5026ad6f2848d7652ab2991286eb48be440d3493b28Francois Pichet  const char* header_lines_end_m16 = headers_->OriginalHeaderStreamEnd() - 16;
5034ca8ac2e61c37ddadf37024af86f3e1019af8532Douglas Gregor#endif  // __SSE2__
50421ff2e516b0e0bc8c1dbf965cb3d44bac3c64330John Wiegley  const char* current = stream_begin + lines_[1].first;
505552622067dc45013d240f73952fece703f5e63bdJohn Wiegley  // This code is a bit more subtle than it may appear at first glance.
5061b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek  // This code looks for a colon in the current line... but it also looks
507c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek  // beyond the current line. If there is no colon in the current line, then
5086b219d082434394c1ac401390ec1d1967727815aSebastian Redl  // for each subsequent line (until the colon which -has- been found is
509be230c36e32142cbdcdbe9c97511d097beeecbabDouglas Gregor  // associated with a line), no searching for a colon will be performed. In
510c7793c73ba8a343de3f2552d984851985a46f159Douglas Gregor  // this way, we minimize the amount of bytes we have scanned for a colon.
51128bbe4b8acc338476fe0825769b41fb32b423c72John Wiegley  for (Lines::size_type i = 1; i < lines_size_m1;) {
51228bbe4b8acc338476fe0825769b41fb32b423c72John Wiegley    const char* line_begin = stream_begin + lines_[i].first;
51301d08018b7cf5ce1601707cfd7a84d22015fc04eDouglas Gregor
514ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    // Here we handle possible continuations.  Note that we do not replace
515337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek    // the '\n' in the line before a continuation (at least, as of now),
516337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek    // which implies that any code which looks for a value must deal with
517ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    // "\r\n", etc -within- the line (and not just at the end of it).
518c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek    for (++i; i < lines_size_m1; ++i) {
519c768a0c46e6c064c3281d663777ee95aea8652eeTed Kremenek      const char c = *(stream_begin + lines_[i].first);
5205fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek      if (c > ' ') {
5215fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek        // Not a continuation, so stop.  Note that if the 'original' i = 1,
5225fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek        // and the next line is not a continuation, we'll end up with i = 2
52391a5755ad73c5dc1dfb167e448fdd74e75a6df56John McCall        // when we break. This handles the incrementing of i for the outer
524ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        // loop.
5255fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek        break;
5265fe98728dca1f3a7a378ce1a21984a0f8a0c0b8bTed Kremenek      }
527f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall    }
528f6a1648197562e0b133440d612d9af297d0a86ccJohn McCall    const char* line_end = stream_begin + lines_[i - 1].second;
529f111d935722ed488144600cea5ed03a6b5069e8fPeter Collingbourne    DCHECK_LT(line_begin - stream_begin, line_end - stream_begin);
530f111d935722ed488144600cea5ed03a6b5069e8fPeter Collingbourne
5311b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // We cleanup the whitespace at the end of the line before doing anything
5321b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // else of interest as it allows us to do nothing when irregularly formatted
5331b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // headers are parsed (e.g. those with only keys, only values, or no colon).
5341b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    //
5351b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // We're guaranteed to have *line_end > ' ' while line_end >= line_begin.
5361b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    --line_end;
53746eaf7789a1059a7b42b7dbd183150c72df5738fTed Kremenek    DCHECK_EQ('\n', *line_end)
5381b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek      << "\"" << std::string(line_begin, line_end) << "\"";
5391b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    while (*line_end <= ' ' && line_end > line_begin) {
540d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek      --line_end;
5411b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    }
542d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek    ++line_end;
5431b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    DCHECK_GE(' ', *line_end);
5441b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    DCHECK_LT(line_begin, line_end);
545534986f2b21e6050bf00163cd6423fd92155a6edRichard Smith
5461b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // We use '0' for the block idx, because we're always writing to the first
5471b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // block from the framer (we do this because the framer requires that the
548d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek    // entire header sequence be in a contiguous buffer).
549d40066b0fb883839a9100e5455e33190b9b8abacTed Kremenek    headers_->header_lines_.push_back(
550ba0513de93d2fab6db5ab30b6927209fcc883078Douglas Gregor        HeaderLineDescription(line_begin - stream_begin,
5511b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek                              line_end - stream_begin,
5521b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek                              line_end - stream_begin,
5538ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek                              line_end - stream_begin,
5548f08426e6f54ed20b959018f24dbea106a00b4adJordy Rose                              0));
5558bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    if (current >= line_end) {
5565eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
5575eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      visitor_->HandleHeaderWarning(this);
558ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      // Then the next colon will not be found within this header line-- time
5598ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek      // to try again with another header-line.
5608ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek      continue;
5618ad9cbc518a603176462f1fa1efe389023590082Ted Kremenek    } else if (current < line_begin) {
5624beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek      // When this condition is true, the last detected colon was part of a
563ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      // previous line.  We reset to the beginning of the line as we don't care
5644beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek      // about the presence of any colon before the beginning of the current
565ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      // line.
5664beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek      current = line_begin;
5674beaa9f51b2da57c64740cef2bd1c2fdb0c325d5Ted Kremenek    }
5681a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if __SSE2__
5691a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    while (current < header_lines_end_m16) {
5701a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      __m128i header_bytes =
5711a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek        _mm_loadu_si128(reinterpret_cast<const __m128i *>(current));
57214429b918bd2f4cb52abc75546a7fe37142054caArgyrios Kyrtzidis      __m128i colon_cmp =
573f8b5aae41e46f94fe90ed5f1ee98f36f0aa59dc9Ted Kremenek        _mm_cmpeq_epi8(header_bytes, reinterpret_cast<__m128i>(colons));
57414429b918bd2f4cb52abc75546a7fe37142054caArgyrios Kyrtzidis      int colon_msk = _mm_movemask_epi8(colon_cmp);
57514429b918bd2f4cb52abc75546a7fe37142054caArgyrios Kyrtzidis      if (colon_msk == 0) {
576f85e193739c953358c865005855253af4f68a497John McCall        current += 16;
5778bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        continue;
578f85e193739c953358c865005855253af4f68a497John McCall      }
579f85e193739c953358c865005855253af4f68a497John McCall      current += (ffs(colon_msk) - 1);
5805eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      if (current > line_end) {
5815eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        break;
582f85e193739c953358c865005855253af4f68a497John McCall      }
583f85e193739c953358c865005855253af4f68a497John McCall      goto found_colon;
584f85e193739c953358c865005855253af4f68a497John McCall    }
585ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks#endif  // __SSE2__
586e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen    for (; current < line_end; ++current) {
587f85e193739c953358c865005855253af4f68a497John McCall      if (*current != ':') {
588f85e193739c953358c865005855253af4f68a497John McCall        continue;
5891b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek      }
5901b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek      goto found_colon;
5911b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    }
5921b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // If we've gotten to here, then there was no colon
5931b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // in the line. The arguments we passed into the construction
5941b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // for the HeaderLineDescription object should be OK-- it assumes
5951b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // that the entire content is 'key' by default (which is true, as
596f85e193739c953358c865005855253af4f68a497John McCall    // there was no colon, there can be no value). Note that this is a
5971b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    // construct which is technically not allowed by the spec.
5981b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    last_error_ = BalsaFrameEnums::HEADER_MISSING_COLON;
5991b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    visitor_->HandleHeaderWarning(this);
6001b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    continue;
601eb382ec1507cf2c8c12d7443d0b67c076223aec6Patrick Beard found_colon:
6021b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    DCHECK_EQ(*current, ':');
6031b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    DCHECK_LE(current - stream_begin, line_end - stream_begin);
6041b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    DCHECK_LE(stream_begin - stream_begin, current - stream_begin);
6051b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek
606e08ce650a2b02410eddd1f60a4aa6b3d4be71e73Peter Collingbourne    HeaderLineDescription& current_header_line = headers_->header_lines_.back();
60756ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall    current_header_line.key_end_idx = current - stream_begin;
60861eee0ca33b29e102f11bab77c8b74cc00e2392bTanya Lattner    current_header_line.value_begin_idx = current_header_line.key_end_idx;
609276b061970939293f1abaf694bd3ef05b2cbda79Eli Friedman    if (current < line_end) {
610337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek      ++current_header_line.key_end_idx;
6111b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek
612337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek      CleanUpKeyValueWhitespace(stream_begin,
613337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek                                line_begin,
614337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek                                current,
615337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek                                line_end,
616337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek                                &current_header_line);
6171b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek    }
6181b49d762e9658b6b6d1b677dca005324a7b1126dTed Kremenek  }
619bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xu}
620bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xu
621bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xuvoid BalsaFrame::ProcessContentLengthLine(
622477323d58a0de352c6a61e08b5a83127c4adc904Zhongxing Xu    HeaderLines::size_type line_idx,
6231a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    BalsaHeadersEnums::ContentLengthStatus* status,
624bc9ad74a13e83303a3a5251f8bacbbca17341c17Zhongxing Xu    size_t* length) {
625f901a7de97f46ba2b1ff153f9fb83d00dc37cfcfDouglas Gregor  const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
626e739a29c62c67eaec0af5c4d5c75f9e8f11228bdTed Kremenek  const char* stream_begin = headers_->OriginalHeaderStreamBegin();
627e739a29c62c67eaec0af5c4d5c75f9e8f11228bdTed Kremenek  const char* line_end = stream_begin + header_line.last_char_idx;
628cc2c4b293d8590346f26b7ecc16d299226b8794fTed Kremenek  const char* value_begin = (stream_begin + header_line.value_begin_idx);
629bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek
630bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek  if (value_begin >= line_end) {
631bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek    // There is no non-whitespace value data.
632bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek#if DEBUGFRAMER
633bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek      LOG(INFO) << "invalid content-length -- no non-whitespace value data";
634bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek#endif
635e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
636bdd4c848349d4091d66b052efa453e6d69a77e36Ted Kremenek    return;
6371eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  }
6381a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek
6391a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek  *length = 0;
6401a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek  while (value_begin < line_end) {
6411a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    if (*value_begin < '0' || *value_begin > '9') {
6421a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      // bad! content-length found, and couldn't parse all of it!
6431a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      *status = BalsaHeadersEnums::INVALID_CONTENT_LENGTH;
6441a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if DEBUGFRAMER
6451a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      LOG(INFO) << "invalid content-length - non numeric character detected";
6461a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#endif  // DEBUGFRAMER
6471a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      return;
6481a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    }
6491a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    const size_t kMaxDiv10 = std::numeric_limits<size_t>::max() / 10;
6501a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    size_t length_x_10 = *length * 10;
6511a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    const unsigned char c = *value_begin - '0';
6521a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    if (*length > kMaxDiv10 ||
6531a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek        (std::numeric_limits<size_t>::max() - length_x_10) < c) {
6541a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      *status = BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW;
6551a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if DEBUGFRAMER
6561a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      LOG(INFO) << "content-length overflow";
6571a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#endif  // DEBUGFRAMER
6581a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek      return;
6591a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    }
6601a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    *length = length_x_10 + c;
6611a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek    ++value_begin;
6621a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek  }
6631a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#if DEBUGFRAMER
6641a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek  LOG(INFO) << "content_length parsed: " << *length;
6651a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek#endif  // DEBUGFRAMER
6661a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek  *status = BalsaHeadersEnums::VALID_CONTENT_LENGTH;
6671a45a5ff5d495cb6cd9a3d4d06317af79c0f634dTed Kremenek}
668540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek
669ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksvoid BalsaFrame::ProcessTransferEncodingLine(HeaderLines::size_type line_idx) {
670892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek  const HeaderLineDescription& header_line = headers_->header_lines_[line_idx];
671ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  const char* stream_begin = headers_->OriginalHeaderStreamBegin();
672540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek  const char* line_end = stream_begin + header_line.last_char_idx;
6731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  const char* value_begin = stream_begin + header_line.value_begin_idx;
674e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  size_t value_length = line_end - value_begin;
675ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
676e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  if ((value_length == 7) &&
677ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      !strncasecmp(value_begin, "chunked", 7)) {
678e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    headers_->transfer_encoding_is_chunked_ = true;
679b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek  } else if ((value_length == 8) &&
680c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek      !strncasecmp(value_begin, "identity", 8)) {
681ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    headers_->transfer_encoding_is_chunked_ = false;
682c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek  } else {
683ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    last_error_ = BalsaFrameEnums::UNKNOWN_TRANSFER_ENCODING;
684c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek    parse_state_ = BalsaFrameEnums::PARSE_ERROR;
685c95ad9ff6e574aecdd759542d5578bc65d586d93Ted Kremenek    visitor_->HandleHeaderError(this);
686e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    return;
68703509aea098772644bf4662dc1c88634818ceeccZhongxing Xu  }
688e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek}
689ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
690e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremeneknamespace {
691ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksbool SplitStringPiece(base::StringPiece original, char delim,
692e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek                      base::StringPiece* before, base::StringPiece* after) {
693e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  const char* p = original.data();
6942de56d1d0c3a504ad1529de2677628bdfbb95cd4John McCall  const char* end = p + original.size();
6958bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
696ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  while (p != end) {
6975eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    if (*p == delim) {
6985eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      ++p;
6995eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    } else {
700e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      const char* start = p;
701e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      while (++p != end && *p != delim) {
70206fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek        // Skip to the next occurence of the delimiter.
703ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      }
704ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      *before = base::StringPiece(start, p - start);
705b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek      if (p != end)
706bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu        *after = base::StringPiece(p + 1, end - (p + 1));
707031ccc0555a82afc2e8afe29e19dd57ff204e2deZhongxing Xu      else
708892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek        *after = base::StringPiece("");
7099c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek      StringPieceUtils::RemoveWhitespaceContext(before);
71048af2a9c1ed3259512f2d1431720add1fbe8fb5fTed Kremenek      StringPieceUtils::RemoveWhitespaceContext(after);
71148af2a9c1ed3259512f2d1431720add1fbe8fb5fTed Kremenek      return true;
712892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek    }
71348af2a9c1ed3259512f2d1431720add1fbe8fb5fTed Kremenek  }
714ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
715e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  *before = original;
716e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  *after = "";
71706fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek  return false;
718b277159055933e610bbc80262b600d3ad7e0595cTed Kremenek}
719b277159055933e610bbc80262b600d3ad7e0595cTed Kremenek
7209fcce65e7e1307b5b8da9be13e4092d6bb94dc1dRichard Smith// TODO(phython): Fix this function to properly deal with quoted values.
7219fcce65e7e1307b5b8da9be13e4092d6bb94dc1dRichard Smith// E.g. ";;foo", "\";;\"", or \"aa;
722ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks// The last example, the semi-colon is a separator between extensions.
723b277159055933e610bbc80262b600d3ad7e0595cTed Kremenekvoid ProcessChunkExtensionsManual(base::StringPiece all_extensions,
724ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                                  BalsaHeaders* extensions) {
72506fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek  base::StringPiece extension;
726e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  base::StringPiece remaining;
727337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek  StringPieceUtils::RemoveWhitespaceContext(&all_extensions);
728337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek  SplitStringPiece(all_extensions, ';', &extension, &remaining);
729337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek  while (!extension.empty()) {
730337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek    base::StringPiece key;
731337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek    base::StringPiece value;
732337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek    SplitStringPiece(extension, '=', &key, &value);
733337e4dbc6859589b8878146a88bebf754e916702Ted Kremenek    if (!value.empty()) {
73406fb99fb403bff1651429923f666a2ebe2b1522fTed Kremenek      // Strip quotation marks if they exist.
735744f1cd66bb6747ea71fbf1172698e7bf35ec88dTed Kremenek      if (!value.empty() && value[0] == '"')
7367ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu        value.remove_prefix(1);
7377ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu      if (!value.empty() && value[value.length() - 1] == '"')
7387ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu        value.remove_suffix(1);
7397ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu    }
740ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
741892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek    extensions->AppendHeader(key, value);
742ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
7437ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu    StringPieceUtils::RemoveWhitespaceContext(&remaining);
7447ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu    SplitStringPiece(remaining, ';', &extension, &remaining);
7457ce351db56fbce162a3b650518ce05b5c61ebf36Zhongxing Xu  }
746856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu}
747ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
74803509aea098772644bf4662dc1c88634818ceeccZhongxing Xu}  // anonymous namespace
749856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu
750ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksvoid BalsaFrame::ProcessChunkExtensions(const char* input, size_t size,
751856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu                                        BalsaHeaders* extensions) {
752856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu  ProcessChunkExtensionsManual(base::StringPiece(input, size), extensions);
753856c6bcaea56e05255e9f3997ddd56b5c18a14f0Zhongxing Xu}
7546b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu
755ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaksvoid BalsaFrame::ProcessHeaderLines() {
75603509aea098772644bf4662dc1c88634818ceeccZhongxing Xu  HeaderLines::size_type content_length_idx = 0;
7576b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu  HeaderLines::size_type transfer_encoding_idx = 0;
758ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
7596b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu  DCHECK(!lines_.empty());
7606b8513829895e56a7b97e787ea74520bc626512eZhongxing Xu#if DEBUGFRAMER
761e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  LOG(INFO) << "******@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@**********\n";
762e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek#endif  // DEBUGFRAMER
7631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
764e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  // There is no need to attempt to process headers if no header lines exist.
765ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  // There are at least two lines in the message which are not header lines.
7669c378f705405d37f49795d5e915989de774fe11fTed Kremenek  // These two non-header lines are the first line of the message, and the
767e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  // last line of the message (which is an empty line).
768ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  // Thus, we test to see if we have more than two lines total before attempting
769e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  // to parse any header lines.
770e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  if (lines_.size() > 2) {
7711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    const char* stream_begin = headers_->OriginalHeaderStreamBegin();
772e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek
773ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    // Then, for the rest of the header data, we parse these into key-value
774892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek    // pairs.
775ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    FindColonsAndParseIntoKeyValue();
776e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek    // At this point, we've parsed all of the headers.  Time to look for those
777f22679e3e5d5f5754931952e58112b4c863a4137Zhongxing Xu    // headers which we require for framing.
778f22679e3e5d5f5754931952e58112b4c863a4137Zhongxing Xu    const HeaderLines::size_type
779ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      header_lines_size = headers_->header_lines_.size();
780892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek    for (HeaderLines::size_type i = 0; i < header_lines_size; ++i) {
781ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      const HeaderLineDescription& current_header_line =
782f22679e3e5d5f5754931952e58112b4c863a4137Zhongxing Xu        headers_->header_lines_[i];
7831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      const char* key_begin =
78456ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall        (stream_begin + current_header_line.first_char_idx);
785e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      const char* key_end = (stream_begin + current_header_line.key_end_idx);
786ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      const size_t key_len = key_end - key_begin;
78756ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall      const char c = *key_begin;
78856ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall#if DEBUGFRAMER
78956ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall      LOG(INFO) << "[" << i << "]: " << std::string(key_begin, key_len)
790ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                << " c: '" << c << "' key_len: " << key_len;
791e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek#endif  // DEBUGFRAMER
792e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek      // If a header begins with either lowercase or uppercase 'c' or 't', then
7931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      // the header may be one of content-length, connection, content-encoding
794bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu      // or transfer-encoding. These headers are special, as they change the way
795ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      // that the message is framed, and so the framer is required to search
796bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu      // for them.
797ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
798bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu
799bb141217871e93767aa3f2de1b9946fa6d37066aZhongxing Xu      if (c == 'c' || c == 'C') {
800892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek        if ((key_len == kContentLengthSize) &&
801ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks            0 == strncasecmp(key_begin, kContentLength, kContentLengthSize)) {
802892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek          BalsaHeadersEnums::ContentLengthStatus content_length_status =
803892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            BalsaHeadersEnums::NO_CONTENT_LENGTH;
804ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks          size_t length = 0;
805e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek          ProcessContentLengthLine(i, &content_length_status, &length);
806892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek          if (content_length_idx != 0) {  // then we've already seen one!
8071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            if ((headers_->content_length_status_ != content_length_status) ||
808e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek                ((headers_->content_length_status_ ==
809ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks                  BalsaHeadersEnums::VALID_CONTENT_LENGTH) &&
810e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek                 length != headers_->content_length_)) {
811ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks              last_error_ = BalsaFrameEnums::MULTIPLE_CONTENT_LENGTH_KEYS;
812e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek              parse_state_ = BalsaFrameEnums::PARSE_ERROR;
8131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump              visitor_->HandleHeaderError(this);
8140835a3cdeefe714b4959d31127ea155e56393125Argyrios Kyrtzidis              return;
8150d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu            }
8160d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu            continue;
8170d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu          } else {
8180d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu            content_length_idx = i + 1;
8190d9d736c49b51691ced96759ec99399824e2a602Zhongxing Xu            headers_->content_length_status_ = content_length_status;
820f85e193739c953358c865005855253af4f68a497John McCall            headers_->content_length_ = length;
821f85e193739c953358c865005855253af4f68a497John McCall            content_length_remaining_ = length;
822ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks          }
8239c378f705405d37f49795d5e915989de774fe11fTed Kremenek
824f85e193739c953358c865005855253af4f68a497John McCall        }
825f85e193739c953358c865005855253af4f68a497John McCall      } else if (c == 't' || c == 'T') {
826f85e193739c953358c865005855253af4f68a497John McCall        if ((key_len == kTransferEncodingSize) &&
827f85e193739c953358c865005855253af4f68a497John McCall            0 == strncasecmp(key_begin, kTransferEncoding,
828f85e193739c953358c865005855253af4f68a497John McCall                             kTransferEncodingSize)) {
829f85e193739c953358c865005855253af4f68a497John McCall          if (transfer_encoding_idx != 0) {
830f85e193739c953358c865005855253af4f68a497John McCall            last_error_ = BalsaFrameEnums::MULTIPLE_TRANSFER_ENCODING_KEYS;
831f85e193739c953358c865005855253af4f68a497John McCall            parse_state_ = BalsaFrameEnums::PARSE_ERROR;
832f85e193739c953358c865005855253af4f68a497John McCall            visitor_->HandleHeaderError(this);
833f85e193739c953358c865005855253af4f68a497John McCall            return;
834f85e193739c953358c865005855253af4f68a497John McCall          }
835f85e193739c953358c865005855253af4f68a497John McCall          transfer_encoding_idx = i + 1;
836f85e193739c953358c865005855253af4f68a497John McCall        }
837ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      } else if (i == 0 && (key_len == 0 || c == ' ')) {
838e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        last_error_ = BalsaFrameEnums::INVALID_HEADER_FORMAT;
839e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        parse_state_ = BalsaFrameEnums::PARSE_ERROR;
840b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek        visitor_->HandleHeaderError(this);
84103e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor        return;
842ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      }
84303e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor    }
84403e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor    if (headers_->transfer_encoding_is_chunked_) {
845e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen      headers_->content_length_ = 0;
846e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen      headers_->content_length_status_ = BalsaHeadersEnums::NO_CONTENT_LENGTH;
84703e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor      content_length_remaining_ = 0;
848e711d7e7875920fee4180a26bfc67d67f0f71a2cErik Verbruggen    }
849ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    if (transfer_encoding_idx != 0) {
85003e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor      ProcessTransferEncodingLine(transfer_encoding_idx - 1);
85103e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor    }
85203e80030515c800d1ab44125b9052dfffd1bd04cDouglas Gregor  }
853c4f8706b6539e06a5de153bd72850bb2e0a71456Zhongxing Xu}
854ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
855c4f8706b6539e06a5de153bd72850bb2e0a71456Zhongxing Xuvoid BalsaFrame::AssignParseStateAfterHeadersHaveBeenParsed() {
856ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  // For responses, can't have a body if the request was a HEAD, or if it is
857c4f8706b6539e06a5de153bd72850bb2e0a71456Zhongxing Xu  // one of these response-codes.  rfc2616 section 4.3
8581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
85997ed4f68f5dba3e21e7a490ef0f9ffd3bfead7f8Ted Kremenek  if (is_request_ ||
860ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      !(request_was_head_ ||
861892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek        (headers_->parsed_response_code_ >= 100 &&
862ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks         headers_->parsed_response_code_ < 200) ||
863469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek        (headers_->parsed_response_code_ == 204) ||
864ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        (headers_->parsed_response_code_ == 304))) {
86597ed4f68f5dba3e21e7a490ef0f9ffd3bfead7f8Ted Kremenek    // Then we can have a body.
866ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    if (headers_->transfer_encoding_is_chunked_) {
867892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek      // Note that
868ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      // if ( Transfer-Encoding: chunked &&  Content-length: )
86997ed4f68f5dba3e21e7a490ef0f9ffd3bfead7f8Ted Kremenek      // then Transfer-Encoding: chunked trumps.
870af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek      // This is as specified in the spec.
871af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek      // rfc2616 section 4.4.3
872ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
873af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek    } else {
874ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks      // Errors parsing content-length definitely can cause
875af3374187c47acea45706eab6744be6b1c66a856Ted Kremenek      // protocol errors/warnings
8761eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      switch (headers_->content_length_status_) {
877b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        // If we have a content-length, and it is parsed
878ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        // properly, there are two options.
879b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        // 1) zero content, in which case the message is done, and
880b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        // 2) nonzero content, in which case we have to
881b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        //    consume the body.
882b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        case BalsaHeadersEnums::VALID_CONTENT_LENGTH:
883b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          if (headers_->content_length_ == 0) {
884b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
885b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          } else {
886b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek            parse_state_ = BalsaFrameEnums::READING_CONTENT;
887b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          }
888b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          break;
889b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        case BalsaHeadersEnums::CONTENT_LENGTH_OVERFLOW:
890b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        case BalsaHeadersEnums::INVALID_CONTENT_LENGTH:
891b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          // If there were characters left-over after parsing the
892b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          // content length, we should flag an error and stop.
893b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          parse_state_ = BalsaFrameEnums::PARSE_ERROR;
894b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          last_error_ = BalsaFrameEnums::UNPARSABLE_CONTENT_LENGTH;
895b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          visitor_->HandleHeaderError(this);
896b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          break;
897b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          // We can have: no transfer-encoding, no content length, and no
898b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          // connection: close...
899b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          // Unfortunately, this case doesn't seem to be covered in the spec.
900b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek          // We'll assume that the safest thing to do here is what the google
901ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks          // binaries before 2008 already do, which is to assume that
902e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek          // everything until the connection is closed is body.
903b673a41c92aa276f2e37164d0747be1cfb0c402bTed Kremenek        case BalsaHeadersEnums::NO_CONTENT_LENGTH:
9041eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          if (is_request_) {
905bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek            base::StringPiece method = headers_->request_method();
906bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek            // POSTs and PUTs should have a detectable body length.  If they
907bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek            // do not we consider it an error.
908ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks            if ((method.size() == 4 &&
909bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek                 strncmp(method.data(), "POST", 4) == 0) ||
910bbfd07a0c94f659beaf74316029ef73769cefb81Ted Kremenek                (method.size() == 3 &&
9111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                 strncmp(method.data(), "PUT", 3) == 0)) {
9121b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek              parse_state_ = BalsaFrameEnums::PARSE_ERROR;
913ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks              last_error_ =
9141b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek                  BalsaFrameEnums::REQUIRED_BODY_BUT_NO_CONTENT_LENGTH;
915ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks              visitor_->HandleHeaderError(this);
9161b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek              break;
9171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            }
9188ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
919ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks          } else {
9208ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor            parse_state_ = BalsaFrameEnums::READING_UNTIL_CLOSE;
921ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks            last_error_ = BalsaFrameEnums::MAYBE_BODY_BUT_NO_CONTENT_LENGTH;
9228ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor            visitor_->HandleHeaderWarning(this);
9238ecdb65716cd7914ffb2eeee993fa9039fcd31e8Douglas Gregor          }
924f4e3cfbe8abd124be6341ef5d714819b4fbd9082Peter Collingbourne          break;
925ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks          // The COV_NF_... statements here provide hints to the apparatus
926f4e3cfbe8abd124be6341ef5d714819b4fbd9082Peter Collingbourne          // which computes coverage reports/ratios that this code is never
927f4e3cfbe8abd124be6341ef5d714819b4fbd9082Peter Collingbourne          // intended to be executed, and should technically be impossible.
928ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks          // COV_NF_START
929e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        default:
9301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          LOG(FATAL) << "Saw a content_length_status: "
931e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek           << headers_->content_length_status_ << " which is unknown.";
9329c378f705405d37f49795d5e915989de774fe11fTed Kremenek          // COV_NF_END
933a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek      }
934a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek    }
935a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek  }
936a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek}
937a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek
938a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremeneksize_t BalsaFrame::ProcessHeaders(const char* message_start,
939a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek                                  size_t message_length) {
9401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  const char* const original_message_start = message_start;
9419c378f705405d37f49795d5e915989de774fe11fTed Kremenek  const char* const message_end = message_start + message_length;
9428bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek  const char* message_current = message_start;
943ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  const char* checkpoint = message_start;
9445eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek
9455eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek  if (message_length == 0) {
9465eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    goto bottom;
947a3d1eb85853eae7b719f679b40923826b5e4b7dfTed Kremenek  }
948e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek
949e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek  while (message_current < message_end) {
9506987c7b74146b9658b1925c5981f8b0cd0672b55Zhongxing Xu    size_t base_idx = headers_->GetReadableBytesFromHeaderStream();
95172374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek
952ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    // Yes, we could use strchr (assuming null termination), or
95303509aea098772644bf4662dc1c88634818ceeccZhongxing Xu    // memchr, but as it turns out that is slower than this tight loop
954ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    // for the input that we see.
955031ccc0555a82afc2e8afe29e19dd57ff204e2deZhongxing Xu    if (!saw_non_newline_char_) {
956892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek      do {
9579c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek        const char c = *message_current;
95872374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek        if (c != '\r' && c != '\n') {
95972374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek          if (c <= ' ') {
960892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            parse_state_ = BalsaFrameEnums::PARSE_ERROR;
961ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks            last_error_ = BalsaFrameEnums::NO_REQUEST_LINE_IN_REQUEST;
9621b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek            visitor_->HandleHeaderError(this);
96372374594c5d9ade02451bc85cf9dfa5b0ea106e7Ted Kremenek            goto bottom;
9644b9c2d235fb9449e249d74f48ecfec601650de93John McCall          } else {
9654b9c2d235fb9449e249d74f48ecfec601650de93John McCall            saw_non_newline_char_ = true;
9664b9c2d235fb9449e249d74f48ecfec601650de93John McCall            checkpoint = message_start = message_current;
9678bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek            goto read_real_message;
9684b9c2d235fb9449e249d74f48ecfec601650de93John McCall          }
9694b9c2d235fb9449e249d74f48ecfec601650de93John McCall        }
9705eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        ++message_current;
9715eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      } while (message_current < message_end);
9725eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      goto bottom;  // this is necessary to skip 'last_char_was_slash_r' checks
9734b9c2d235fb9449e249d74f48ecfec601650de93John McCall    } else {
9744b9c2d235fb9449e249d74f48ecfec601650de93John McCall read_real_message:
9755eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      // Note that SSE2 can be enabled on certain piii platforms.
9765eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek#if __SSE2__
9775eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      {
9784b9c2d235fb9449e249d74f48ecfec601650de93John McCall        const char* const message_end_m16 = message_end - 16;
9794b9c2d235fb9449e249d74f48ecfec601650de93John McCall        __v16qi newlines = { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
9804b9c2d235fb9449e249d74f48ecfec601650de93John McCall                             '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' };
9814b9c2d235fb9449e249d74f48ecfec601650de93John McCall        while (message_current < message_end_m16) {
9821b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek          // What this does (using compiler intrinsics):
9831b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek          //
9841b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek          // Load 16 '\n's into an xmm register
9855903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          // Load 16 bytes of currennt message into an xmm register
9865903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          // Do byte-wise equals on those two xmm registers
9875903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          // Take the first bit of each byte, and put that into the first
9885903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          //   16 bits of a mask
9895903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          // If the mask is zero, no '\n' found. increment by 16 and try again
9905903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          // Else scan forward to find the first set bit.
9915903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          // Increment current by the index of the first set bit
9925903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          //   (ffs returns index of first set bit + 1)
9935903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          __m128i msg_bytes =
9945903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            _mm_loadu_si128(const_cast<__m128i *>(
9955903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks                    reinterpret_cast<const __m128i *>(message_current)));
9965903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          __m128i newline_cmp =
9975903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            _mm_cmpeq_epi8(msg_bytes, reinterpret_cast<__m128i>(newlines));
9985903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          int newline_msk = _mm_movemask_epi8(newline_cmp);
9995903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          if (newline_msk == 0) {
10005903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            message_current += 16;
10015903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            continue;
10025903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          }
10030b3ade86a1c60cf0c7b56aa238aff458eb7f5974Anna Zaks          message_current += (ffs(newline_msk) - 1);
10045903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          const size_t relative_idx = message_current - message_start;
10055903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          const size_t message_current_idx = 1 + base_idx + relative_idx;
10065903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          lines_.push_back(std::make_pair(last_slash_n_idx_,
10075903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks                                          message_current_idx));
10085903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          if (lines_.size() == 1) {
10095903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            headers_->WriteFromFramer(checkpoint,
10105903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks                                      1 + message_current - checkpoint);
1011253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks            checkpoint = message_current + 1;
10125903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            const char* begin = headers_->OriginalHeaderStreamBegin();
10135903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks#if DEBUGFRAMER
10145903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
10155903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          LOG(INFO) << "is_request_: " << is_request_;
10165903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks#endif
10175903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            ProcessFirstLine(begin, begin + lines_[0].second);
10185903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
10195903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks              goto process_lines;
10205903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
10215903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks              goto bottom;
10225903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          }
10235903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          const size_t chars_since_last_slash_n = (message_current_idx -
10245903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks                                                   last_slash_n_idx_);
10255903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          last_slash_n_idx_ = message_current_idx;
10265903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          if (chars_since_last_slash_n > 2) {
10275903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            // We have a slash-n, but the last slash n was
1028253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks            // more than 2 characters away from this. Thus, we know
1029253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks            // that this cannot be an end-of-header.
1030253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks            ++message_current;
1031253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks            continue;
1032253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks          }
10335903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          if ((chars_since_last_slash_n == 1) ||
10345903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks              (((message_current > message_start) &&
10355903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks                (*(message_current - 1) == '\r')) ||
10365903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks               (last_char_was_slash_r_))) {
10375903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            goto process_lines;
10385903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          }
10395903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          ++message_current;
10405903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks        }
10415903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks      }
1042c03a39e16762627b421247b12a2658be630a3300Anna Zaks#endif  // __SSE2__
1043253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks      while (message_current < message_end) {
1044253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks        if (*message_current != '\n') {
104527c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek          ++message_current;
104627c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek          continue;
1047c03a39e16762627b421247b12a2658be630a3300Anna Zaks        }
104827c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek        const size_t relative_idx = message_current - message_start;
1049c03a39e16762627b421247b12a2658be630a3300Anna Zaks        const size_t message_current_idx = 1 + base_idx + relative_idx;
1050ca804539d908d3a0e8c72a0df5f1f571d29490bbTed Kremenek        lines_.push_back(std::make_pair(last_slash_n_idx_,
1051253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks                                        message_current_idx));
1052253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks        if (lines_.size() == 1) {
1053749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks          headers_->WriteFromFramer(checkpoint,
1054749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks                                    1 + message_current - checkpoint);
1055749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks          checkpoint = message_current + 1;
10565903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          const char* begin = headers_->OriginalHeaderStreamBegin();
10573bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks#if DEBUGFRAMER
10585903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          LOG(INFO) << "First line " << std::string(begin, lines_[0].second);
10595903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          LOG(INFO) << "is_request_: " << is_request_;
10603bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks#endif
1061e62f048960645b79363408fdead53fec2a063c52Anna Zaks          ProcessFirstLine(begin, begin + lines_[0].second);
10623bbd8cd831788c506f2980293eb3c7e1b3ca2501Anna Zaks          if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ)
10635903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            goto process_lines;
10645903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks          else if (parse_state_ == BalsaFrameEnums::PARSE_ERROR)
10655903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks            goto bottom;
10665903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks        }
1067b47dbcbc12430fdf3e5a5b9f59cdec5480e89e75Anna Zaks        const size_t chars_since_last_slash_n = (message_current_idx -
1068253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks                                                 last_slash_n_idx_);
1069253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks        last_slash_n_idx_ = message_current_idx;
10705903a373db3d27794c90b25687e0dd6adb0e497dAnna Zaks        if (chars_since_last_slash_n > 2) {
1071749bbe6f5f23676244f12a0d41511c8e73516febAnna Zaks          // false positive.
1072253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks          ++message_current;
1073253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks          continue;
1074253955ca25c7e7049963b5db613c0cd15d66e4f8Anna Zaks        }
107527c54e57c4a012dcdf2b40cf985b70d0b9caa69eTed Kremenek        if ((chars_since_last_slash_n == 1) ||
1076e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek            (((message_current > message_start) &&
1077e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek              (*(message_current - 1) == '\r')) ||
1078e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek             (last_char_was_slash_r_))) {
1079e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek          goto process_lines;
1080e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        }
1081e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        ++message_current;
10828bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek      }
10835eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    }
10845eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    continue;
10855eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek process_lines:
10861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    ++message_current;
108705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    DCHECK(message_current >= message_start);
108805a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    if (message_current > message_start) {
1089a8538d902fce9cfec20f39b34492268b51643819Ted Kremenek      headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
10901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
109105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek
10921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // Check if we have exceeded maximum headers length
109303509aea098772644bf4662dc1c88634818ceeccZhongxing Xu    // Although we check for this limit before and after we call this function
109405a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    // we check it here as well to make sure that in case the visitor changed
10951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // the max_header_length_ (for example after processing the first line)
10962de56d1d0c3a504ad1529de2677628bdfbb95cd4John McCall    // we handle it gracefully.
10971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    if (headers_->GetReadableBytesFromHeaderStream() > max_header_length_) {
109805a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      parse_state_ = BalsaFrameEnums::PARSE_ERROR;
109905a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
110005a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      visitor_->HandleHeaderError(this);
110105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      goto bottom;
110205a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    }
11031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11049c378f705405d37f49795d5e915989de774fe11fTed Kremenek    // Since we know that we won't be writing any more bytes of the header,
11052de56d1d0c3a504ad1529de2677628bdfbb95cd4John McCall    // we tell that to the headers object. The headers object may make
110603509aea098772644bf4662dc1c88634818ceeccZhongxing Xu    // more efficient allocation decisions when this is signaled.
11071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    headers_->DoneWritingFromFramer();
11085eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    {
110905a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      const char* readable_ptr = NULL;
11101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      size_t readable_size = 0;
111156ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall      headers_->GetReadablePtrFromHeaderStream(&readable_ptr, &readable_size);
111205a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      visitor_->ProcessHeaderInput(readable_ptr, readable_size);
111356ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall    }
111456ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall
11151eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // Ok, now that we've written everything into our header buffer, it is
111605a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    // time to process the header lines (extract proper values for headers
111705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    // which are important for framing).
11181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    ProcessHeaderLines();
11199c378f705405d37f49795d5e915989de774fe11fTed Kremenek    if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
11201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      goto bottom;
112105a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    }
112256ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall    AssignParseStateAfterHeadersHaveBeenParsed();
112305a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    if (parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
112456ca35d396d8692c384c785f9aeebcf22563fe1eJohn McCall      goto bottom;
11251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
11265eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    visitor_->ProcessHeaders(*headers_);
112705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek    visitor_->HeaderDone();
11281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ) {
112905a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek      visitor_->MessageDone();
11301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
11319c378f705405d37f49795d5e915989de774fe11fTed Kremenek    goto bottom;
11321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  }
11339c378f705405d37f49795d5e915989de774fe11fTed Kremenek  // If we've gotten to here, it means that we've consumed all of the
11345eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek  // available input. We need to record whether or not the last character we
113505a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek  // saw was a '\r' so that a subsequent call to ProcessInput correctly finds
113605a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek  // a header framing that is split across the two calls.
113705a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek  last_char_was_slash_r_ = (*(message_end - 1) == '\r');
113805a2378c708688c8ef498a5cea40ed7f5db15fa5Ted Kremenek  DCHECK(message_current >= message_start);
11396ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek  if (message_current > message_start) {
11406ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek    headers_->WriteFromFramer(checkpoint, message_current - checkpoint);
11416ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek  }
11426ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek bottom:
11436ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek  return message_current - original_message_start;
1144294fd0a62b95f512637910bf85c7efa6c2354b50Ted Kremenek}
11458bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek
1146294fd0a62b95f512637910bf85c7efa6c2354b50Ted Kremenek
11475eca482fe895ea57bc82410222e6426c09e63284Ted Kremeneksize_t BalsaFrame::BytesSafeToSplice() const {
1148294fd0a62b95f512637910bf85c7efa6c2354b50Ted Kremenek  switch (parse_state_) {
11496ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek    case BalsaFrameEnums::READING_CHUNK_DATA:
115003509aea098772644bf4662dc1c88634818ceeccZhongxing Xu      return chunk_length_remaining_;
11516ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek    case BalsaFrameEnums::READING_UNTIL_CLOSE:
11526ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      return std::numeric_limits<size_t>::max();
11536ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek    case BalsaFrameEnums::READING_CONTENT:
11546ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      return content_length_remaining_;
11556ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek    default:
11561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      return 0;
115703509aea098772644bf4662dc1c88634818ceeccZhongxing Xu  }
11586ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek}
11596ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek
11606ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenekvoid BalsaFrame::BytesSpliced(size_t bytes_spliced) {
11616ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek  switch (parse_state_) {
11621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    case BalsaFrameEnums::READING_CHUNK_DATA:
11636ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      if (chunk_length_remaining_ >= bytes_spliced) {
11646ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek        chunk_length_remaining_ -= bytes_spliced;
11656ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek        if (chunk_length_remaining_ == 0) {
11666ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek          parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
11676ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek        }
11681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        return;
11696ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      } else {
11706ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek        last_error_ =
11716ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek          BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
11726ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek        goto error_exit;
11736ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      }
11746ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek
11756ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek    case BalsaFrameEnums::READING_UNTIL_CLOSE:
11766ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      return;
11771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11785eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    case BalsaFrameEnums::READING_CONTENT:
11796ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek      if (content_length_remaining_ >= bytes_spliced) {
11806ae8a3600656c478d27f25639bed765f4fe71732Ted Kremenek        content_length_remaining_ -= bytes_spliced;
11819c378f705405d37f49795d5e915989de774fe11fTed Kremenek        if (content_length_remaining_ == 0) {
1182a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1183ad62deeb70e97da6bd514dd390ea1ce6af6ad81dAnna Zaks          visitor_->MessageDone();
11841aae01a8308d2f8e31adab3f4d7ac35543aac680Anna Zaks        }
1185a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks        return;
1186a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks      } else {
1187f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks        last_error_ =
1188f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks          BalsaFrameEnums::CALLED_BYTES_SPLICED_AND_EXCEEDED_SAFE_SPLICE_AMOUNT;
1189b2331834a0515c80862ee51325c758a053829f15Ted Kremenek        goto error_exit;
11901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      }
11911aae01a8308d2f8e31adab3f4d7ac35543aac680Anna Zaks
1192a19f4af7a94835ce4693bfe12d6270754e79eb56Anna Zaks    default:
11934e82d3cf6fd4c907265e3fa3aac0a835c35dc759Anna Zaks      last_error_ = BalsaFrameEnums::CALLED_BYTES_SPLICED_WHEN_UNSAFE_TO_DO_SO;
1194b2331834a0515c80862ee51325c758a053829f15Ted Kremenek      goto error_exit;
1195b2331834a0515c80862ee51325c758a053829f15Ted Kremenek  }
11961eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
119721028dd8850c64a414f7a82dfddcc291351203d6Ted Kremenek error_exit:
119821028dd8850c64a414f7a82dfddcc291351203d6Ted Kremenek  parse_state_ = BalsaFrameEnums::PARSE_ERROR;
119921028dd8850c64a414f7a82dfddcc291351203d6Ted Kremenek  visitor_->HandleBodyError(this);
12000fb0bc4067d6c9d7c0e655300ef309b05d3adfc9Ted Kremenek};
1201f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks
1202f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks// You may note that the state-machine contained within this function has both
12034e82d3cf6fd4c907265e3fa3aac0a835c35dc759Anna Zaks// switch and goto labels for nearly the same thing. For instance, the
12048ff5c41f2bde7ebbe568b4c15e59f14b8befae66Anna Zaks// following two labels refer to the same code block:
1205f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks//   label_reading_chunk_data:
12068ff5c41f2bde7ebbe568b4c15e59f14b8befae66Anna Zaks//   case BalsaFrameEnums::READING_CHUNK_DATA:
12071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump// The 'case' statement is required for the switch statement which occurs when
1208f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks// ProcessInput is invoked. The goto label is required as the state-machine
1209f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks// does not use a computed goto in any subsequent operations.
1210f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks//
// Since several states exit the state machine for various reasons, there is
// also one label at the bottom of the function. When it is appropriate to
// return from the function, that part of the state machine instead issues a
// 'goto bottom;'. This results in less code duplication, and makes debugging
// easier (as you can add a statement to a section of code which is guaranteed
// to be invoked when the function is exiting).
12175eca482fe895ea57bc82410222e6426c09e63284Ted Kremeneksize_t BalsaFrame::ProcessInput(const char* input, size_t size) {
1218cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  const char* current = input;
1219cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  const char* on_entry = current;
1220cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  const char* end = current + size;
1221cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks#if DEBUGFRAMER
1222cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  LOG(INFO) << "\n=============="
1223cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks            << BalsaFrameEnums::ParseStateToString(parse_state_)
1224cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks            << "===============\n";
1225cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks#endif  // DEBUGFRAMER
1226cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks
1227cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  DCHECK(headers_ != NULL);
1228cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  if (headers_ == NULL) return 0;
12295eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek
1230cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
1231cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    const size_t header_length = headers_->GetReadableBytesFromHeaderStream();
1232cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // Yes, we still have to check this here as the user can change the
1233cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // max_header_length amount!
1234cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // Also it is possible that we have reached the maximum allowed header size,
12350835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu    // and we have more to consume (remember we are still inside
1236b38911f16b4943548db6a3695fc6ae23070b25d2Ted Kremenek    // READING_HEADER_AND_FIRSTLINE) in which case we directly declare an error.
12370835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu    if (header_length > max_header_length_ ||
12385eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        (header_length == max_header_length_ && size > 0)) {
12395eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      parse_state_ = BalsaFrameEnums::PARSE_ERROR;
12405eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
12410835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu      visitor_->HandleHeaderError(this);
124273c498a08f4968b6987d1453c7b77929dcc6d5f7Argyrios Kyrtzidis      goto bottom;
12435eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    }
12445eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    size_t bytes_to_process = max_header_length_ - header_length;
12455eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    if (bytes_to_process > size) {
12465eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      bytes_to_process = size;
1247cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    }
12481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    current += ProcessHeaders(input, bytes_to_process);
12491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // If we are still reading headers check if we have crossed the headers
1250cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // limit. Note that we check for >= as opposed to >. This is because if
12510835e4cccfef3ea5346962722b79484f6b3ca602Zhongxing Xu    // header_length_after equals max_header_length_ and we are still in the
1252cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // parse_state_  BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE we know for
1253cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // sure that the headers limit will be crossed later on
12548bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    if (parse_state_ == BalsaFrameEnums::READING_HEADER_AND_FIRSTLINE) {
12555eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      // Note that headers_ is valid only if we are still reading headers.
12565eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      const size_t header_length_after =
1257cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks          headers_->GetReadableBytesFromHeaderStream();
1258cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks      if (header_length_after >= max_header_length_) {
1259cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks        parse_state_ = BalsaFrameEnums::PARSE_ERROR;
12601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        last_error_ = BalsaFrameEnums::HEADERS_TOO_LONG;
1261cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks        visitor_->HandleHeaderError(this);
1262cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks      }
12638bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek    }
12645eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    goto bottom;
12655eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek  } else if (parse_state_ == BalsaFrameEnums::MESSAGE_FULLY_READ ||
1266cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks             parse_state_ == BalsaFrameEnums::PARSE_ERROR) {
1267cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    // Can do nothing more 'till we're reset.
1268cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks    goto bottom;
1269cd656cab3fa3dd4b0c974c6ae1c0e60880b18c22Anna Zaks  }
1270f236b6503a4dbc44c1fccb8756bd57c9d0efdf05Anna Zaks
1271f233d48cfc513b045e2c2cfca5c175220fbd0a82Ted Kremenek  while (current < end) {
1272f233d48cfc513b045e2c2cfca5c175220fbd0a82Ted Kremenek    switch (parse_state_) {
1273e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenek label_reading_chunk_length:
1274754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek      case BalsaFrameEnums::READING_CHUNK_LENGTH:
1275ad8dcf4a9df0e24051dc31bf9e6f3cd138a34298Chris Lattner        // In this state we read the chunk length.
1276754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek        // Note that once we hit a character which is not in:
12778bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        // [0-9;A-Fa-f\n], we transition to a different state.
12785eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        //
12791eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        {
1280754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          // If we used strtol, etc, we'd have to buffer this line.
1281754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          // This is more annoying than simply doing the conversion
1282754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          // here. This code accounts for overflow.
12834a4e524afef40d6f3ddb25d0e407c814e4ca56a8Ted Kremenek          static const signed char buf[] = {
1284754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek            // %0  %1  %2  %3  %4  %5  %6  %7  %8  \t  \n  %b  %c  \r  %e  %f
1285754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek               -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -2, -1, -1, -2, -1, -1,
12861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            // %10 %11 %12 %13 %14 %15 %16 %17 %18 %19 %1a %1b %1c %1d %1e %1f
1287d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1288754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek            // ' ' %21 %22 %23 %24 %25 %26 %27 %28 %29 %2a %2b %2c %2d %2e %2f
12891c96b24285d05c0eac455ae96d7c9ff43d42bc96Zhongxing Xu               -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1290ad8dcf4a9df0e24051dc31bf9e6f3cd138a34298Chris Lattner            // %30 %31 %32 %33 %34 %35 %36 %37 %38 %39 %3a ';' %3c %3d %3e %3f
12911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -2, -1, -1, -1, -1,
1292ad8dcf4a9df0e24051dc31bf9e6f3cd138a34298Chris Lattner            // %40 'A' 'B' 'C' 'D' 'E' 'F' %47 %48 %49 %4a %4b %4c %4d %4e %4f
129324f1a967741ff9f8025ee23be12ba6feacc31f77Ted Kremenek               -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1294a8538d902fce9cfec20f39b34492268b51643819Ted Kremenek            // %50 %51 %52 %53 %54 %55 %56 %57 %58 %59 %5a %5b %5c %5d %5e %5f
1295754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1296754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek            // %60 'a' 'b' 'c' 'd' 'e' 'f' %67 %68 %69 %6a %6b %6c %6d %6e %6f
1297754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek               -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
12981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            // %70 %71 %72 %73 %74 %75 %76 %77 %78 %79 %7a %7b %7c %7d %7e %7f
1299b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikie               -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
1300754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          };
1301754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          // valid cases:
13021c96b24285d05c0eac455ae96d7c9ff43d42bc96Zhongxing Xu          //  "09123\n"                      // -> 09123
1303754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          //  "09123\r\n"                    // -> 09123
13042055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu          //  "09123  \n"                    // -> 09123
13052055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu          //  "09123  \r\n"                  // -> 09123
13062055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu          //  "09123  12312\n"               // -> 09123
1307754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          //  "09123  12312\r\n"             // -> 09123
1308754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          //  "09123; foo=bar\n"             // -> 09123
13091eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          //  "09123; foo=bar\r\n"           // -> 09123
1310754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          //  "FFFFFFFFFFFFFFFF\r\n"         // -> FFFFFFFFFFFFFFFF
1311b3cfd58c9b13325d994e5f9b5065e6a22d91911dTed Kremenek          //  "FFFFFFFFFFFFFFFF 22\r\n"      // -> FFFFFFFFFFFFFFFF
13121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          // invalid cases:
1313754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          // "[ \t]+[^\n]*\n"
1314a8538d902fce9cfec20f39b34492268b51643819Ted Kremenek          // "FFFFFFFFFFFFFFFFF\r\n"  (would overflow)
1315754607e7cff2d902d9af8b771409449fb2f8d2bfTed Kremenek          // "\r\n"
1316f233d48cfc513b045e2c2cfca5c175220fbd0a82Ted Kremenek          // "\n"
1317d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis          while (current < end) {
131873099bfea9f5d4ec05265170bbefec3d76fb6b5eTed Kremenek            const char c = *current;
1319af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks            ++current;
1320af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks            const signed char addition = buf[static_cast<int>(c)];
1321af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks            if (addition >= 0) {
1322af498a28797c075c48d7e943df5f5a8e78ed8eb0Anna Zaks              chunk_length_character_extracted_ = true;
13234d2ae4a70336dc2aa11389b34946be152bb454c9Anna Zaks              size_t length_x_16 = chunk_length_remaining_ * 16;
132473099bfea9f5d4ec05265170bbefec3d76fb6b5eTed Kremenek              const size_t kMaxDiv16 = std::numeric_limits<size_t>::max() / 16;
132573099bfea9f5d4ec05265170bbefec3d76fb6b5eTed Kremenek              if ((chunk_length_remaining_ > kMaxDiv16) ||
1326d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis                  ((std::numeric_limits<size_t>::max() - length_x_16) <
1327daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek                   static_cast<size_t>(addition))) {
1328e36de1fe51c39d9161915dd3dbef880954af6476Ted Kremenek                // overflow -- asked for a chunk-length greater than 2^64 - 1!!
1329d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis                parse_state_ = BalsaFrameEnums::PARSE_ERROR;
13308bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek                last_error_ = BalsaFrameEnums::CHUNK_LENGTH_OVERFLOW;
13319c378f705405d37f49795d5e915989de774fe11fTed Kremenek                visitor_->ProcessBodyInput(on_entry, current - on_entry);
13325eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek                visitor_->HandleChunkingError(this);
1333daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek                goto bottom;
13345b9bd2137ebef350af803c634e3fdf5d74678100Ted Kremenek              }
13352055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu              chunk_length_remaining_ = length_x_16 + addition;
1336b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek              continue;
13372055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu            }
13382055effed54d614b51e3501a174c9b1fe92e4de4Zhongxing Xu
1339daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek            if (!chunk_length_character_extracted_ || addition == -1) {
1340daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek              // ^[0-9;A-Fa-f][ \t\n] -- was not matched, either because no
13415b9bd2137ebef350af803c634e3fdf5d74678100Ted Kremenek              // characters were converted, or an unexpected character was
1342692416c214a3b234236dedcf875735a9cc29e90bTed Kremenek              // seen.
13438bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek              parse_state_ = BalsaFrameEnums::PARSE_ERROR;
134434feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek              last_error_ = BalsaFrameEnums::INVALID_CHUNK_LENGTH;
134534feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek              visitor_->ProcessBodyInput(on_entry, current - on_entry);
134634feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek              visitor_->HandleChunkingError(this);
13471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump              goto bottom;
134834feff654c6304e0a59ceb1376989d28dbc956ffTed Kremenek            }
1349e71f3d587844110d836c82250830b27b1651afdbTed Kremenek
1350e71f3d587844110d836c82250830b27b1651afdbTed Kremenek            --current;
1351e71f3d587844110d836c82250830b27b1651afdbTed Kremenek            parse_state_ = BalsaFrameEnums::READING_CHUNK_EXTENSION;
1352e71f3d587844110d836c82250830b27b1651afdbTed Kremenek            visitor_->ProcessChunkLength(chunk_length_remaining_);
13539c378f705405d37f49795d5e915989de774fe11fTed Kremenek            goto label_reading_chunk_extension;
135472afb3739da0da02158242ae41a50cfe0bea78b4Ted Kremenek          }
135572afb3739da0da02158242ae41a50cfe0bea78b4Ted Kremenek        }
135685df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith        visitor_->ProcessBodyInput(on_entry, current - on_entry);
135785df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith        goto bottom;  // case BalsaFrameEnums::READING_CHUNK_LENGTH
13581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1359daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek label_reading_chunk_extension:
136085df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith      case BalsaFrameEnums::READING_CHUNK_EXTENSION:
136185df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith        {
136285df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith          // TODO(phython): Convert this scanning to be 16 bytes at a time if
136314a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek          // there is data to be read.
136414a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek          const char* extensions_start = current;
13651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          size_t extensions_length = 0;
1366daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek          while (current < end) {
1367daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek            const char c = *current;
1368aa1c4e5a6b87b62d991c55a0d4522bcd778068d7Ted Kremenek            if (c == '\r' || c == '\n') {
13691eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump              extensions_length =
137014a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek                  (extensions_start == current) ?
137185df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith                  0 :
13729c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek                  current - extensions_start - 1;
137348569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek            }
1374b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek
13751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            ++current;
13768bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek            if (c == '\n') {
1377a591bc04d21fa62ebffcb2c7814d738ca8f5e2f9Ted Kremenek              chunk_length_character_extracted_ = false;
13781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump              visitor_->ProcessChunkExtensions(
1379daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek                  extensions_start, extensions_length);
1380daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek              if (chunk_length_remaining_ != 0) {
1381daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek                parse_state_ = BalsaFrameEnums::READING_CHUNK_DATA;
13821c96b24285d05c0eac455ae96d7c9ff43d42bc96Zhongxing Xu                goto label_reading_chunk_data;
1383daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek              }
1384daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek              HeaderFramingFound('\n');
13851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump              parse_state_ = BalsaFrameEnums::READING_LAST_CHUNK_TERM;
1386daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek              goto label_reading_last_chunk_term;
1387daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek            }
138848569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek          }
13898bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek          visitor_->ProcessChunkExtensions(
139048569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek              extensions_start, extensions_length);
139148569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek        }
139248569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek
139348569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek        visitor_->ProcessBodyInput(on_entry, current - on_entry);
139448569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek        goto bottom;  // case BalsaFrameEnums::READING_CHUNK_EXTENSION
139548569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek
139648569f9562740ac1f4b175cb17ce3d49035402c4Ted Kremenek label_reading_chunk_data:
13975014ab113eb211b8320ae30b173d7020352663c6Ted Kremenek      case BalsaFrameEnums::READING_CHUNK_DATA:
1398b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenek        while (current < end) {
139914a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek          if (chunk_length_remaining_ == 0) {
140085df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith            break;
140114a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek          }
14021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          // read in the chunk
140385df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith          size_t bytes_remaining = end - current;
140485df96c1f04867e26ba069aa0cc6a4cd6a01292eRichard Smith          size_t consumed_bytes = (chunk_length_remaining_ < bytes_remaining) ?
14051eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            chunk_length_remaining_ : bytes_remaining;
140614a1140c9f4e20b12a54db8745b74699b9872cd2Ted Kremenek          const char* tmp_current = current + consumed_bytes;
1407daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek          visitor_->ProcessBodyInput(on_entry, tmp_current - on_entry);
14081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          visitor_->ProcessBodyData(current, consumed_bytes);
14094d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          on_entry = current = tmp_current;
14104d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          chunk_length_remaining_ -= consumed_bytes;
14114d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek        }
14124d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek        if (chunk_length_remaining_ == 0) {
14134d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          parse_state_ = BalsaFrameEnums::READING_CHUNK_TERM;
14144d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          goto label_reading_chunk_term;
14154d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek        }
14164d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek        visitor_->ProcessBodyInput(on_entry, current - on_entry);
14174d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek        goto bottom;  // case BalsaFrameEnums::READING_CHUNK_DATA
14184d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek
14194d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek label_reading_chunk_term:
14204d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek      case BalsaFrameEnums::READING_CHUNK_TERM:
14214d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek        while (current < end) {
14224d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          const char c = *current;
14234d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          ++current;
14244d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek
14254d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek          if (c == '\n') {
14264d3175c1e5a44251ea97b0c81e80f060629d9c08Ted Kremenek            parse_state_ = BalsaFrameEnums::READING_CHUNK_LENGTH;
1427daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek            goto label_reading_chunk_length;
1428daeb9a7376830d637e02b5bc51faf4750a7bce70Ted Kremenek          }
1429e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        }
1430ec96a2d52d16e150baaf629cd35e3fabff5d8915Ted Kremenek        visitor_->ProcessBodyInput(on_entry, current - on_entry);
1431e695e1cd7d8a579455e8969be36cbaf10a316a64Ted Kremenek        goto bottom;  // case BalsaFrameEnums::READING_CHUNK_TERM
1432d27f8169f4b68337a489547a41ac45bf7a5d1ddfTed Kremenek
1433d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis label_reading_last_chunk_term:
1434d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis      case BalsaFrameEnums::READING_LAST_CHUNK_TERM:
1435d2592a34a059e7cbb2b11dc53649ac4912422909Argyrios Kyrtzidis        while (current < end) {
1436aa0aeb1cbe117db68d35700cb3a34aace0f99b99Anna Zaks          const char c = *current;
1437ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks
14388bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek          if (!HeaderFramingFound(c)) {
14395eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            // If not, however, since the spec only suggests that the
14406d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu            // client SHOULD indicate the presence of trailers, we get to
14419c378f705405d37f49795d5e915989de774fe11fTed Kremenek            // *test* that they did or didn't.
1442892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            // If all of the bytes we've seen since:
1443d17da2b99f323fa91b01e1dd119cc32e0ee8197dTed Kremenek            //   OPTIONAL_WS 0 OPTIONAL_STUFF CRLF
1444a7581731b1453b51b26154d2409d42a5b6395079Zhongxing Xu            // are either '\r', or '\n', then we can assume that we don't yet
1445892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            // know if we need to parse headers, or if the next byte will make
1446892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            // the HeaderFramingFound condition (above) true.
1447892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            if (HeaderFramingMayBeFound()) {
1448892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek              // If true, then we have seen only characters '\r' or '\n'.
1449892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek              ++current;
1450892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek
1451892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek              // Let's try again! There is no state change here.
1452852274d4257134906995cb252fb3dfd2d71deae8Ted Kremenek              continue;
14536d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu            } else {
14545eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek              // If (!HeaderFramingMayBeFound()), then we know that we must be
1455ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks              // reading the first non CRLF character of a trailer.
1456852274d4257134906995cb252fb3dfd2d71deae8Ted Kremenek              parse_state_ = BalsaFrameEnums::READING_TRAILER;
1457892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek              visitor_->ProcessBodyInput(on_entry, current - on_entry);
14589c378f705405d37f49795d5e915989de774fe11fTed Kremenek              on_entry = current;
1459892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek              goto label_reading_trailer;
1460c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek            }
14615eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek          } else {
14626d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu            // If we've found a "\r\n\r\n", then the message
1463892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            // is done.
14649c378f705405d37f49795d5e915989de774fe11fTed Kremenek            ++current;
1465c8413fd03f73084a5c93028f8b4db619fc388087Ted Kremenek            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
14665eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek            visitor_->ProcessBodyInput(on_entry, current - on_entry);
1467ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks            visitor_->MessageDone();
14686d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu            goto bottom;
14691b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek          }
14705aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek          break;  // from while loop
14715aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek        }
14725aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek        visitor_->ProcessBodyInput(on_entry, current - on_entry);
14735aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek        goto bottom;  // case BalsaFrameEnums::READING_LAST_CHUNK_TERM
14745aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek
14755aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek label_reading_trailer:
14765aac0b6ae95f137b1783f3e6227241fb457b8f8bTed Kremenek      case BalsaFrameEnums::READING_TRAILER:
14776d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu        while (current < end) {
14786d69b5d82281992e981caa9bc038e3f6cac6594aZhongxing Xu          const char c = *current;
14793271f8d315712885ac87747369bb1d9f4b1ea81fTed Kremenek          ++current;
14803271f8d315712885ac87747369bb1d9f4b1ea81fTed Kremenek          // TODO(fenix): If we ever care about trailers as part of framing,
1481540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek          // deal with them here (see below for part of the 'solution')
14829c378f705405d37f49795d5e915989de774fe11fTed Kremenek          // if (LineFramingFound(c)) {
14839c378f705405d37f49795d5e915989de774fe11fTed Kremenek          // trailer_lines_.push_back(make_pair(start_of_line_,
14849c378f705405d37f49795d5e915989de774fe11fTed Kremenek          //                                   trailer_length_ - 1));
14851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump          // start_of_line_ = trailer_length_;
14869c378f705405d37f49795d5e915989de774fe11fTed Kremenek          // }
14879c378f705405d37f49795d5e915989de774fe11fTed Kremenek          if (HeaderFramingFound(c)) {
1488892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            // ProcessTrailers(visitor_, &trailers_);
14891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump            parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
14908f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek            visitor_->ProcessTrailerInput(on_entry, current - on_entry);
14918f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek            visitor_->MessageDone();
14928f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek            goto bottom;
1493aa0aeb1cbe117db68d35700cb3a34aace0f99b99Anna Zaks          }
1494ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        }
14958f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek        visitor_->ProcessTrailerInput(on_entry, current - on_entry);
14968f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek        break;  // case BalsaFrameEnums::READING_TRAILER
14975eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek
14988bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek        // Note that there is no label:
14995eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        //   'label_reading_until_close'
15005eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        // here. This is because the state-machine exits immediately after
15015eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        // reading the headers instead of transitioning here (as it would
15028f3407ef22bc7efe6ca4169381e09d0d657ec192Ted Kremenek        // do if it was consuming all the data it could, all the time).
15035eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek      case BalsaFrameEnums::READING_UNTIL_CLOSE:
1504ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        {
15051b8bd4d71c2098126041b4de4267175a82f0103cTed Kremenek          const size_t bytes_remaining = end - current;
1506540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek          if (bytes_remaining > 0) {
1507540cbe2b60294fe7b926c26b4f1840f544fe3011Ted Kremenek            visitor_->ProcessBodyInput(current, bytes_remaining);
1508469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek            visitor_->ProcessBodyData(current, bytes_remaining);
15099c378f705405d37f49795d5e915989de774fe11fTed Kremenek            current += bytes_remaining;
15106889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks          }
15111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump        }
15126889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks        goto bottom;  // case BalsaFrameEnums::READING_UNTIL_CLOSE
15136889679d72859960e0fc8d1080487f63c4df1e0aAnna Zaks
15145bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek        // label_reading_content:
15155bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek      case BalsaFrameEnums::READING_CONTENT:
15165bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek#if DEBUGFRAMER
1517ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        LOG(INFO) << "ReadingContent: " << content_length_remaining_;
15185bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek#endif  // DEBUGFRAMER
1519ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks        while (content_length_remaining_ && current < end) {
15205bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek          // read in the content
15215bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek          const size_t bytes_remaining = end - current;
15225bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek          const size_t consumed_bytes =
15235bd04952d4ae7ca894f583583208f0cec4735a90Ted Kremenek            (content_length_remaining_ < bytes_remaining) ?
1524892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek            content_length_remaining_ : bytes_remaining;
152586f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor          visitor_->ProcessBodyInput(current, consumed_bytes);
152686f194083504938df72135b5b66bf0c5cafd9498Douglas Gregor          visitor_->ProcessBodyData(current, consumed_bytes);
1527e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek          current += consumed_bytes;
15288bef8238181a30e52dea380789a7e2d760eac532Ted Kremenek          content_length_remaining_ -= consumed_bytes;
15295eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        }
15305eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        if (content_length_remaining_ == 0) {
1531e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek          parse_state_ = BalsaFrameEnums::MESSAGE_FULLY_READ;
1532e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek          visitor_->MessageDone();
1533e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek        }
1534e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek        goto bottom;  // case BalsaFrameEnums::READING_CONTENT
1535e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek
1536e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek      default:
15375eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek        // The state-machine should never be in a state that isn't handled
1538e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek        // above.  This is a glaring logic error, and we should do something
1539e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek        // drastic to ensure that this gets looked-at and fixed.
1540892697dd2287caf7c29aaaa82909b0e90b8b63feTed Kremenek        LOG(FATAL) << "Unknown state: " << parse_state_  // COV_NF_LINE
1541e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek          << " memory corruption?!";                     // COV_NF_LINE
1542e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek    }
1543e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek  }
1544d9bc33efa195114d6f2a365c26e5b8dba4e1cc38Ted Kremenek bottom:
1545e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek#if DEBUGFRAMER
1546e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek  LOG(INFO) << "\n^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n\n"
1547e3939d7446959afb6b650fe08e952d0f64ab6794Ted Kremenek    << std::string(input, current)
15485eca482fe895ea57bc82410222e6426c09e63284Ted Kremenek    << "\n$$$$$$$$$$$$$$"
1549ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    << BalsaFrameEnums::ParseStateToString(parse_state_)
1550ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    << "$$$$$$$$$$$$$$$"
1551ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    << " consumed: " << (current - input);
1552bd613137499b1d4c3b63dccd0aa21f6add243f4fTed Kremenek  if (Error()) {
1553ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks    LOG(INFO) << BalsaFrameEnums::ErrorCodeToString(ErrorCode());
1554ebae6d0209e1ec3d5ea14f9e63bd0d740218ed14Anna Zaks  }
1555469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek#endif  // DEBUGFRAMER
1556469ecbded3616416ef938ed94a67f86149faf226Ted Kremenek  return current - input;
15579c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek}
15589c14953d0c84f7cf5adfb4cd3c0f05a9b1723c1cTed Kremenek
// Namespace-scope definitions of BalsaFrame's static const data members
// kValidTerm1, kValidTerm1Mask, kValidTerm2 and kValidTerm2Mask.  No
// initializers appear here, so the values are presumably given at the
// in-class declarations in balsa_frame.h (not visible from this file --
// TODO confirm).  These definitions provide storage for the constants so
// that they can be odr-used (e.g. bound to a reference or have their
// address taken).
15599c378f705405d37f49795d5e915989de774fe11fTed Kremenekconst uint32 BalsaFrame::kValidTerm1;
156093bd5ca766c4d7906878f4ffe76ce1b2080e540bJordy Roseconst uint32 BalsaFrame::kValidTerm1Mask;
15613d7c44e01d568e5d5c0fac9c6ccb3f080157ba19Anna Zaksconst uint32 BalsaFrame::kValidTerm2;
1562b9bbd592c7ea72ada8d982e40a729beb9b53371eTed Kremenekconst uint32 BalsaFrame::kValidTerm2Mask;
1563b107c4b7efb907d75620cd3c17f82fe27dc5b745Ted Kremenek
1564fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose}  // namespace net
1565fe27971d54d26997149d6b84057f04ff398d1d5dJordy Rose