1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// The rules for header parsing were borrowed from Firefox:
6// http://lxr.mozilla.org/seamonkey/source/netwerk/protocol/http/src/nsHttpResponseHead.cpp
7// The rules for parsing content-types were also borrowed from Firefox:
8// http://lxr.mozilla.org/mozilla/source/netwerk/base/src/nsURLHelper.cpp#834
9
10#include "net/http/http_response_headers.h"
11
12#include <algorithm>
13
14#include "base/logging.h"
15#include "base/metrics/histogram.h"
16#include "base/pickle.h"
17#include "base/string_number_conversions.h"
18#include "base/string_util.h"
19#include "base/time.h"
20#include "net/base/escape.h"
21#include "net/http/http_util.h"
22
23using base::Time;
24using base::TimeDelta;
25
26namespace net {
27
28//-----------------------------------------------------------------------------
29
30namespace {
31
// These headers are RFC 2616 hop-by-hop headers;
// not to be stored by caches.
// The names are spelled in lowercase.  AddHopByHopHeaders() inserts every
// entry of this table into a HeaderSet.
const char* const kHopByHopResponseHeaders[] = {
  "connection",
  "proxy-connection",
  "keep-alive",
  "trailer",
  "transfer-encoding",
  "upgrade"
};
42
// These headers are challenge response headers;
// not to be stored by caches.
// The names are spelled in lowercase.  AddChallengeHeaders() inserts every
// entry of this table into a HeaderSet.
const char* const kChallengeResponseHeaders[] = {
  "www-authenticate",
  "proxy-authenticate"
};
49
// These headers are cookie setting headers;
// not to be stored by caches or disclosed otherwise.
// The names are spelled in lowercase.  AddCookieHeaders() inserts every
// entry of this table into a HeaderSet.
const char* const kCookieResponseHeaders[] = {
  "set-cookie",
  "set-cookie2"
};
56
// These response headers are not copied from a 304/206 response to the cached
// response headers.  This list is based on Mozilla's nsHttpResponseHead.cpp.
// The names are spelled in lowercase; ShouldUpdateHeader() matches header
// names against this table with LowerCaseEqualsASCII().
const char* const kNonUpdatedHeaders[] = {
  // the same names that appear in kHopByHopResponseHeaders and
  // kChallengeResponseHeaders:
  "connection",
  "proxy-connection",
  "keep-alive",
  "www-authenticate",
  "proxy-authenticate",
  "trailer",
  "transfer-encoding",
  "upgrade",
  // these should never change:
  "content-location",
  "content-md5",
  "etag",
  // assume cache-control: no-transform
  "content-encoding",
  "content-range",
  "content-type",
  // some broken microsoft servers send 'content-length: 0' with 304s
  "content-length"
};
79
80bool ShouldUpdateHeader(const std::string::const_iterator& name_begin,
81                        const std::string::const_iterator& name_end) {
82  for (size_t i = 0; i < arraysize(kNonUpdatedHeaders); ++i) {
83    if (LowerCaseEqualsASCII(name_begin, name_end, kNonUpdatedHeaders[i]))
84      return false;
85  }
86  return true;
87}
88
89// Functions for histogram initialization.  The code 0 is put in the
90// response map to track response codes that are invalid.
91// TODO(gavinp): Greatly prune the collected codes once we learn which
92// ones are not sent in practice, to reduce upload size & memory use.
93
// Inclusive bounds of the response codes that get a histogram bucket of
// their own.
enum {
  HISTOGRAM_MIN_HTTP_RESPONSE_CODE = 100,
  HISTOGRAM_MAX_HTTP_RESPONSE_CODE = 599,
};

// Returns the complete list of histogram buckets: 0 first (the bucket used
// for codes outside the tracked range), then every code from
// HISTOGRAM_MIN_HTTP_RESPONSE_CODE to HISTOGRAM_MAX_HTTP_RESPONSE_CODE in
// ascending order.
std::vector<int> GetAllHttpResponseCodes() {
  const int kTrackedCodeCount =
      HISTOGRAM_MAX_HTTP_RESPONSE_CODE - HISTOGRAM_MIN_HTTP_RESPONSE_CODE + 1;

  std::vector<int> buckets;
  buckets.reserve(kTrackedCodeCount + 1);  // +1 for the 0 bucket.
  buckets.push_back(0);

  int code = HISTOGRAM_MIN_HTTP_RESPONSE_CODE;
  while (code <= HISTOGRAM_MAX_HTTP_RESPONSE_CODE) {
    buckets.push_back(code);
    ++code;
  }
  return buckets;
}

// Maps |code| onto its histogram bucket: the code itself when it lies within
// the tracked range, 0 otherwise.
int MapHttpResponseCode(int code) {
  const bool out_of_range = code < HISTOGRAM_MIN_HTTP_RESPONSE_CODE ||
                            code > HISTOGRAM_MAX_HTTP_RESPONSE_CODE;
  if (out_of_range)
    return 0;
  return code;
}
116
117}  // namespace
118
// One entry of the parsed header list.  Each entry stores two iterator
// ranges: one for the header name and one for a single header value.
// Parse() fills these entries from iterators over raw_headers_.
struct HttpResponseHeaders::ParsedHeader {
  // A header "continuation" contains only a subsequent value for the
  // preceding header.  (Header values are comma separated.)
  // It is recognized by its empty name range.
  bool is_continuation() const { return name_begin == name_end; }

  // [name_begin, name_end) is the header name; empty for a continuation.
  std::string::const_iterator name_begin;
  std::string::const_iterator name_end;
  // [value_begin, value_end) is one value of the header.
  std::string::const_iterator value_begin;
  std::string::const_iterator value_end;
};
129
130//-----------------------------------------------------------------------------
131
132HttpResponseHeaders::HttpResponseHeaders(const std::string& raw_input)
133    : response_code_(-1) {
134  Parse(raw_input);
135
136  // The most important thing to do with this histogram is find out
137  // the existence of unusual HTTP response codes.  As it happens
138  // right now, there aren't double-constructions of response headers
139  // using this constructor, so our counts should also be accurate,
140  // without instantiating the histogram in two places.  It is also
141  // important that this histogram not collect data in the other
142  // constructor, which rebuilds an histogram from a pickle, since
143  // that would actually create a double call between the original
144  // HttpResponseHeader that was serialized, and initialization of the
145  // new object from that pickle.
146  UMA_HISTOGRAM_CUSTOM_ENUMERATION("Net.HttpResponseCode",
147                                   MapHttpResponseCode(response_code_),
148                                   // Note the third argument is only
149                                   // evaluated once, see macro
150                                   // definition for details.
151                                   GetAllHttpResponseCodes());
152}
153
154HttpResponseHeaders::HttpResponseHeaders(const Pickle& pickle, void** iter)
155    : response_code_(-1) {
156  std::string raw_input;
157  if (pickle.ReadString(iter, &raw_input))
158    Parse(raw_input);
159}
160
161void HttpResponseHeaders::Persist(Pickle* pickle, PersistOptions options) {
162  if (options == PERSIST_RAW) {
163    pickle->WriteString(raw_headers_);
164    return;  // Done.
165  }
166
167  HeaderSet filter_headers;
168
169  // Construct set of headers to filter out based on options.
170  if ((options & PERSIST_SANS_NON_CACHEABLE) == PERSIST_SANS_NON_CACHEABLE)
171    AddNonCacheableHeaders(&filter_headers);
172
173  if ((options & PERSIST_SANS_COOKIES) == PERSIST_SANS_COOKIES)
174    AddCookieHeaders(&filter_headers);
175
176  if ((options & PERSIST_SANS_CHALLENGES) == PERSIST_SANS_CHALLENGES)
177    AddChallengeHeaders(&filter_headers);
178
179  if ((options & PERSIST_SANS_HOP_BY_HOP) == PERSIST_SANS_HOP_BY_HOP)
180    AddHopByHopHeaders(&filter_headers);
181
182  if ((options & PERSIST_SANS_RANGES) == PERSIST_SANS_RANGES)
183    AddHopContentRangeHeaders(&filter_headers);
184
185  std::string blob;
186  blob.reserve(raw_headers_.size());
187
188  // This copies the status line w/ terminator null.
189  // Note raw_headers_ has embedded nulls instead of \n,
190  // so this just copies the first header line.
191  blob.assign(raw_headers_.c_str(), strlen(raw_headers_.c_str()) + 1);
192
193  for (size_t i = 0; i < parsed_.size(); ++i) {
194    DCHECK(!parsed_[i].is_continuation());
195
196    // Locate the start of the next header.
197    size_t k = i;
198    while (++k < parsed_.size() && parsed_[k].is_continuation()) {}
199    --k;
200
201    std::string header_name(parsed_[i].name_begin, parsed_[i].name_end);
202    StringToLowerASCII(&header_name);
203
204    if (filter_headers.find(header_name) == filter_headers.end()) {
205      // Make sure there is a null after the value.
206      blob.append(parsed_[i].name_begin, parsed_[k].value_end);
207      blob.push_back('\0');
208    }
209
210    i = k;
211  }
212  blob.push_back('\0');
213
214  pickle->WriteString(blob);
215}
216
217void HttpResponseHeaders::Update(const HttpResponseHeaders& new_headers) {
218  DCHECK(new_headers.response_code() == 304 ||
219         new_headers.response_code() == 206);
220
221  // Copy up to the null byte.  This just copies the status line.
222  std::string new_raw_headers(raw_headers_.c_str());
223  new_raw_headers.push_back('\0');
224
225  HeaderSet updated_headers;
226
227  // NOTE: we write the new headers then the old headers for convenience.  The
228  // order should not matter.
229
230  // Figure out which headers we want to take from new_headers:
231  for (size_t i = 0; i < new_headers.parsed_.size(); ++i) {
232    const HeaderList& new_parsed = new_headers.parsed_;
233
234    DCHECK(!new_parsed[i].is_continuation());
235
236    // Locate the start of the next header.
237    size_t k = i;
238    while (++k < new_parsed.size() && new_parsed[k].is_continuation()) {}
239    --k;
240
241    const std::string::const_iterator& name_begin = new_parsed[i].name_begin;
242    const std::string::const_iterator& name_end = new_parsed[i].name_end;
243    if (ShouldUpdateHeader(name_begin, name_end)) {
244      std::string name(name_begin, name_end);
245      StringToLowerASCII(&name);
246      updated_headers.insert(name);
247
248      // Preserve this header line in the merged result, making sure there is
249      // a null after the value.
250      new_raw_headers.append(name_begin, new_parsed[k].value_end);
251      new_raw_headers.push_back('\0');
252    }
253
254    i = k;
255  }
256
257  // Now, build the new raw headers.
258  MergeWithHeaders(new_raw_headers, updated_headers);
259}
260
261void HttpResponseHeaders::MergeWithHeaders(const std::string& raw_headers,
262                                           const HeaderSet& headers_to_remove) {
263  std::string new_raw_headers(raw_headers);
264  for (size_t i = 0; i < parsed_.size(); ++i) {
265    DCHECK(!parsed_[i].is_continuation());
266
267    // Locate the start of the next header.
268    size_t k = i;
269    while (++k < parsed_.size() && parsed_[k].is_continuation()) {}
270    --k;
271
272    std::string name(parsed_[i].name_begin, parsed_[i].name_end);
273    StringToLowerASCII(&name);
274    if (headers_to_remove.find(name) == headers_to_remove.end()) {
275      // It's ok to preserve this header in the final result.
276      new_raw_headers.append(parsed_[i].name_begin, parsed_[k].value_end);
277      new_raw_headers.push_back('\0');
278    }
279
280    i = k;
281  }
282  new_raw_headers.push_back('\0');
283
284  // Make this object hold the new data.
285  raw_headers_.clear();
286  parsed_.clear();
287  Parse(new_raw_headers);
288}
289
290void HttpResponseHeaders::RemoveHeader(const std::string& name) {
291  // Copy up to the null byte.  This just copies the status line.
292  std::string new_raw_headers(raw_headers_.c_str());
293  new_raw_headers.push_back('\0');
294
295  std::string lowercase_name(name);
296  StringToLowerASCII(&lowercase_name);
297  HeaderSet to_remove;
298  to_remove.insert(lowercase_name);
299  MergeWithHeaders(new_raw_headers, to_remove);
300}
301
302void HttpResponseHeaders::AddHeader(const std::string& header) {
303  DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 2]);
304  DCHECK_EQ('\0', raw_headers_[raw_headers_.size() - 1]);
305  // Don't copy the last null.
306  std::string new_raw_headers(raw_headers_, 0, raw_headers_.size() - 1);
307  new_raw_headers.append(header);
308  new_raw_headers.push_back('\0');
309  new_raw_headers.push_back('\0');
310
311  // Make this object hold the new data.
312  raw_headers_.clear();
313  parsed_.clear();
314  Parse(new_raw_headers);
315}
316
317void HttpResponseHeaders::ReplaceStatusLine(const std::string& new_status) {
318  // Copy up to the null byte.  This just copies the status line.
319  std::string new_raw_headers(new_status);
320  new_raw_headers.push_back('\0');
321
322  HeaderSet empty_to_remove;
323  MergeWithHeaders(new_raw_headers, empty_to_remove);
324}
325
// Parses |raw_input|, a header block whose lines are separated by null bytes
// and whose first line is the status line.  Fills raw_headers_ with the
// normalized status line followed by the remaining header data, and fills
// parsed_ with one entry per header value (through AddHeader()).
void HttpResponseHeaders::Parse(const std::string& raw_input) {
  raw_headers_.reserve(raw_input.size());

  // ParseStatusLine adds a normalized status line to raw_headers_
  // The status line is everything up to the first null byte (or the whole
  // input when it contains none).
  std::string::const_iterator line_begin = raw_input.begin();
  std::string::const_iterator line_end =
      find(line_begin, raw_input.end(), '\0');
  // has_headers = true, if there is any data following the status line.
  // Used by ParseStatusLine() to decide if a HTTP/0.9 is really a HTTP/1.0.
  bool has_headers = (line_end != raw_input.end() &&
                      (line_end + 1) != raw_input.end() &&
                      *(line_end + 1) != '\0');
  ParseStatusLine(line_begin, line_end, has_headers);

  // No null byte in the input: there is only a status line.  Add the null
  // that terminates the header block and stop.
  if (line_end == raw_input.end()) {
    raw_headers_.push_back('\0');
    return;
  }

  // Including a terminating null byte.
  // NOTE(review): this assumes ParseStatusLine() left raw_headers_ ending in
  // a null byte on every one of its exit paths -- confirm.
  size_t status_line_len = raw_headers_.size();

  // Now, we add the rest of the raw headers to raw_headers_, and begin parsing
  // it (to populate our parsed_ vector).
  raw_headers_.append(line_end + 1, raw_input.end());

  // Adjust to point at the null byte following the status line
  // From here on line_end is an iterator into raw_headers_, no longer into
  // raw_input.
  line_end = raw_headers_.begin() + status_line_len - 1;

  // Walk the null-separated header lines that follow the status line.
  HttpUtil::HeadersIterator headers(line_end + 1, raw_headers_.end(),
                                    std::string(1, '\0'));
  while (headers.GetNext()) {
    AddHeader(headers.name_begin(),
              headers.name_end(),
              headers.values_begin(),
              headers.values_end());
  }
}
364
365// Append all of our headers to the final output string.
366void HttpResponseHeaders::GetNormalizedHeaders(std::string* output) const {
367  // copy up to the null byte.  this just copies the status line.
368  output->assign(raw_headers_.c_str());
369
370  // headers may appear multiple times (not necessarily in succession) in the
371  // header data, so we build a map from header name to generated header lines.
372  // to preserve the order of the original headers, the actual values are kept
373  // in a separate list.  finally, the list of headers is flattened to form
374  // the normalized block of headers.
375  //
376  // NOTE: We take special care to preserve the whitespace around any commas
377  // that may occur in the original response headers.  Because our consumer may
378  // be a web app, we cannot be certain of the semantics of commas despite the
379  // fact that RFC 2616 says that they should be regarded as value separators.
380  //
381  typedef base::hash_map<std::string, size_t> HeadersMap;
382  HeadersMap headers_map;
383  HeadersMap::iterator iter = headers_map.end();
384
385  std::vector<std::string> headers;
386
387  for (size_t i = 0; i < parsed_.size(); ++i) {
388    DCHECK(!parsed_[i].is_continuation());
389
390    std::string name(parsed_[i].name_begin, parsed_[i].name_end);
391    std::string lower_name = StringToLowerASCII(name);
392
393    iter = headers_map.find(lower_name);
394    if (iter == headers_map.end()) {
395      iter = headers_map.insert(
396          HeadersMap::value_type(lower_name, headers.size())).first;
397      headers.push_back(name + ": ");
398    } else {
399      headers[iter->second].append(", ");
400    }
401
402    std::string::const_iterator value_begin = parsed_[i].value_begin;
403    std::string::const_iterator value_end = parsed_[i].value_end;
404    while (++i < parsed_.size() && parsed_[i].is_continuation())
405      value_end = parsed_[i].value_end;
406    --i;
407
408    headers[iter->second].append(value_begin, value_end);
409  }
410
411  for (size_t i = 0; i < headers.size(); ++i) {
412    output->push_back('\n');
413    output->append(headers[i]);
414  }
415
416  output->push_back('\n');
417}
418
419bool HttpResponseHeaders::GetNormalizedHeader(const std::string& name,
420                                              std::string* value) const {
421  // If you hit this assertion, please use EnumerateHeader instead!
422  DCHECK(!HttpUtil::IsNonCoalescingHeader(name));
423
424  value->clear();
425
426  bool found = false;
427  size_t i = 0;
428  while (i < parsed_.size()) {
429    i = FindHeader(i, name);
430    if (i == std::string::npos)
431      break;
432
433    found = true;
434
435    if (!value->empty())
436      value->append(", ");
437
438    std::string::const_iterator value_begin = parsed_[i].value_begin;
439    std::string::const_iterator value_end = parsed_[i].value_end;
440    while (++i < parsed_.size() && parsed_[i].is_continuation())
441      value_end = parsed_[i].value_end;
442    value->append(value_begin, value_end);
443  }
444
445  return found;
446}
447
448std::string HttpResponseHeaders::GetStatusLine() const {
449  // copy up to the null byte.
450  return std::string(raw_headers_.c_str());
451}
452
453std::string HttpResponseHeaders::GetStatusText() const {
454  // GetStatusLine() is already normalized, so it has the format:
455  // <http_version> SP <response_code> SP <status_text>
456  std::string status_text = GetStatusLine();
457  std::string::const_iterator begin = status_text.begin();
458  std::string::const_iterator end = status_text.end();
459  for (int i = 0; i < 2; ++i)
460    begin = find(begin, end, ' ') + 1;
461  return std::string(begin, end);
462}
463
464bool HttpResponseHeaders::EnumerateHeaderLines(void** iter,
465                                               std::string* name,
466                                               std::string* value) const {
467  size_t i = reinterpret_cast<size_t>(*iter);
468  if (i == parsed_.size())
469    return false;
470
471  DCHECK(!parsed_[i].is_continuation());
472
473  name->assign(parsed_[i].name_begin, parsed_[i].name_end);
474
475  std::string::const_iterator value_begin = parsed_[i].value_begin;
476  std::string::const_iterator value_end = parsed_[i].value_end;
477  while (++i < parsed_.size() && parsed_[i].is_continuation())
478    value_end = parsed_[i].value_end;
479
480  value->assign(value_begin, value_end);
481
482  *iter = reinterpret_cast<void*>(i);
483  return true;
484}
485
486bool HttpResponseHeaders::EnumerateHeader(void** iter, const std::string& name,
487                                          std::string* value) const {
488  size_t i;
489  if (!iter || !*iter) {
490    i = FindHeader(0, name);
491  } else {
492    i = reinterpret_cast<size_t>(*iter);
493    if (i >= parsed_.size()) {
494      i = std::string::npos;
495    } else if (!parsed_[i].is_continuation()) {
496      i = FindHeader(i, name);
497    }
498  }
499
500  if (i == std::string::npos) {
501    value->clear();
502    return false;
503  }
504
505  if (iter)
506    *iter = reinterpret_cast<void*>(i + 1);
507  value->assign(parsed_[i].value_begin, parsed_[i].value_end);
508  return true;
509}
510
511bool HttpResponseHeaders::HasHeaderValue(const std::string& name,
512                                         const std::string& value) const {
513  // The value has to be an exact match.  This is important since
514  // 'cache-control: no-cache' != 'cache-control: no-cache="foo"'
515  void* iter = NULL;
516  std::string temp;
517  while (EnumerateHeader(&iter, name, &temp)) {
518    if (value.size() == temp.size() &&
519        std::equal(temp.begin(), temp.end(), value.begin(),
520                   base::CaseInsensitiveCompare<char>()))
521      return true;
522  }
523  return false;
524}
525
526bool HttpResponseHeaders::HasHeader(const std::string& name) const {
527  return FindHeader(0, name) != std::string::npos;
528}
529
// Creates an object without any parsed data; response_code_ starts at -1.
HttpResponseHeaders::HttpResponseHeaders() : response_code_(-1) {
}
532
// Nothing to release explicitly here.
HttpResponseHeaders::~HttpResponseHeaders() {
}
535
536// Note: this implementation implicitly assumes that line_end points at a valid
537// sentinel character (such as '\0').
538// static
539HttpVersion HttpResponseHeaders::ParseVersion(
540    std::string::const_iterator line_begin,
541    std::string::const_iterator line_end) {
542  std::string::const_iterator p = line_begin;
543
544  // RFC2616 sec 3.1: HTTP-Version   = "HTTP" "/" 1*DIGIT "." 1*DIGIT
545  // TODO: (1*DIGIT apparently means one or more digits, but we only handle 1).
546  // TODO: handle leading zeros, which is allowed by the rfc1616 sec 3.1.
547
548  if ((line_end - p < 4) || !LowerCaseEqualsASCII(p, p + 4, "http")) {
549    DVLOG(1) << "missing status line";
550    return HttpVersion();
551  }
552
553  p += 4;
554
555  if (p >= line_end || *p != '/') {
556    DVLOG(1) << "missing version";
557    return HttpVersion();
558  }
559
560  std::string::const_iterator dot = find(p, line_end, '.');
561  if (dot == line_end) {
562    DVLOG(1) << "malformed version";
563    return HttpVersion();
564  }
565
566  ++p;  // from / to first digit.
567  ++dot;  // from . to second digit.
568
569  if (!(*p >= '0' && *p <= '9' && *dot >= '0' && *dot <= '9')) {
570    DVLOG(1) << "malformed version number";
571    return HttpVersion();
572  }
573
574  uint16 major = *p - '0';
575  uint16 minor = *dot - '0';
576
577  return HttpVersion(major, minor);
578}
579
580// Note: this implementation implicitly assumes that line_end points at a valid
581// sentinel character (such as '\0').
582void HttpResponseHeaders::ParseStatusLine(
583    std::string::const_iterator line_begin,
584    std::string::const_iterator line_end,
585    bool has_headers) {
586  // Extract the version number
587  parsed_http_version_ = ParseVersion(line_begin, line_end);
588
589  // Clamp the version number to one of: {0.9, 1.0, 1.1}
590  if (parsed_http_version_ == HttpVersion(0, 9) && !has_headers) {
591    http_version_ = HttpVersion(0, 9);
592    raw_headers_ = "HTTP/0.9";
593  } else if (parsed_http_version_ >= HttpVersion(1, 1)) {
594    http_version_ = HttpVersion(1, 1);
595    raw_headers_ = "HTTP/1.1";
596  } else {
597    // Treat everything else like HTTP 1.0
598    http_version_ = HttpVersion(1, 0);
599    raw_headers_ = "HTTP/1.0";
600  }
601  if (parsed_http_version_ != http_version_) {
602    DVLOG(1) << "assuming HTTP/" << http_version_.major_value() << "."
603             << http_version_.minor_value();
604  }
605
606  // TODO(eroman): this doesn't make sense if ParseVersion failed.
607  std::string::const_iterator p = find(line_begin, line_end, ' ');
608
609  if (p == line_end) {
610    DVLOG(1) << "missing response status; assuming 200 OK";
611    raw_headers_.append(" 200 OK");
612    raw_headers_.push_back('\0');
613    response_code_ = 200;
614    return;
615  }
616
617  // Skip whitespace.
618  while (*p == ' ')
619    ++p;
620
621  std::string::const_iterator code = p;
622  while (*p >= '0' && *p <= '9')
623    ++p;
624
625  if (p == code) {
626    DVLOG(1) << "missing response status number; assuming 200";
627    raw_headers_.append(" 200 OK");
628    response_code_ = 200;
629    return;
630  }
631  raw_headers_.push_back(' ');
632  raw_headers_.append(code, p);
633  raw_headers_.push_back(' ');
634  base::StringToInt(code, p, &response_code_);
635
636  // Skip whitespace.
637  while (*p == ' ')
638    ++p;
639
640  // Trim trailing whitespace.
641  while (line_end > p && line_end[-1] == ' ')
642    --line_end;
643
644  if (p == line_end) {
645    DVLOG(1) << "missing response status text; assuming OK";
646    // Not super critical what we put here. Just use "OK"
647    // even if it isn't descriptive of response_code_.
648    raw_headers_.append("OK");
649  } else {
650    raw_headers_.append(p, line_end);
651  }
652
653  raw_headers_.push_back('\0');
654}
655
656size_t HttpResponseHeaders::FindHeader(size_t from,
657                                       const std::string& search) const {
658  for (size_t i = from; i < parsed_.size(); ++i) {
659    if (parsed_[i].is_continuation())
660      continue;
661    const std::string::const_iterator& name_begin = parsed_[i].name_begin;
662    const std::string::const_iterator& name_end = parsed_[i].name_end;
663    if (static_cast<size_t>(name_end - name_begin) == search.size() &&
664        std::equal(name_begin, name_end, search.begin(),
665                   base::CaseInsensitiveCompare<char>()))
666      return i;
667  }
668
669  return std::string::npos;
670}
671
672void HttpResponseHeaders::AddHeader(std::string::const_iterator name_begin,
673                                    std::string::const_iterator name_end,
674                                    std::string::const_iterator values_begin,
675                                    std::string::const_iterator values_end) {
676  // If the header can be coalesced, then we should split it up.
677  if (values_begin == values_end ||
678      HttpUtil::IsNonCoalescingHeader(name_begin, name_end)) {
679    AddToParsed(name_begin, name_end, values_begin, values_end);
680  } else {
681    HttpUtil::ValuesIterator it(values_begin, values_end, ',');
682    while (it.GetNext()) {
683      AddToParsed(name_begin, name_end, it.value_begin(), it.value_end());
684      // clobber these so that subsequent values are treated as continuations
685      name_begin = name_end = raw_headers_.end();
686    }
687  }
688}
689
690void HttpResponseHeaders::AddToParsed(std::string::const_iterator name_begin,
691                                      std::string::const_iterator name_end,
692                                      std::string::const_iterator value_begin,
693                                      std::string::const_iterator value_end) {
694  ParsedHeader header;
695  header.name_begin = name_begin;
696  header.name_end = name_end;
697  header.value_begin = value_begin;
698  header.value_end = value_end;
699  parsed_.push_back(header);
700}
701
// Adds to |result| the lowercased header names listed in any
// 'cache-control: no-cache="..."' value of this response.
void HttpResponseHeaders::AddNonCacheableHeaders(HeaderSet* result) const {
  // Add server specified transients.  Any 'cache-control: no-cache="foo,bar"'
  // headers present in the response specify additional headers that we should
  // not store in the cache.
  const std::string kCacheControl = "cache-control";
  const std::string kPrefix = "no-cache=\"";
  std::string value;
  void* iter = NULL;
  while (EnumerateHeader(&iter, kCacheControl, &value)) {
    // Only values that start with the prefix (compared case-sensitively) and
    // hold at least one more character are considered.
    if (value.size() > kPrefix.size() &&
        value.compare(0, kPrefix.size(), kPrefix) == 0) {
      // if it doesn't end with a quote, then treat as malformed
      if (value[value.size()-1] != '\"')
        continue;

      // trim off leading and trailing bits
      // |len| is the length of the text between the two quotes; afterwards
      // |value| holds that text with surrounding HTTP_LWS removed.
      size_t len = value.size() - kPrefix.size() - 1;
      TrimString(value.substr(kPrefix.size(), len), HTTP_LWS, &value);

      // Walk the comma separated list of header names.
      size_t begin_pos = 0;
      for (;;) {
        // find the end of this header name
        size_t comma_pos = value.find(',', begin_pos);
        if (comma_pos == std::string::npos)
          comma_pos = value.size();
        // Back up over whitespace in front of the comma.
        size_t end = comma_pos;
        while (end > begin_pos && strchr(HTTP_LWS, value[end - 1]))
          end--;

        // assuming the header is not empty, lowercase and insert into set
        if (end > begin_pos) {
          std::string name = value.substr(begin_pos, end - begin_pos);
          StringToLowerASCII(&name);
          result->insert(name);
        }

        // repeat
        // Continue behind the comma, skipping leading whitespace; stop once
        // the end of the list is reached.
        begin_pos = comma_pos + 1;
        while (begin_pos < value.size() && strchr(HTTP_LWS, value[begin_pos]))
          begin_pos++;
        if (begin_pos >= value.size())
          break;
      }
    }
  }
}
748
749void HttpResponseHeaders::AddHopByHopHeaders(HeaderSet* result) {
750  for (size_t i = 0; i < arraysize(kHopByHopResponseHeaders); ++i)
751    result->insert(std::string(kHopByHopResponseHeaders[i]));
752}
753
754void HttpResponseHeaders::AddCookieHeaders(HeaderSet* result) {
755  for (size_t i = 0; i < arraysize(kCookieResponseHeaders); ++i)
756    result->insert(std::string(kCookieResponseHeaders[i]));
757}
758
759void HttpResponseHeaders::AddChallengeHeaders(HeaderSet* result) {
760  for (size_t i = 0; i < arraysize(kChallengeResponseHeaders); ++i)
761    result->insert(std::string(kChallengeResponseHeaders[i]));
762}
763
764void HttpResponseHeaders::AddHopContentRangeHeaders(HeaderSet* result) {
765  result->insert("content-range");
766}
767
768void HttpResponseHeaders::GetMimeTypeAndCharset(std::string* mime_type,
769                                                std::string* charset) const {
770  mime_type->clear();
771  charset->clear();
772
773  std::string name = "content-type";
774  std::string value;
775
776  bool had_charset = false;
777
778  void* iter = NULL;
779  while (EnumerateHeader(&iter, name, &value))
780    HttpUtil::ParseContentType(value, mime_type, charset, &had_charset);
781}
782
783bool HttpResponseHeaders::GetMimeType(std::string* mime_type) const {
784  std::string unused;
785  GetMimeTypeAndCharset(mime_type, &unused);
786  return !mime_type->empty();
787}
788
789bool HttpResponseHeaders::GetCharset(std::string* charset) const {
790  std::string unused;
791  GetMimeTypeAndCharset(&unused, charset);
792  return !charset->empty();
793}
794
795bool HttpResponseHeaders::IsRedirect(std::string* location) const {
796  if (!IsRedirectResponseCode(response_code_))
797    return false;
798
799  // If we lack a Location header, then we can't treat this as a redirect.
800  // We assume that the first non-empty location value is the target URL that
801  // we want to follow.  TODO(darin): Is this consistent with other browsers?
802  size_t i = std::string::npos;
803  do {
804    i = FindHeader(++i, "location");
805    if (i == std::string::npos)
806      return false;
807    // If the location value is empty, then it doesn't count.
808  } while (parsed_[i].value_begin == parsed_[i].value_end);
809
810  if (location) {
811    // Escape any non-ASCII characters to preserve them.  The server should
812    // only be returning ASCII here, but for compat we need to do this.
813    *location = EscapeNonASCII(
814        std::string(parsed_[i].value_begin, parsed_[i].value_end));
815  }
816
817  return true;
818}
819
820// static
821bool HttpResponseHeaders::IsRedirectResponseCode(int response_code) {
822  // Users probably want to see 300 (multiple choice) pages, so we don't count
823  // them as redirects that need to be followed.
824  return (response_code == 301 ||
825          response_code == 302 ||
826          response_code == 303 ||
827          response_code == 307);
828}
829
830// From RFC 2616 section 13.2.4:
831//
832// The calculation to determine if a response has expired is quite simple:
833//
834//   response_is_fresh = (freshness_lifetime > current_age)
835//
836// Of course, there are other factors that can force a response to always be
837// validated or re-fetched.
838//
839bool HttpResponseHeaders::RequiresValidation(const Time& request_time,
840                                             const Time& response_time,
841                                             const Time& current_time) const {
842  TimeDelta lifetime =
843      GetFreshnessLifetime(response_time);
844  if (lifetime == TimeDelta())
845    return true;
846
847  return lifetime <= GetCurrentAge(request_time, response_time, current_time);
848}
849
850// From RFC 2616 section 13.2.4:
851//
852// The max-age directive takes priority over Expires, so if max-age is present
853// in a response, the calculation is simply:
854//
855//   freshness_lifetime = max_age_value
856//
857// Otherwise, if Expires is present in the response, the calculation is:
858//
859//   freshness_lifetime = expires_value - date_value
860//
861// Note that neither of these calculations is vulnerable to clock skew, since
862// all of the information comes from the origin server.
863//
864// Also, if the response does have a Last-Modified time, the heuristic
865// expiration value SHOULD be no more than some fraction of the interval since
866// that time. A typical setting of this fraction might be 10%:
867//
868//   freshness_lifetime = (date_value - last_modified_value) * 0.10
869//
870TimeDelta HttpResponseHeaders::GetFreshnessLifetime(
871    const Time& response_time) const {
872  // Check for headers that force a response to never be fresh.  For backwards
873  // compat, we treat "Pragma: no-cache" as a synonym for "Cache-Control:
874  // no-cache" even though RFC 2616 does not specify it.
875  if (HasHeaderValue("cache-control", "no-cache") ||
876      HasHeaderValue("cache-control", "no-store") ||
877      HasHeaderValue("pragma", "no-cache") ||
878      HasHeaderValue("vary", "*"))  // see RFC 2616 section 13.6
879    return TimeDelta();  // not fresh
880
881  // NOTE: "Cache-Control: max-age" overrides Expires, so we only check the
882  // Expires header after checking for max-age in GetFreshnessLifetime.  This
883  // is important since "Expires: <date in the past>" means not fresh, but
884  // it should not trump a max-age value.
885
886  TimeDelta max_age_value;
887  if (GetMaxAgeValue(&max_age_value))
888    return max_age_value;
889
890  // If there is no Date header, then assume that the server response was
891  // generated at the time when we received the response.
892  Time date_value;
893  if (!GetDateValue(&date_value))
894    date_value = response_time;
895
896  Time expires_value;
897  if (GetExpiresValue(&expires_value)) {
898    // The expires value can be a date in the past!
899    if (expires_value > date_value)
900      return expires_value - date_value;
901
902    return TimeDelta();  // not fresh
903  }
904
905  // From RFC 2616 section 13.4:
906  //
907  //   A response received with a status code of 200, 203, 206, 300, 301 or 410
908  //   MAY be stored by a cache and used in reply to a subsequent request,
909  //   subject to the expiration mechanism, unless a cache-control directive
910  //   prohibits caching.
911  //   ...
912  //   A response received with any other status code (e.g. status codes 302
913  //   and 307) MUST NOT be returned in a reply to a subsequent request unless
914  //   there are cache-control directives or another header(s) that explicitly
915  //   allow it.
916  //
917  // From RFC 2616 section 14.9.4:
918  //
919  //   When the must-revalidate directive is present in a response received by
920  //   a cache, that cache MUST NOT use the entry after it becomes stale to
921  //   respond to a subsequent request without first revalidating it with the
922  //   origin server. (I.e., the cache MUST do an end-to-end revalidation every
923  //   time, if, based solely on the origin server's Expires or max-age value,
924  //   the cached response is stale.)
925  //
926  if ((response_code_ == 200 || response_code_ == 203 ||
927       response_code_ == 206) &&
928      !HasHeaderValue("cache-control", "must-revalidate")) {
929    // TODO(darin): Implement a smarter heuristic.
930    Time last_modified_value;
931    if (GetLastModifiedValue(&last_modified_value)) {
932      // The last-modified value can be a date in the past!
933      if (last_modified_value <= date_value)
934        return (date_value - last_modified_value) / 10;
935    }
936  }
937
938  // These responses are implicitly fresh (unless otherwise overruled):
939  if (response_code_ == 300 || response_code_ == 301 || response_code_ == 410)
940    return TimeDelta::FromMicroseconds(kint64max);
941
942  return TimeDelta();  // not fresh
943}
944
945// From RFC 2616 section 13.2.3:
946//
947// Summary of age calculation algorithm, when a cache receives a response:
948//
949//   /*
950//    * age_value
951//    *      is the value of Age: header received by the cache with
952//    *              this response.
953//    * date_value
954//    *      is the value of the origin server's Date: header
955//    * request_time
956//    *      is the (local) time when the cache made the request
957//    *              that resulted in this cached response
958//    * response_time
959//    *      is the (local) time when the cache received the
960//    *              response
961//    * now
962//    *      is the current (local) time
963//    */
964//   apparent_age = max(0, response_time - date_value);
965//   corrected_received_age = max(apparent_age, age_value);
966//   response_delay = response_time - request_time;
967//   corrected_initial_age = corrected_received_age + response_delay;
968//   resident_time = now - response_time;
969//   current_age   = corrected_initial_age + resident_time;
970//
971TimeDelta HttpResponseHeaders::GetCurrentAge(const Time& request_time,
972                                             const Time& response_time,
973                                             const Time& current_time) const {
974  // If there is no Date header, then assume that the server response was
975  // generated at the time when we received the response.
976  Time date_value;
977  if (!GetDateValue(&date_value))
978    date_value = response_time;
979
980  // If there is no Age header, then assume age is zero.  GetAgeValue does not
981  // modify its out param if the value does not exist.
982  TimeDelta age_value;
983  GetAgeValue(&age_value);
984
985  TimeDelta apparent_age = std::max(TimeDelta(), response_time - date_value);
986  TimeDelta corrected_received_age = std::max(apparent_age, age_value);
987  TimeDelta response_delay = response_time - request_time;
988  TimeDelta corrected_initial_age = corrected_received_age + response_delay;
989  TimeDelta resident_time = current_time - response_time;
990  TimeDelta current_age = corrected_initial_age + resident_time;
991
992  return current_age;
993}
994
995bool HttpResponseHeaders::GetMaxAgeValue(TimeDelta* result) const {
996  std::string name = "cache-control";
997  std::string value;
998
999  const char kMaxAgePrefix[] = "max-age=";
1000  const size_t kMaxAgePrefixLen = arraysize(kMaxAgePrefix) - 1;
1001
1002  void* iter = NULL;
1003  while (EnumerateHeader(&iter, name, &value)) {
1004    if (value.size() > kMaxAgePrefixLen) {
1005      if (LowerCaseEqualsASCII(value.begin(),
1006                               value.begin() + kMaxAgePrefixLen,
1007                               kMaxAgePrefix)) {
1008        int64 seconds;
1009        base::StringToInt64(value.begin() + kMaxAgePrefixLen,
1010                            value.end(),
1011                            &seconds);
1012        *result = TimeDelta::FromSeconds(seconds);
1013        return true;
1014      }
1015    }
1016  }
1017
1018  return false;
1019}
1020
1021bool HttpResponseHeaders::GetAgeValue(TimeDelta* result) const {
1022  std::string value;
1023  if (!EnumerateHeader(NULL, "Age", &value))
1024    return false;
1025
1026  int64 seconds;
1027  base::StringToInt64(value, &seconds);
1028  *result = TimeDelta::FromSeconds(seconds);
1029  return true;
1030}
1031
1032bool HttpResponseHeaders::GetDateValue(Time* result) const {
1033  return GetTimeValuedHeader("Date", result);
1034}
1035
1036bool HttpResponseHeaders::GetLastModifiedValue(Time* result) const {
1037  return GetTimeValuedHeader("Last-Modified", result);
1038}
1039
1040bool HttpResponseHeaders::GetExpiresValue(Time* result) const {
1041  return GetTimeValuedHeader("Expires", result);
1042}
1043
1044bool HttpResponseHeaders::GetTimeValuedHeader(const std::string& name,
1045                                              Time* result) const {
1046  std::string value;
1047  if (!EnumerateHeader(NULL, name, &value))
1048    return false;
1049
1050  std::wstring value_wide(value.begin(), value.end());  // inflate ascii
1051  return Time::FromString(value_wide.c_str(), result);
1052}
1053
1054bool HttpResponseHeaders::IsKeepAlive() const {
1055  if (http_version_ < HttpVersion(1, 0))
1056    return false;
1057
1058  // NOTE: It is perhaps risky to assume that a Proxy-Connection header is
1059  // meaningful when we don't know that this response was from a proxy, but
1060  // Mozilla also does this, so we'll do the same.
1061  std::string connection_val;
1062  if (!EnumerateHeader(NULL, "connection", &connection_val))
1063    EnumerateHeader(NULL, "proxy-connection", &connection_val);
1064
1065  bool keep_alive;
1066
1067  if (http_version_ == HttpVersion(1, 0)) {
1068    // HTTP/1.0 responses default to NOT keep-alive
1069    keep_alive = LowerCaseEqualsASCII(connection_val, "keep-alive");
1070  } else {
1071    // HTTP/1.1 responses default to keep-alive
1072    keep_alive = !LowerCaseEqualsASCII(connection_val, "close");
1073  }
1074
1075  return keep_alive;
1076}
1077
1078bool HttpResponseHeaders::HasStrongValidators() const {
1079  std::string etag_value;
1080  EnumerateHeader(NULL, "etag", &etag_value);
1081  if (!etag_value.empty()) {
1082    size_t slash = etag_value.find('/');
1083    if (slash == std::string::npos || slash == 0)
1084      return true;
1085
1086    std::string::const_iterator i = etag_value.begin();
1087    std::string::const_iterator j = etag_value.begin() + slash;
1088    HttpUtil::TrimLWS(&i, &j);
1089    if (!LowerCaseEqualsASCII(i, j, "w"))
1090      return true;
1091  }
1092
1093  Time last_modified;
1094  if (!GetLastModifiedValue(&last_modified))
1095    return false;
1096
1097  Time date;
1098  if (!GetDateValue(&date))
1099    return false;
1100
1101  return ((date - last_modified).InSeconds() >= 60);
1102}
1103
1104// From RFC 2616:
1105// Content-Length = "Content-Length" ":" 1*DIGIT
1106int64 HttpResponseHeaders::GetContentLength() const {
1107  void* iter = NULL;
1108  std::string content_length_val;
1109  if (!EnumerateHeader(&iter, "content-length", &content_length_val))
1110    return -1;
1111
1112  if (content_length_val.empty())
1113    return -1;
1114
1115  if (content_length_val[0] == '+')
1116    return -1;
1117
1118  int64 result;
1119  bool ok = base::StringToInt64(content_length_val, &result);
1120  if (!ok || result < 0)
1121    return -1;
1122
1123  return result;
1124}
1125
1126// From RFC 2616 14.16:
1127// content-range-spec =
1128//     bytes-unit SP byte-range-resp-spec "/" ( instance-length | "*" )
1129// byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) | "*"
1130// instance-length = 1*DIGIT
1131// bytes-unit = "bytes"
1132bool HttpResponseHeaders::GetContentRange(int64* first_byte_position,
1133                                          int64* last_byte_position,
1134                                          int64* instance_length) const {
1135  void* iter = NULL;
1136  std::string content_range_spec;
1137  *first_byte_position = *last_byte_position = *instance_length = -1;
1138  if (!EnumerateHeader(&iter, "content-range", &content_range_spec))
1139    return false;
1140
1141  // If the header value is empty, we have an invalid header.
1142  if (content_range_spec.empty())
1143    return false;
1144
1145  size_t space_position = content_range_spec.find(' ');
1146  if (space_position == std::string::npos)
1147    return false;
1148
1149  // Invalid header if it doesn't contain "bytes-unit".
1150  std::string::const_iterator content_range_spec_begin =
1151      content_range_spec.begin();
1152  std::string::const_iterator content_range_spec_end =
1153      content_range_spec.begin() + space_position;
1154  HttpUtil::TrimLWS(&content_range_spec_begin, &content_range_spec_end);
1155  if (!LowerCaseEqualsASCII(content_range_spec_begin,
1156                            content_range_spec_end,
1157                            "bytes")) {
1158    return false;
1159  }
1160
1161  size_t slash_position = content_range_spec.find('/', space_position + 1);
1162  if (slash_position == std::string::npos)
1163    return false;
1164
1165  // Obtain the part behind the space and before slash.
1166  std::string::const_iterator byte_range_resp_spec_begin =
1167      content_range_spec.begin() + space_position + 1;
1168  std::string::const_iterator byte_range_resp_spec_end =
1169      content_range_spec.begin() + slash_position;
1170  HttpUtil::TrimLWS(&byte_range_resp_spec_begin, &byte_range_resp_spec_end);
1171
1172  // Parse the byte-range-resp-spec part.
1173  std::string byte_range_resp_spec(byte_range_resp_spec_begin,
1174                                   byte_range_resp_spec_end);
1175  // If byte-range-resp-spec != "*".
1176  if (!LowerCaseEqualsASCII(byte_range_resp_spec, "*")) {
1177    size_t minus_position = byte_range_resp_spec.find('-');
1178    if (minus_position != std::string::npos) {
1179      // Obtain first-byte-pos.
1180      std::string::const_iterator first_byte_pos_begin =
1181          byte_range_resp_spec.begin();
1182      std::string::const_iterator first_byte_pos_end =
1183          byte_range_resp_spec.begin() + minus_position;
1184      HttpUtil::TrimLWS(&first_byte_pos_begin, &first_byte_pos_end);
1185
1186      bool ok = base::StringToInt64(first_byte_pos_begin,
1187                                    first_byte_pos_end,
1188                                    first_byte_position);
1189
1190      // Obtain last-byte-pos.
1191      std::string::const_iterator last_byte_pos_begin =
1192          byte_range_resp_spec.begin() + minus_position + 1;
1193      std::string::const_iterator last_byte_pos_end =
1194          byte_range_resp_spec.end();
1195      HttpUtil::TrimLWS(&last_byte_pos_begin, &last_byte_pos_end);
1196
1197      ok &= base::StringToInt64(last_byte_pos_begin,
1198                                last_byte_pos_end,
1199                                last_byte_position);
1200      if (!ok) {
1201        *first_byte_position = *last_byte_position = -1;
1202        return false;
1203      }
1204      if (*first_byte_position < 0 || *last_byte_position < 0 ||
1205          *first_byte_position > *last_byte_position)
1206        return false;
1207    } else {
1208      return false;
1209    }
1210  }
1211
1212  // Parse the instance-length part.
1213  // If instance-length == "*".
1214  std::string::const_iterator instance_length_begin =
1215      content_range_spec.begin() + slash_position + 1;
1216  std::string::const_iterator instance_length_end =
1217      content_range_spec.end();
1218  HttpUtil::TrimLWS(&instance_length_begin, &instance_length_end);
1219
1220  if (LowerCaseEqualsASCII(instance_length_begin, instance_length_end, "*")) {
1221    return false;
1222  } else if (!base::StringToInt64(instance_length_begin,
1223                                  instance_length_end,
1224                                  instance_length)) {
1225    *instance_length = -1;
1226    return false;
1227  }
1228
1229  // We have all the values; let's verify that they make sense for a 206
1230  // response.
1231  if (*first_byte_position < 0 || *last_byte_position < 0 ||
1232      *instance_length < 0 || *instance_length - 1 < *last_byte_position)
1233    return false;
1234
1235  return true;
1236}
1237
1238}  // namespace net
1239