1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef NET_HTTP_HTTP_RESPONSE_HEADERS_H_
6#define NET_HTTP_HTTP_RESPONSE_HEADERS_H_
7
8#include <string>
9#include <vector>
10
11#include "base/basictypes.h"
12#include "base/containers/hash_tables.h"
13#include "base/memory/ref_counted.h"
14#include "base/strings/string_piece.h"
15#include "net/base/net_export.h"
16#include "net/base/net_log.h"
17#include "net/http/http_version.h"
18
// Forward declarations.  These types only appear in function declarations
// (parameter and return types) in this header, so their full definitions are
// not required here.
class Pickle;
class PickleIterator;

namespace base {
class Time;
class TimeDelta;
}  // namespace base
26
27namespace net {
28
29// HttpResponseHeaders: parses and holds HTTP response headers.
30class NET_EXPORT HttpResponseHeaders
31    : public base::RefCountedThreadSafe<HttpResponseHeaders> {
32 public:
33  // Persist options.
34  typedef int PersistOptions;
35  static const PersistOptions PERSIST_RAW = -1;  // Raw, unparsed headers.
36  static const PersistOptions PERSIST_ALL = 0;  // Parsed headers.
37  static const PersistOptions PERSIST_SANS_COOKIES = 1 << 0;
38  static const PersistOptions PERSIST_SANS_CHALLENGES = 1 << 1;
39  static const PersistOptions PERSIST_SANS_HOP_BY_HOP = 1 << 2;
40  static const PersistOptions PERSIST_SANS_NON_CACHEABLE = 1 << 3;
41  static const PersistOptions PERSIST_SANS_RANGES = 1 << 4;
42  static const PersistOptions PERSIST_SANS_SECURITY_STATE = 1 << 5;
43
44  // Parses the given raw_headers.  raw_headers should be formatted thus:
45  // includes the http status response line, each line is \0-terminated, and
46  // it's terminated by an empty line (ie, 2 \0s in a row).
47  // (Note that line continuations should have already been joined;
48  // see HttpUtil::AssembleRawHeaders)
49  //
50  // HttpResponseHeaders does not perform any encoding changes on the input.
51  //
52  explicit HttpResponseHeaders(const std::string& raw_headers);
53
54  // Initializes from the representation stored in the given pickle.  The data
55  // for this object is found relative to the given pickle_iter, which should
56  // be passed to the pickle's various Read* methods.
57  HttpResponseHeaders(const Pickle& pickle, PickleIterator* pickle_iter);
58
59  // Appends a representation of this object to the given pickle.
60  // The options argument can be a combination of PersistOptions.
61  void Persist(Pickle* pickle, PersistOptions options);
62
63  // Performs header merging as described in 13.5.3 of RFC 2616.
64  void Update(const HttpResponseHeaders& new_headers);
65
66  // Removes all instances of a particular header.
67  void RemoveHeader(const std::string& name);
68
69  // Removes a particular header line. The header name is compared
70  // case-insensitively.
71  void RemoveHeaderLine(const std::string& name, const std::string& value);
72
73  // Adds a particular header.  |header| has to be a single header without any
74  // EOL termination, just [<header-name>: <header-values>]
75  // If a header with the same name is already stored, the two headers are not
76  // merged together by this method; the one provided is simply put at the
77  // end of the list.
78  void AddHeader(const std::string& header);
79
80  // Replaces the current status line with the provided one (|new_status| should
81  // not have any EOL).
82  void ReplaceStatusLine(const std::string& new_status);
83
84  // Creates a normalized header string.  The output will be formatted exactly
85  // like so:
86  //     HTTP/<version> <status_code> <status_text>\n
87  //     [<header-name>: <header-values>\n]*
88  // meaning, each line is \n-terminated, and there is no extra whitespace
89  // beyond the single space separators shown (of course, values can contain
90  // whitespace within them).  If a given header-name appears more than once
91  // in the set of headers, they are combined into a single line like so:
92  //     <header-name>: <header-value1>, <header-value2>, ...<header-valueN>\n
93  //
94  // DANGER: For some headers (e.g., "Set-Cookie"), the normalized form can be
95  // a lossy format.  This is due to the fact that some servers generate
96  // Set-Cookie headers that contain unquoted commas (usually as part of the
97  // value of an "expires" attribute).  So, use this function with caution.  Do
98  // not expect to be able to re-parse Set-Cookie headers from this output.
99  //
100  // NOTE: Do not make any assumptions about the encoding of this output
101  // string.  It may be non-ASCII, and the encoding used by the server is not
102  // necessarily known to us.  Do not assume that this output is UTF-8!
103  //
104  // TODO(darin): remove this method
105  //
106  void GetNormalizedHeaders(std::string* output) const;
107
108  // Fetch the "normalized" value of a single header, where all values for the
109  // header name are separated by commas.  See the GetNormalizedHeaders for
110  // format details.  Returns false if this header wasn't found.
111  //
112  // NOTE: Do not make any assumptions about the encoding of this output
113  // string.  It may be non-ASCII, and the encoding used by the server is not
114  // necessarily known to us.  Do not assume that this output is UTF-8!
115  //
116  // TODO(darin): remove this method
117  //
118  bool GetNormalizedHeader(const std::string& name, std::string* value) const;
119
120  // Returns the normalized status line.  For HTTP/0.9 responses (i.e.,
121  // responses that lack a status line), this is the manufactured string
122  // "HTTP/0.9 200 OK".
123  std::string GetStatusLine() const;
124
125  // Get the HTTP version of the normalized status line.
126  HttpVersion GetHttpVersion() const {
127    return http_version_;
128  }
129
130  // Get the HTTP version determined while parsing; or (0,0) if parsing failed
131  HttpVersion GetParsedHttpVersion() const {
132    return parsed_http_version_;
133  }
134
135  // Get the HTTP status text of the normalized status line.
136  std::string GetStatusText() const;
137
138  // Enumerate the "lines" of the response headers.  This skips over the status
139  // line.  Use GetStatusLine if you are interested in that.  Note that this
140  // method returns the un-coalesced response header lines, so if a response
141  // header appears on multiple lines, then it will appear multiple times in
142  // this enumeration (in the order the header lines were received from the
143  // server).  Also, a given header might have an empty value.  Initialize a
144  // 'void*' variable to NULL and pass it by address to EnumerateHeaderLines.
145  // Call EnumerateHeaderLines repeatedly until it returns false.  The
146  // out-params 'name' and 'value' are set upon success.
147  bool EnumerateHeaderLines(void** iter,
148                            std::string* name,
149                            std::string* value) const;
150
151  // Enumerate the values of the specified header.   If you are only interested
152  // in the first header, then you can pass NULL for the 'iter' parameter.
153  // Otherwise, to iterate across all values for the specified header,
154  // initialize a 'void*' variable to NULL and pass it by address to
155  // EnumerateHeader. Note that a header might have an empty value. Call
156  // EnumerateHeader repeatedly until it returns false.
157  bool EnumerateHeader(void** iter,
158                       const base::StringPiece& name,
159                       std::string* value) const;
160
161  // Returns true if the response contains the specified header-value pair.
162  // Both name and value are compared case insensitively.
163  bool HasHeaderValue(const base::StringPiece& name,
164                      const base::StringPiece& value) const;
165
166  // Returns true if the response contains the specified header.
167  // The name is compared case insensitively.
168  bool HasHeader(const base::StringPiece& name) const;
169
170  // Get the mime type and charset values in lower case form from the headers.
171  // Empty strings are returned if the values are not present.
172  void GetMimeTypeAndCharset(std::string* mime_type,
173                             std::string* charset) const;
174
175  // Get the mime type in lower case from the headers.  If there's no mime
176  // type, returns false.
177  bool GetMimeType(std::string* mime_type) const;
178
179  // Get the charset in lower case from the headers.  If there's no charset,
180  // returns false.
181  bool GetCharset(std::string* charset) const;
182
183  // Returns true if this response corresponds to a redirect.  The target
184  // location of the redirect is optionally returned if location is non-null.
185  bool IsRedirect(std::string* location) const;
186
187  // Returns true if the HTTP response code passed in corresponds to a
188  // redirect.
189  static bool IsRedirectResponseCode(int response_code);
190
191  // Returns true if the response cannot be reused without validation.  The
192  // result is relative to the current_time parameter, which is a parameter to
193  // support unit testing.  The request_time parameter indicates the time at
194  // which the request was made that resulted in this response, which was
195  // received at response_time.
196  bool RequiresValidation(const base::Time& request_time,
197                          const base::Time& response_time,
198                          const base::Time& current_time) const;
199
200  // Returns the amount of time the server claims the response is fresh from
201  // the time the response was generated.  See section 13.2.4 of RFC 2616.  See
202  // RequiresValidation for a description of the response_time parameter.
203  base::TimeDelta GetFreshnessLifetime(const base::Time& response_time) const;
204
205  // Returns the age of the response.  See section 13.2.3 of RFC 2616.
206  // See RequiresValidation for a description of this method's parameters.
207  base::TimeDelta GetCurrentAge(const base::Time& request_time,
208                                const base::Time& response_time,
209                                const base::Time& current_time) const;
210
211  // The following methods extract values from the response headers.  If a
212  // value is not present, then false is returned.  Otherwise, true is returned
213  // and the out param is assigned to the corresponding value.
214  bool GetMaxAgeValue(base::TimeDelta* value) const;
215  bool GetAgeValue(base::TimeDelta* value) const;
216  bool GetDateValue(base::Time* value) const;
217  bool GetLastModifiedValue(base::Time* value) const;
218  bool GetExpiresValue(base::Time* value) const;
219
220  // Extracts the time value of a particular header.  This method looks for the
221  // first matching header value and parses its value as a HTTP-date.
222  bool GetTimeValuedHeader(const std::string& name, base::Time* result) const;
223
224  // Determines if this response indicates a keep-alive connection.
225  bool IsKeepAlive() const;
226
227  // Returns true if this response has a strong etag or last-modified header.
228  // See section 13.3.3 of RFC 2616.
229  bool HasStrongValidators() const;
230
231  // Extracts the value of the Content-Length header or returns -1 if there is
232  // no such header in the response.
233  int64 GetContentLength() const;
234
235  // Extracts the value of the specified header or returns -1 if there is no
236  // such header in the response.
237  int64 GetInt64HeaderValue(const std::string& header) const;
238
239  // Extracts the values in a Content-Range header and returns true if they are
240  // valid for a 206 response; otherwise returns false.
241  // The following values will be outputted:
242  // |*first_byte_position| = inclusive position of the first byte of the range
243  // |*last_byte_position| = inclusive position of the last byte of the range
244  // |*instance_length| = size in bytes of the object requested
245  // If any of the above values is unknown, its value will be -1.
246  bool GetContentRange(int64* first_byte_position,
247                       int64* last_byte_position,
248                       int64* instance_length) const;
249
250  // Returns true if the response is chunk-encoded.
251  bool IsChunkEncoded() const;
252
253  // Creates a Value for use with the NetLog containing the response headers.
254  base::Value* NetLogCallback(NetLog::LogLevel log_level) const;
255
256  // Takes in a Value created by the above function, and attempts to create a
257  // copy of the original headers.  Returns true on success.  On failure,
258  // clears |http_response_headers|.
259  // TODO(mmenke):  Long term, we want to remove this, and migrate external
260  //                consumers to be NetworkDelegates.
261  static bool FromNetLogParam(
262      const base::Value* event_param,
263      scoped_refptr<HttpResponseHeaders>* http_response_headers);
264
265  // Returns the HTTP response code.  This is 0 if the response code text seems
266  // to exist but could not be parsed.  Otherwise, it defaults to 200 if the
267  // response code is not found in the raw headers.
268  int response_code() const { return response_code_; }
269
270  // Returns the raw header string.
271  const std::string& raw_headers() const { return raw_headers_; }
272
273 private:
274  friend class base::RefCountedThreadSafe<HttpResponseHeaders>;
275
276  typedef base::hash_set<std::string> HeaderSet;
277
278  // The members of this structure point into raw_headers_.
279  struct ParsedHeader;
280  typedef std::vector<ParsedHeader> HeaderList;
281
282  HttpResponseHeaders();
283  ~HttpResponseHeaders();
284
285  // Initializes from the given raw headers.
286  void Parse(const std::string& raw_input);
287
288  // Helper function for ParseStatusLine.
289  // Tries to extract the "HTTP/X.Y" from a status line formatted like:
290  //    HTTP/1.1 200 OK
291  // with line_begin and end pointing at the begin and end of this line.  If the
292  // status line is malformed, returns HttpVersion(0,0).
293  static HttpVersion ParseVersion(std::string::const_iterator line_begin,
294                                  std::string::const_iterator line_end);
295
296  // Tries to extract the status line from a header block, given the first
297  // line of said header block.  If the status line is malformed, we'll
298  // construct a valid one.  Example input:
299  //    HTTP/1.1 200 OK
300  // with line_begin and end pointing at the begin and end of this line.
301  // Output will be a normalized version of this.
302  void ParseStatusLine(std::string::const_iterator line_begin,
303                       std::string::const_iterator line_end,
304                       bool has_headers);
305
306  // Find the header in our list (case-insensitive) starting with parsed_ at
307  // index |from|.  Returns string::npos if not found.
308  size_t FindHeader(size_t from, const base::StringPiece& name) const;
309
310  // Add a header->value pair to our list.  If we already have header in our
311  // list, append the value to it.
312  void AddHeader(std::string::const_iterator name_begin,
313                 std::string::const_iterator name_end,
314                 std::string::const_iterator value_begin,
315                 std::string::const_iterator value_end);
316
317  // Add to parsed_ given the fields of a ParsedHeader object.
318  void AddToParsed(std::string::const_iterator name_begin,
319                   std::string::const_iterator name_end,
320                   std::string::const_iterator value_begin,
321                   std::string::const_iterator value_end);
322
323  // Replaces the current headers with the merged version of |raw_headers| and
324  // the current headers without the headers in |headers_to_remove|. Note that
325  // |headers_to_remove| are removed from the current headers (before the
326  // merge), not after the merge.
327  void MergeWithHeaders(const std::string& raw_headers,
328                        const HeaderSet& headers_to_remove);
329
330  // Adds the values from any 'cache-control: no-cache="foo,bar"' headers.
331  void AddNonCacheableHeaders(HeaderSet* header_names) const;
332
333  // Adds the set of header names that contain cookie values.
334  static void AddSensitiveHeaders(HeaderSet* header_names);
335
336  // Adds the set of rfc2616 hop-by-hop response headers.
337  static void AddHopByHopHeaders(HeaderSet* header_names);
338
339  // Adds the set of challenge response headers.
340  static void AddChallengeHeaders(HeaderSet* header_names);
341
342  // Adds the set of cookie response headers.
343  static void AddCookieHeaders(HeaderSet* header_names);
344
345  // Adds the set of content range response headers.
346  static void AddHopContentRangeHeaders(HeaderSet* header_names);
347
348  // Adds the set of transport security state headers.
349  static void AddSecurityStateHeaders(HeaderSet* header_names);
350
351  // We keep a list of ParsedHeader objects.  These tell us where to locate the
352  // header-value pairs within raw_headers_.
353  HeaderList parsed_;
354
355  // The raw_headers_ consists of the normalized status line (terminated with a
356  // null byte) and then followed by the raw null-terminated headers from the
357  // input that was passed to our constructor.  We preserve the input [*] to
358  // maintain as much ancillary fidelity as possible (since it is sometimes
359  // hard to tell what may matter down-stream to a consumer of XMLHttpRequest).
360  // [*] The status line may be modified.
361  std::string raw_headers_;
362
363  // This is the parsed HTTP response code.
364  int response_code_;
365
366  // The normalized http version (consistent with what GetStatusLine() returns).
367  HttpVersion http_version_;
368
369  // The parsed http version number (not normalized).
370  HttpVersion parsed_http_version_;
371
372  DISALLOW_COPY_AND_ASSIGN(HttpResponseHeaders);
373};
374
375}  // namespace net
376
377#endif  // NET_HTTP_HTTP_RESPONSE_HEADERS_H_
378