1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/child/multipart_response_delegate.h"
6
7#include "base/logging.h"
8#include "base/strings/string_number_conversions.h"
9#include "base/strings/string_util.h"
10#include "net/base/net_util.h"
11#include "net/http/http_util.h"
12#include "third_party/WebKit/public/platform/WebHTTPHeaderVisitor.h"
13#include "third_party/WebKit/public/platform/WebString.h"
14#include "third_party/WebKit/public/platform/WebURL.h"
15#include "third_party/WebKit/public/platform/WebURLLoaderClient.h"
16
17using blink::WebHTTPHeaderVisitor;
18using blink::WebString;
19using blink::WebURLLoader;
20using blink::WebURLLoaderClient;
21using blink::WebURLResponse;
22
23namespace content {
24
25namespace {
26
// The list of response headers that we do not copy from the original
// response when generating a WebURLResponse for a MIME payload.  Entries are
// spelled in lowercase ASCII.  Both the strings and the table slots are
// const, so the list cannot be altered at runtime.
const char* const kReplaceHeaders[] = {
  "content-type",
  "content-length",
  "content-disposition",
  "content-range",
  "range",
  "set-cookie"
};
37
38class HeaderCopier : public WebHTTPHeaderVisitor {
39 public:
40  HeaderCopier(WebURLResponse* response)
41      : response_(response) {
42  }
43  virtual void visitHeader(const WebString& name, const WebString& value) {
44    const std::string& name_utf8 = name.utf8();
45    for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
46      if (LowerCaseEqualsASCII(name_utf8, kReplaceHeaders[i]))
47        return;
48    }
49    response_->setHTTPHeaderField(name, value);
50  }
51 private:
52  WebURLResponse* response_;
53};
54
55}  // namespace
56
57MultipartResponseDelegate::MultipartResponseDelegate(
58    WebURLLoaderClient* client,
59    WebURLLoader* loader,
60    const WebURLResponse& response,
61    const std::string& boundary)
62    : client_(client),
63      loader_(loader),
64      original_response_(response),
65      encoded_data_length_(0),
66      boundary_("--"),
67      first_received_data_(true),
68      processing_headers_(false),
69      stop_sending_(false),
70      has_sent_first_response_(false) {
71  // Some servers report a boundary prefixed with "--".  See bug 5786.
72  if (StartsWithASCII(boundary, "--", true)) {
73    boundary_.assign(boundary);
74  } else {
75    boundary_.append(boundary);
76  }
77}
78
79void MultipartResponseDelegate::OnReceivedData(const char* data,
80                                               int data_len,
81                                               int encoded_data_length) {
82  // stop_sending_ means that we've already received the final boundary token.
83  // The server should stop sending us data at this point, but if it does, we
84  // just throw it away.
85  if (stop_sending_)
86    return;
87
88  data_.append(data, data_len);
89  encoded_data_length_ += encoded_data_length;
90  if (first_received_data_) {
91    // Some servers don't send a boundary token before the first chunk of
92    // data.  We handle this case anyway (Gecko does too).
93    first_received_data_ = false;
94
95    // Eat leading \r\n
96    int pos = PushOverLine(data_, 0);
97    if (pos)
98      data_ = data_.substr(pos);
99
100    if (data_.length() < boundary_.length() + 2) {
101      // We don't have enough data yet to make a boundary token.  Just wait
102      // until the next chunk of data arrives.
103      first_received_data_ = true;
104      return;
105    }
106
107    if (0 != data_.compare(0, boundary_.length(), boundary_)) {
108      data_ = boundary_ + "\n" + data_;
109    }
110  }
111  DCHECK(!first_received_data_);
112
113  // Headers
114  if (processing_headers_) {
115    // Eat leading \r\n
116    int pos = PushOverLine(data_, 0);
117    if (pos)
118      data_ = data_.substr(pos);
119
120    if (ParseHeaders()) {
121      // Successfully parsed headers.
122      processing_headers_ = false;
123    } else {
124      // Get more data before trying again.
125      return;
126    }
127  }
128  DCHECK(!processing_headers_);
129
130  size_t boundary_pos;
131  while ((boundary_pos = FindBoundary()) != std::string::npos) {
132    if (client_) {
133      // Strip out trailing \n\r characters in the buffer preceding the
134      // boundary on the same lines as Firefox.
135      size_t data_length = boundary_pos;
136      if (boundary_pos > 0 && data_[boundary_pos - 1] == '\n') {
137        data_length--;
138        if (boundary_pos > 1 && data_[boundary_pos - 2] == '\r') {
139          data_length--;
140        }
141      }
142      if (data_length > 0) {
143        // Send the last data chunk.
144        client_->didReceiveData(loader_,
145                                data_.data(),
146                                static_cast<int>(data_length),
147                                encoded_data_length_);
148        encoded_data_length_ = 0;
149      }
150    }
151    size_t boundary_end_pos = boundary_pos + boundary_.length();
152    if (boundary_end_pos < data_.length() && '-' == data_[boundary_end_pos]) {
153      // This was the last boundary so we can stop processing.
154      stop_sending_ = true;
155      data_.clear();
156      return;
157    }
158
159    // We can now throw out data up through the boundary
160    int offset = PushOverLine(data_, boundary_end_pos);
161    data_ = data_.substr(boundary_end_pos + offset);
162
163    // Ok, back to parsing headers
164    if (!ParseHeaders()) {
165      processing_headers_ = true;
166      break;
167    }
168  }
169
170  // At this point, we should send over any data we have, but keep enough data
171  // buffered to handle a boundary that may have been truncated.
172  if (!processing_headers_ && data_.length() > boundary_.length()) {
173    // If the last character is a new line character, go ahead and just send
174    // everything we have buffered.  This matches an optimization in Gecko.
175    int send_length = data_.length() - boundary_.length();
176    if (data_[data_.length() - 1] == '\n')
177      send_length = data_.length();
178    if (client_)
179      client_->didReceiveData(loader_,
180                              data_.data(),
181                              send_length,
182                              encoded_data_length_);
183    data_ = data_.substr(send_length);
184    encoded_data_length_ = 0;
185  }
186}
187
188void MultipartResponseDelegate::OnCompletedRequest() {
189  // If we have any pending data and we're not in a header, go ahead and send
190  // it to WebCore.
191  if (!processing_headers_ && !data_.empty() && !stop_sending_ && client_) {
192    client_->didReceiveData(loader_,
193                            data_.data(),
194                            static_cast<int>(data_.length()),
195                            encoded_data_length_);
196    encoded_data_length_ = 0;
197  }
198}
199
200int MultipartResponseDelegate::PushOverLine(const std::string& data,
201                                            size_t pos) {
202  int offset = 0;
203  if (pos < data.length() && (data[pos] == '\r' || data[pos] == '\n')) {
204    ++offset;
205    if (pos + 1 < data.length() && data[pos + 1] == '\n')
206      ++offset;
207  }
208  return offset;
209}
210
211bool MultipartResponseDelegate::ParseHeaders() {
212  int line_feed_increment = 1;
213
214  // Grab the headers being liberal about line endings.
215  size_t line_start_pos = 0;
216  size_t line_end_pos = data_.find('\n');
217  while (line_end_pos != std::string::npos) {
218    // Handle CRLF
219    if (line_end_pos > line_start_pos && data_[line_end_pos - 1] == '\r') {
220      line_feed_increment = 2;
221      --line_end_pos;
222    } else {
223      line_feed_increment = 1;
224    }
225    if (line_start_pos == line_end_pos) {
226      // A blank line, end of headers
227      line_end_pos += line_feed_increment;
228      break;
229    }
230    // Find the next header line.
231    line_start_pos = line_end_pos + line_feed_increment;
232    line_end_pos = data_.find('\n', line_start_pos);
233  }
234  // Truncated in the middle of a header, stop parsing.
235  if (line_end_pos == std::string::npos)
236    return false;
237
238  // Eat headers
239  std::string headers("\n");
240  headers.append(data_, 0, line_end_pos);
241  data_ = data_.substr(line_end_pos);
242
243  // Create a WebURLResponse based on the original set of headers + the
244  // replacement headers.  We only replace the same few headers that gecko
245  // does.  See netwerk/streamconv/converters/nsMultiMixedConv.cpp.
246  std::string content_type = net::GetSpecificHeader(headers, "content-type");
247  std::string mime_type;
248  std::string charset;
249  bool has_charset = false;
250  net::HttpUtil::ParseContentType(content_type, &mime_type, &charset,
251                                  &has_charset, NULL);
252  WebURLResponse response(original_response_.url());
253  response.setMIMEType(WebString::fromUTF8(mime_type));
254  response.setTextEncodingName(WebString::fromUTF8(charset));
255
256  HeaderCopier copier(&response);
257  original_response_.visitHTTPHeaderFields(&copier);
258
259  for (size_t i = 0; i < arraysize(kReplaceHeaders); ++i) {
260    std::string name(kReplaceHeaders[i]);
261    std::string value = net::GetSpecificHeader(headers, name);
262    if (!value.empty()) {
263      response.setHTTPHeaderField(WebString::fromUTF8(name),
264                                  WebString::fromUTF8(value));
265    }
266  }
267  // To avoid recording every multipart load as a separate visit in
268  // the history database, we want to keep track of whether the response
269  // is part of a multipart payload.  We do want to record the first visit,
270  // so we only set isMultipartPayload to true after the first visit.
271  response.setIsMultipartPayload(has_sent_first_response_);
272  has_sent_first_response_ = true;
273  // Send the response!
274  if (client_)
275    client_->didReceiveResponse(loader_, response);
276
277  return true;
278}
279
280// Boundaries are supposed to be preceeded with --, but it looks like gecko
281// doesn't require the dashes to exist.  See nsMultiMixedConv::FindToken.
282size_t MultipartResponseDelegate::FindBoundary() {
283  size_t boundary_pos = data_.find(boundary_);
284  if (boundary_pos != std::string::npos) {
285    // Back up over -- for backwards compat
286    // TODO(tc): Don't we only want to do this once?  Gecko code doesn't seem
287    // to care.
288    if (boundary_pos >= 2) {
289      if ('-' == data_[boundary_pos - 1] && '-' == data_[boundary_pos - 2]) {
290        boundary_pos -= 2;
291        boundary_ = "--" + boundary_;
292      }
293    }
294  }
295  return boundary_pos;
296}
297
298bool MultipartResponseDelegate::ReadMultipartBoundary(
299    const WebURLResponse& response,
300    std::string* multipart_boundary) {
301  std::string content_type =
302      response.httpHeaderField(WebString::fromUTF8("Content-Type")).utf8();
303
304  size_t boundary_start_offset = content_type.find("boundary=");
305  if (boundary_start_offset == std::string::npos)
306    return false;
307
308  boundary_start_offset += strlen("boundary=");
309
310  size_t boundary_end_offset = content_type.find(';', boundary_start_offset);
311
312  if (boundary_end_offset == std::string::npos)
313    boundary_end_offset = content_type.length();
314
315  size_t boundary_length = boundary_end_offset - boundary_start_offset;
316
317  *multipart_boundary =
318      content_type.substr(boundary_start_offset, boundary_length);
319  // The byte range response can have quoted boundary strings. This is legal
320  // as per MIME specifications. Individual data fragements however don't
321  // contain quoted boundary strings.
322  base::TrimString(*multipart_boundary, "\"", multipart_boundary);
323  return true;
324}
325
326bool MultipartResponseDelegate::ReadContentRanges(
327    const WebURLResponse& response,
328    int64* content_range_lower_bound,
329    int64* content_range_upper_bound,
330    int64* content_range_instance_size) {
331
332  std::string content_range = response.httpHeaderField("Content-Range").utf8();
333  if (content_range.empty()) {
334    content_range = response.httpHeaderField("Range").utf8();
335  }
336
337  if (content_range.empty()) {
338    DLOG(WARNING) << "Failed to read content range from response.";
339    return false;
340  }
341
342  size_t byte_range_lower_bound_start_offset = content_range.find(" ");
343  if (byte_range_lower_bound_start_offset == std::string::npos) {
344    return false;
345  }
346
347  // Skip over the initial space.
348  byte_range_lower_bound_start_offset++;
349
350  // Find the lower bound.
351  size_t byte_range_lower_bound_end_offset =
352      content_range.find("-", byte_range_lower_bound_start_offset);
353  if (byte_range_lower_bound_end_offset == std::string::npos) {
354    return false;
355  }
356
357  size_t byte_range_lower_bound_characters =
358      byte_range_lower_bound_end_offset - byte_range_lower_bound_start_offset;
359  std::string byte_range_lower_bound =
360      content_range.substr(byte_range_lower_bound_start_offset,
361                           byte_range_lower_bound_characters);
362
363  // Find the upper bound.
364  size_t byte_range_upper_bound_start_offset =
365      byte_range_lower_bound_end_offset + 1;
366
367  size_t byte_range_upper_bound_end_offset =
368      content_range.find("/", byte_range_upper_bound_start_offset);
369  if (byte_range_upper_bound_end_offset == std::string::npos) {
370    return false;
371  }
372
373  size_t byte_range_upper_bound_characters =
374      byte_range_upper_bound_end_offset - byte_range_upper_bound_start_offset;
375  std::string byte_range_upper_bound =
376      content_range.substr(byte_range_upper_bound_start_offset,
377                           byte_range_upper_bound_characters);
378
379  // Find the instance size.
380  size_t byte_range_instance_size_start_offset =
381      byte_range_upper_bound_end_offset + 1;
382
383  size_t byte_range_instance_size_end_offset =
384      content_range.length();
385
386  size_t byte_range_instance_size_characters =
387      byte_range_instance_size_end_offset -
388      byte_range_instance_size_start_offset;
389  std::string byte_range_instance_size =
390      content_range.substr(byte_range_instance_size_start_offset,
391                           byte_range_instance_size_characters);
392
393  if (!base::StringToInt64(byte_range_lower_bound, content_range_lower_bound))
394    return false;
395  if (!base::StringToInt64(byte_range_upper_bound, content_range_upper_bound))
396    return false;
397  if (!base::StringToInt64(byte_range_instance_size,
398                           content_range_instance_size)) {
399    return false;
400  }
401  return true;
402}
403
404}  // namespace content
405