1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/proxy/proxy_script_fetcher_impl.h"
6
7#include "base/compiler_specific.h"
8#include "base/logging.h"
9#include "base/message_loop/message_loop.h"
10#include "base/strings/string_util.h"
11#include "net/base/data_url.h"
12#include "net/base/io_buffer.h"
13#include "net/base/load_flags.h"
14#include "net/base/net_errors.h"
15#include "net/base/net_string_util.h"
16#include "net/base/request_priority.h"
17#include "net/cert/cert_status_flags.h"
18#include "net/http/http_response_headers.h"
19#include "net/url_request/url_request_context.h"
20
21// TODO(eroman):
22//   - Support auth-prompts (http://crbug.com/77366)
23
24namespace net {
25
26namespace {
27
// Default upper bound, in bytes, on the size of a fetched PAC script. A
// response that would grow past the active limit fails with ERR_FILE_TOO_BIG.
const int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 megabyte
31
// Default upper bound, in milliseconds, on how long a PAC script fetch may
// take. A fetch that runs longer than the active limit fails with
// ERR_TIMED_OUT.
const int kDefaultMaxDurationMs = 5 * 60 * 1000;  // 5 minutes
35
36// Returns true if |mime_type| is one of the known PAC mime type.
37bool IsPacMimeType(const std::string& mime_type) {
38  static const char * const kSupportedPacMimeTypes[] = {
39    "application/x-ns-proxy-autoconfig",
40    "application/x-javascript-config",
41  };
42  for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
43    if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
44      return true;
45  }
46  return false;
47}
48
49// Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
50// to |*utf16|.
51// If |charset| is empty, then we don't know what it was and guess.
52void ConvertResponseToUTF16(const std::string& charset,
53                            const std::string& bytes,
54                            base::string16* utf16) {
55  const char* codepage;
56
57  if (charset.empty()) {
58    // Assume ISO-8859-1 if no charset was specified.
59    codepage = kCharsetLatin1;
60  } else {
61    // Otherwise trust the charset that was provided.
62    codepage = charset.c_str();
63  }
64
65  // Be generous in the conversion -- if any characters lie outside of |charset|
66  // (i.e. invalid), then substitute them with U+FFFD rather than failing.
67  ConvertToUTF16WithSubstitutions(bytes, codepage, utf16);
68}
69
70}  // namespace
71
// Constructs a fetcher that creates its requests from |url_request_context|,
// which must be non-NULL (DCHECKed). The response-size and duration limits
// start at kDefaultMaxResponseBytes and kDefaultMaxDurationMs, and no request
// is in progress initially.
ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
    URLRequestContext* url_request_context)
    : url_request_context_(url_request_context),
      // Single read buffer of kBufSize bytes, reused for every Read() call.
      buf_(new IOBuffer(kBufSize)),
      next_id_(0),
      // 0 means "no fetch in progress"; Fetch() hands out ids via ++next_id_.
      cur_request_id_(0),
      result_code_(OK),
      result_text_(NULL),
      max_response_bytes_(kDefaultMaxResponseBytes),
      max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)),
      // Supplies the weak pointer bound into the timeout task posted by
      // Fetch().
      weak_factory_(this) {
  DCHECK(url_request_context);
}
85
// Destroying the fetcher abandons any fetch that is still in progress; no
// explicit cleanup is required here.
ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
  // The URLRequest's destructor will cancel the outstanding request, and
  // ensure that the delegate (this) is not called again.
}
90
91base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
92    base::TimeDelta timeout) {
93  base::TimeDelta prev = max_duration_;
94  max_duration_ = timeout;
95  return prev;
96}
97
98size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
99  size_t prev = max_response_bytes_;
100  max_response_bytes_ = size_bytes;
101  return prev;
102}
103
104void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
105  DCHECK_EQ(request, cur_request_.get());
106
107  // Use |result_code_| as the request's error if we have already set it to
108  // something specific.
109  if (result_code_ == OK && !request->status().is_success())
110    result_code_ = request->status().error();
111
112  FetchCompleted();
113}
114
115int ProxyScriptFetcherImpl::Fetch(
116    const GURL& url, base::string16* text, const CompletionCallback& callback) {
117  // It is invalid to call Fetch() while a request is already in progress.
118  DCHECK(!cur_request_.get());
119  DCHECK(!callback.is_null());
120  DCHECK(text);
121
122  // Handle base-64 encoded data-urls that contain custom PAC scripts.
123  if (url.SchemeIs("data")) {
124    std::string mime_type;
125    std::string charset;
126    std::string data;
127    if (!DataURL::Parse(url, &mime_type, &charset, &data))
128      return ERR_FAILED;
129
130    ConvertResponseToUTF16(charset, data, text);
131    return OK;
132  }
133
134  cur_request_ =
135      url_request_context_->CreateRequest(url, DEFAULT_PRIORITY, this, NULL);
136  cur_request_->set_method("GET");
137
138  // Make sure that the PAC script is downloaded using a direct connection,
139  // to avoid circular dependencies (fetching is a part of proxy resolution).
140  // Also disable the use of the disk cache. The cache is disabled so that if
141  // the user switches networks we don't potentially use the cached response
142  // from old network when we should in fact be re-fetching on the new network.
143  // If the PAC script is hosted on an HTTPS server we bypass revocation
144  // checking in order to avoid a circular dependency when attempting to fetch
145  // the OCSP response or CRL. We could make the revocation check go direct but
146  // the proxy might be the only way to the outside world.
147  cur_request_->SetLoadFlags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
148                             LOAD_DISABLE_CERT_REVOCATION_CHECKING);
149
150  // Save the caller's info for notification on completion.
151  callback_ = callback;
152  result_text_ = text;
153
154  bytes_read_so_far_.clear();
155
156  // Post a task to timeout this request if it takes too long.
157  cur_request_id_ = ++next_id_;
158  base::MessageLoop::current()->PostDelayedTask(
159      FROM_HERE,
160      base::Bind(&ProxyScriptFetcherImpl::OnTimeout,
161                 weak_factory_.GetWeakPtr(),
162                 cur_request_id_),
163      max_duration_);
164
165  // Start the request.
166  cur_request_->Start();
167  return ERR_IO_PENDING;
168}
169
// Abandons the fetch in progress, if any. The stored completion callback is
// dropped without being run.
void ProxyScriptFetcherImpl::Cancel() {
  // ResetCurRequestState will free the URLRequest, which will cause
  // cancellation.
  ResetCurRequestState();
}
175
// Returns the URLRequestContext that was supplied at construction and is used
// to create this fetcher's requests.
URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() const {
  return url_request_context_;
}
179
180void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
181                                            AuthChallengeInfo* auth_info) {
182  DCHECK_EQ(request, cur_request_.get());
183  // TODO(eroman): http://crbug.com/77366
184  LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
185  result_code_ = ERR_NOT_IMPLEMENTED;
186  request->CancelAuth();
187}
188
189void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
190                                                   const SSLInfo& ssl_info,
191                                                   bool fatal) {
192  DCHECK_EQ(request, cur_request_.get());
193  // Revocation check failures are not fatal.
194  if (IsCertStatusMinorError(ssl_info.cert_status)) {
195    request->ContinueDespiteLastError();
196    return;
197  }
198  LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
199  // Certificate errors are in same space as net errors.
200  result_code_ = MapCertStatusToNetError(ssl_info.cert_status);
201  request->Cancel();
202}
203
204void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
205  DCHECK_EQ(request, cur_request_.get());
206
207  if (!request->status().is_success()) {
208    OnResponseCompleted(request);
209    return;
210  }
211
212  // Require HTTP responses to have a success status code.
213  if (request->url().SchemeIsHTTPOrHTTPS()) {
214    // NOTE about status codes: We are like Firefox 3 in this respect.
215    // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
216    if (request->GetResponseCode() != 200) {
217      VLOG(1) << "Fetched PAC script had (bad) status line: "
218              << request->response_headers()->GetStatusLine();
219      result_code_ = ERR_PAC_STATUS_NOT_OK;
220      request->Cancel();
221      return;
222    }
223
224    // NOTE about mime types: We do not enforce mime types on PAC files.
225    // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
226    // however log mismatches to help with debugging.
227    std::string mime_type;
228    cur_request_->GetMimeType(&mime_type);
229    if (!IsPacMimeType(mime_type)) {
230      VLOG(1) << "Fetched PAC script does not have a proper mime type: "
231              << mime_type;
232    }
233  }
234
235  ReadBody(request);
236}
237
238void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
239                                             int num_bytes) {
240  DCHECK_EQ(request, cur_request_.get());
241  if (ConsumeBytesRead(request, num_bytes)) {
242    // Keep reading.
243    ReadBody(request);
244  }
245}
246
247void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
248  // Read as many bytes as are available synchronously.
249  while (true) {
250    int num_bytes;
251    if (!request->Read(buf_.get(), kBufSize, &num_bytes)) {
252      // Check whether the read failed synchronously.
253      if (!request->status().is_io_pending())
254        OnResponseCompleted(request);
255      return;
256    }
257    if (!ConsumeBytesRead(request, num_bytes))
258      return;
259  }
260}
261
262bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
263                                              int num_bytes) {
264  if (num_bytes <= 0) {
265    // Error while reading, or EOF.
266    OnResponseCompleted(request);
267    return false;
268  }
269
270  // Enforce maximum size bound.
271  if (num_bytes + bytes_read_so_far_.size() >
272      static_cast<size_t>(max_response_bytes_)) {
273    result_code_ = ERR_FILE_TOO_BIG;
274    request->Cancel();
275    return false;
276  }
277
278  bytes_read_so_far_.append(buf_->data(), num_bytes);
279  return true;
280}
281
282void ProxyScriptFetcherImpl::FetchCompleted() {
283  if (result_code_ == OK) {
284    // The caller expects the response to be encoded as UTF16.
285    std::string charset;
286    cur_request_->GetCharset(&charset);
287    ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
288  } else {
289    // On error, the caller expects empty string for bytes.
290    result_text_->clear();
291  }
292
293  int result_code = result_code_;
294  CompletionCallback callback = callback_;
295
296  ResetCurRequestState();
297
298  callback.Run(result_code);
299}
300
// Returns the fetcher to its idle state: releases the current URLRequest (if
// any) and clears the per-request bookkeeping. |bytes_read_so_far_| is left
// untouched here; Fetch() clears it before starting a new request.
void ProxyScriptFetcherImpl::ResetCurRequestState() {
  cur_request_.reset();
  // Fetch() assigns ids via ++next_id_, so 0 never matches the id bound into
  // a pending timeout task; OnTimeout() ignores such tasks.
  cur_request_id_ = 0;
  callback_.Reset();
  result_code_ = OK;
  result_text_ = NULL;
}
308
309void ProxyScriptFetcherImpl::OnTimeout(int id) {
310  // Timeout tasks may outlive the URLRequest they reference. Make sure it
311  // is still applicable.
312  if (cur_request_id_ != id)
313    return;
314
315  DCHECK(cur_request_.get());
316  result_code_ = ERR_TIMED_OUT;
317  cur_request_->Cancel();
318}
319
320}  // namespace net
321