1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/proxy/proxy_script_fetcher_impl.h"
6
7#include "base/compiler_specific.h"
8#include "base/i18n/icu_string_conversions.h"
9#include "base/logging.h"
10#include "base/message_loop/message_loop.h"
11#include "base/strings/string_util.h"
12#include "net/base/data_url.h"
13#include "net/base/io_buffer.h"
14#include "net/base/load_flags.h"
15#include "net/base/net_errors.h"
16#include "net/cert/cert_status_flags.h"
17#include "net/http/http_response_headers.h"
18#include "net/url_request/url_request_context.h"
19
20// TODO(eroman):
21//   - Support auth-prompts (http://crbug.com/77366)
22
23namespace net {
24
25namespace {
26
// Upper bound (in bytes) on the size of a PAC script. Responses exceeding
// this will fail with ERR_FILE_TOO_BIG.
const int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 megabyte

// Upper bound (in milliseconds) on the time allowed for fetching the PAC
// script. Responses exceeding this will fail with ERR_TIMED_OUT.
const int kDefaultMaxDurationMs = 5 * 60 * 1000;  // 5 minutes
34
35// Returns true if |mime_type| is one of the known PAC mime type.
36bool IsPacMimeType(const std::string& mime_type) {
37  static const char * const kSupportedPacMimeTypes[] = {
38    "application/x-ns-proxy-autoconfig",
39    "application/x-javascript-config",
40  };
41  for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
42    if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
43      return true;
44  }
45  return false;
46}
47
48// Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
49// to |*utf16|.
50// If |charset| is empty, then we don't know what it was and guess.
51void ConvertResponseToUTF16(const std::string& charset,
52                            const std::string& bytes,
53                            base::string16* utf16) {
54  const char* codepage;
55
56  if (charset.empty()) {
57    // Assume ISO-8859-1 if no charset was specified.
58    codepage = base::kCodepageLatin1;
59  } else {
60    // Otherwise trust the charset that was provided.
61    codepage = charset.c_str();
62  }
63
64  // We will be generous in the conversion -- if any characters lie
65  // outside of |charset| (i.e. invalid), then substitute them with
66  // U+FFFD rather than failing.
67  base::CodepageToUTF16(bytes, codepage,
68                        base::OnStringConversionError::SUBSTITUTE,
69                        utf16);
70}
71
72}  // namespace
73
74ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
75    URLRequestContext* url_request_context)
76    : weak_factory_(this),
77      url_request_context_(url_request_context),
78      buf_(new IOBuffer(kBufSize)),
79      next_id_(0),
80      cur_request_id_(0),
81      result_code_(OK),
82      result_text_(NULL),
83      max_response_bytes_(kDefaultMaxResponseBytes),
84      max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)) {
85  DCHECK(url_request_context);
86}
87
88ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
89  // The URLRequest's destructor will cancel the outstanding request, and
90  // ensure that the delegate (this) is not called again.
91}
92
93base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
94    base::TimeDelta timeout) {
95  base::TimeDelta prev = max_duration_;
96  max_duration_ = timeout;
97  return prev;
98}
99
100size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
101  size_t prev = max_response_bytes_;
102  max_response_bytes_ = size_bytes;
103  return prev;
104}
105
106void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
107  DCHECK_EQ(request, cur_request_.get());
108
109  // Use |result_code_| as the request's error if we have already set it to
110  // something specific.
111  if (result_code_ == OK && !request->status().is_success())
112    result_code_ = request->status().error();
113
114  FetchCompleted();
115}
116
117int ProxyScriptFetcherImpl::Fetch(
118    const GURL& url, base::string16* text, const CompletionCallback& callback) {
119  // It is invalid to call Fetch() while a request is already in progress.
120  DCHECK(!cur_request_.get());
121  DCHECK(!callback.is_null());
122  DCHECK(text);
123
124  // Handle base-64 encoded data-urls that contain custom PAC scripts.
125  if (url.SchemeIs("data")) {
126    std::string mime_type;
127    std::string charset;
128    std::string data;
129    if (!DataURL::Parse(url, &mime_type, &charset, &data))
130      return ERR_FAILED;
131
132    ConvertResponseToUTF16(charset, data, text);
133    return OK;
134  }
135
136  cur_request_.reset(url_request_context_->CreateRequest(url, this));
137  cur_request_->set_method("GET");
138
139  // Make sure that the PAC script is downloaded using a direct connection,
140  // to avoid circular dependencies (fetching is a part of proxy resolution).
141  // Also disable the use of the disk cache. The cache is disabled so that if
142  // the user switches networks we don't potentially use the cached response
143  // from old network when we should in fact be re-fetching on the new network.
144  // If the PAC script is hosted on an HTTPS server we bypass revocation
145  // checking in order to avoid a circular dependency when attempting to fetch
146  // the OCSP response or CRL. We could make the revocation check go direct but
147  // the proxy might be the only way to the outside world.
148  cur_request_->set_load_flags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE |
149                               LOAD_DISABLE_CERT_REVOCATION_CHECKING);
150
151  // Save the caller's info for notification on completion.
152  callback_ = callback;
153  result_text_ = text;
154
155  bytes_read_so_far_.clear();
156
157  // Post a task to timeout this request if it takes too long.
158  cur_request_id_ = ++next_id_;
159  base::MessageLoop::current()->PostDelayedTask(
160      FROM_HERE,
161      base::Bind(&ProxyScriptFetcherImpl::OnTimeout,
162                 weak_factory_.GetWeakPtr(),
163                 cur_request_id_),
164      max_duration_);
165
166  // Start the request.
167  cur_request_->Start();
168  return ERR_IO_PENDING;
169}
170
171void ProxyScriptFetcherImpl::Cancel() {
172  // ResetCurRequestState will free the URLRequest, which will cause
173  // cancellation.
174  ResetCurRequestState();
175}
176
177URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() const {
178  return url_request_context_;
179}
180
181void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
182                                            AuthChallengeInfo* auth_info) {
183  DCHECK_EQ(request, cur_request_.get());
184  // TODO(eroman): http://crbug.com/77366
185  LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
186  result_code_ = ERR_NOT_IMPLEMENTED;
187  request->CancelAuth();
188}
189
190void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
191                                                   const SSLInfo& ssl_info,
192                                                   bool fatal) {
193  DCHECK_EQ(request, cur_request_.get());
194  // Revocation check failures are not fatal.
195  if (IsCertStatusMinorError(ssl_info.cert_status)) {
196    request->ContinueDespiteLastError();
197    return;
198  }
199  LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
200  // Certificate errors are in same space as net errors.
201  result_code_ = MapCertStatusToNetError(ssl_info.cert_status);
202  request->Cancel();
203}
204
205void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
206  DCHECK_EQ(request, cur_request_.get());
207
208  if (!request->status().is_success()) {
209    OnResponseCompleted(request);
210    return;
211  }
212
213  // Require HTTP responses to have a success status code.
214  if (request->url().SchemeIs("http") || request->url().SchemeIs("https")) {
215    // NOTE about status codes: We are like Firefox 3 in this respect.
216    // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
217    if (request->GetResponseCode() != 200) {
218      VLOG(1) << "Fetched PAC script had (bad) status line: "
219              << request->response_headers()->GetStatusLine();
220      result_code_ = ERR_PAC_STATUS_NOT_OK;
221      request->Cancel();
222      return;
223    }
224
225    // NOTE about mime types: We do not enforce mime types on PAC files.
226    // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
227    // however log mismatches to help with debugging.
228    std::string mime_type;
229    cur_request_->GetMimeType(&mime_type);
230    if (!IsPacMimeType(mime_type)) {
231      VLOG(1) << "Fetched PAC script does not have a proper mime type: "
232              << mime_type;
233    }
234  }
235
236  ReadBody(request);
237}
238
239void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
240                                             int num_bytes) {
241  DCHECK_EQ(request, cur_request_.get());
242  if (ConsumeBytesRead(request, num_bytes)) {
243    // Keep reading.
244    ReadBody(request);
245  }
246}
247
248void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
249  // Read as many bytes as are available synchronously.
250  while (true) {
251    int num_bytes;
252    if (!request->Read(buf_.get(), kBufSize, &num_bytes)) {
253      // Check whether the read failed synchronously.
254      if (!request->status().is_io_pending())
255        OnResponseCompleted(request);
256      return;
257    }
258    if (!ConsumeBytesRead(request, num_bytes))
259      return;
260  }
261}
262
263bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
264                                              int num_bytes) {
265  if (num_bytes <= 0) {
266    // Error while reading, or EOF.
267    OnResponseCompleted(request);
268    return false;
269  }
270
271  // Enforce maximum size bound.
272  if (num_bytes + bytes_read_so_far_.size() >
273      static_cast<size_t>(max_response_bytes_)) {
274    result_code_ = ERR_FILE_TOO_BIG;
275    request->Cancel();
276    return false;
277  }
278
279  bytes_read_so_far_.append(buf_->data(), num_bytes);
280  return true;
281}
282
283void ProxyScriptFetcherImpl::FetchCompleted() {
284  if (result_code_ == OK) {
285    // The caller expects the response to be encoded as UTF16.
286    std::string charset;
287    cur_request_->GetCharset(&charset);
288    ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
289  } else {
290    // On error, the caller expects empty string for bytes.
291    result_text_->clear();
292  }
293
294  int result_code = result_code_;
295  CompletionCallback callback = callback_;
296
297  ResetCurRequestState();
298
299  callback.Run(result_code);
300}
301
302void ProxyScriptFetcherImpl::ResetCurRequestState() {
303  cur_request_.reset();
304  cur_request_id_ = 0;
305  callback_.Reset();
306  result_code_ = OK;
307  result_text_ = NULL;
308}
309
310void ProxyScriptFetcherImpl::OnTimeout(int id) {
311  // Timeout tasks may outlive the URLRequest they reference. Make sure it
312  // is still applicable.
313  if (cur_request_id_ != id)
314    return;
315
316  DCHECK(cur_request_.get());
317  result_code_ = ERR_TIMED_OUT;
318  cur_request_->Cancel();
319}
320
321}  // namespace net
322