1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/proxy/proxy_script_fetcher_impl.h"
6
7#include "base/compiler_specific.h"
8#include "base/i18n/icu_string_conversions.h"
9#include "base/logging.h"
10#include "base/message_loop.h"
11#include "base/string_util.h"
12#include "net/base/data_url.h"
13#include "net/base/io_buffer.h"
14#include "net/base/load_flags.h"
15#include "net/base/net_errors.h"
16#include "net/http/http_response_headers.h"
17#include "net/url_request/url_request_context.h"
18
19// TODO(eroman):
20//   - Support auth-prompts (http://crbug.com/77366)
21
22namespace net {
23
24namespace {
25
// Upper bound, in bytes, on the size of a PAC script response. A response
// that exceeds it fails with ERR_FILE_TOO_BIG.
const int kDefaultMaxResponseBytes = 1024 * 1024;  // 1 megabyte

// Upper bound, in milliseconds, on the time a PAC script fetch may take. A
// fetch that exceeds it fails with ERR_TIMED_OUT.
const int kDefaultMaxDurationMs = 5 * 60 * 1000;  // 5 minutes
33
34// Returns true if |mime_type| is one of the known PAC mime type.
35bool IsPacMimeType(const std::string& mime_type) {
36  static const char * const kSupportedPacMimeTypes[] = {
37    "application/x-ns-proxy-autoconfig",
38    "application/x-javascript-config",
39  };
40  for (size_t i = 0; i < arraysize(kSupportedPacMimeTypes); ++i) {
41    if (LowerCaseEqualsASCII(mime_type, kSupportedPacMimeTypes[i]))
42      return true;
43  }
44  return false;
45}
46
47// Converts |bytes| (which is encoded by |charset|) to UTF16, saving the resul
48// to |*utf16|.
49// If |charset| is empty, then we don't know what it was and guess.
50void ConvertResponseToUTF16(const std::string& charset,
51                            const std::string& bytes,
52                            string16* utf16) {
53  const char* codepage;
54
55  if (charset.empty()) {
56    // Assume ISO-8859-1 if no charset was specified.
57    codepage = base::kCodepageLatin1;
58  } else {
59    // Otherwise trust the charset that was provided.
60    codepage = charset.c_str();
61  }
62
63  // We will be generous in the conversion -- if any characters lie
64  // outside of |charset| (i.e. invalid), then substitute them with
65  // U+FFFD rather than failing.
66  base::CodepageToUTF16(bytes, codepage,
67                        base::OnStringConversionError::SUBSTITUTE,
68                        utf16);
69}
70
71}  // namespace
72
73ProxyScriptFetcherImpl::ProxyScriptFetcherImpl(
74    URLRequestContext* url_request_context)
75    : ALLOW_THIS_IN_INITIALIZER_LIST(task_factory_(this)),
76      url_request_context_(url_request_context),
77      buf_(new IOBuffer(kBufSize)),
78      next_id_(0),
79      cur_request_(NULL),
80      cur_request_id_(0),
81      callback_(NULL),
82      result_code_(OK),
83      result_text_(NULL),
84      max_response_bytes_(kDefaultMaxResponseBytes),
85      max_duration_(base::TimeDelta::FromMilliseconds(kDefaultMaxDurationMs)) {
86  DCHECK(url_request_context);
87}
88
89ProxyScriptFetcherImpl::~ProxyScriptFetcherImpl() {
90  // The URLRequest's destructor will cancel the outstanding request, and
91  // ensure that the delegate (this) is not called again.
92}
93
94base::TimeDelta ProxyScriptFetcherImpl::SetTimeoutConstraint(
95    base::TimeDelta timeout) {
96  base::TimeDelta prev = max_duration_;
97  max_duration_ = timeout;
98  return prev;
99}
100
101size_t ProxyScriptFetcherImpl::SetSizeConstraint(size_t size_bytes) {
102  size_t prev = max_response_bytes_;
103  max_response_bytes_ = size_bytes;
104  return prev;
105}
106
107void ProxyScriptFetcherImpl::OnResponseCompleted(URLRequest* request) {
108  DCHECK_EQ(request, cur_request_.get());
109
110  // Use |result_code_| as the request's error if we have already set it to
111  // something specific.
112  if (result_code_ == OK && !request->status().is_success())
113    result_code_ = request->status().os_error();
114
115  FetchCompleted();
116}
117
118int ProxyScriptFetcherImpl::Fetch(const GURL& url,
119                                  string16* text,
120                                  CompletionCallback* callback) {
121  // It is invalid to call Fetch() while a request is already in progress.
122  DCHECK(!cur_request_.get());
123
124  DCHECK(callback);
125  DCHECK(text);
126
127  // Handle base-64 encoded data-urls that contain custom PAC scripts.
128  if (url.SchemeIs("data")) {
129    std::string mime_type;
130    std::string charset;
131    std::string data;
132    if (!DataURL::Parse(url, &mime_type, &charset, &data))
133      return ERR_FAILED;
134
135    ConvertResponseToUTF16(charset, data, text);
136    return OK;
137  }
138
139  cur_request_.reset(new URLRequest(url, this));
140  cur_request_->set_context(url_request_context_);
141  cur_request_->set_method("GET");
142
143  // Make sure that the PAC script is downloaded using a direct connection,
144  // to avoid circular dependencies (fetching is a part of proxy resolution).
145  // Also disable the use of the disk cache. The cache is disabled so that if
146  // the user switches networks we don't potentially use the cached response
147  // from old network when we should in fact be re-fetching on the new network.
148  cur_request_->set_load_flags(LOAD_BYPASS_PROXY | LOAD_DISABLE_CACHE);
149
150  // Save the caller's info for notification on completion.
151  callback_ = callback;
152  result_text_ = text;
153
154  bytes_read_so_far_.clear();
155
156  // Post a task to timeout this request if it takes too long.
157  cur_request_id_ = ++next_id_;
158  MessageLoop::current()->PostDelayedTask(FROM_HERE,
159      task_factory_.NewRunnableMethod(&ProxyScriptFetcherImpl::OnTimeout,
160                                      cur_request_id_),
161      static_cast<int>(max_duration_.InMilliseconds()));
162
163  // Start the request.
164  cur_request_->Start();
165  return ERR_IO_PENDING;
166}
167
168void ProxyScriptFetcherImpl::Cancel() {
169  // ResetCurRequestState will free the URLRequest, which will cause
170  // cancellation.
171  ResetCurRequestState();
172}
173
174URLRequestContext* ProxyScriptFetcherImpl::GetRequestContext() {
175  return url_request_context_;
176}
177
178void ProxyScriptFetcherImpl::OnAuthRequired(URLRequest* request,
179                                            AuthChallengeInfo* auth_info) {
180  DCHECK_EQ(request, cur_request_.get());
181  // TODO(eroman): http://crbug.com/77366
182  LOG(WARNING) << "Auth required to fetch PAC script, aborting.";
183  result_code_ = ERR_NOT_IMPLEMENTED;
184  request->CancelAuth();
185}
186
187void ProxyScriptFetcherImpl::OnSSLCertificateError(URLRequest* request,
188                                                   int cert_error,
189                                                   X509Certificate* cert) {
190  DCHECK_EQ(request, cur_request_.get());
191  LOG(WARNING) << "SSL certificate error when fetching PAC script, aborting.";
192  // Certificate errors are in same space as net errors.
193  result_code_ = cert_error;
194  request->Cancel();
195}
196
197void ProxyScriptFetcherImpl::OnResponseStarted(URLRequest* request) {
198  DCHECK_EQ(request, cur_request_.get());
199
200  if (!request->status().is_success()) {
201    OnResponseCompleted(request);
202    return;
203  }
204
205  // Require HTTP responses to have a success status code.
206  if (request->url().SchemeIs("http") || request->url().SchemeIs("https")) {
207    // NOTE about status codes: We are like Firefox 3 in this respect.
208    // {IE 7, Safari 3, Opera 9.5} do not care about the status code.
209    if (request->GetResponseCode() != 200) {
210      VLOG(1) << "Fetched PAC script had (bad) status line: "
211              << request->response_headers()->GetStatusLine();
212      result_code_ = ERR_PAC_STATUS_NOT_OK;
213      request->Cancel();
214      return;
215    }
216
217    // NOTE about mime types: We do not enforce mime types on PAC files.
218    // This is for compatibility with {IE 7, Firefox 3, Opera 9.5}. We will
219    // however log mismatches to help with debugging.
220    std::string mime_type;
221    cur_request_->GetMimeType(&mime_type);
222    if (!IsPacMimeType(mime_type)) {
223      VLOG(1) << "Fetched PAC script does not have a proper mime type: "
224              << mime_type;
225    }
226  }
227
228  ReadBody(request);
229}
230
231void ProxyScriptFetcherImpl::OnReadCompleted(URLRequest* request,
232                                             int num_bytes) {
233  DCHECK_EQ(request, cur_request_.get());
234  if (ConsumeBytesRead(request, num_bytes)) {
235    // Keep reading.
236    ReadBody(request);
237  }
238}
239
240void ProxyScriptFetcherImpl::ReadBody(URLRequest* request) {
241  // Read as many bytes as are available synchronously.
242  while (true) {
243    int num_bytes;
244    if (!request->Read(buf_, kBufSize, &num_bytes)) {
245      // Check whether the read failed synchronously.
246      if (!request->status().is_io_pending())
247        OnResponseCompleted(request);
248      return;
249    }
250    if (!ConsumeBytesRead(request, num_bytes))
251      return;
252  }
253}
254
255bool ProxyScriptFetcherImpl::ConsumeBytesRead(URLRequest* request,
256                                              int num_bytes) {
257  if (num_bytes <= 0) {
258    // Error while reading, or EOF.
259    OnResponseCompleted(request);
260    return false;
261  }
262
263  // Enforce maximum size bound.
264  if (num_bytes + bytes_read_so_far_.size() >
265      static_cast<size_t>(max_response_bytes_)) {
266    result_code_ = ERR_FILE_TOO_BIG;
267    request->Cancel();
268    return false;
269  }
270
271  bytes_read_so_far_.append(buf_->data(), num_bytes);
272  return true;
273}
274
275void ProxyScriptFetcherImpl::FetchCompleted() {
276  if (result_code_ == OK) {
277    // The caller expects the response to be encoded as UTF16.
278    std::string charset;
279    cur_request_->GetCharset(&charset);
280    ConvertResponseToUTF16(charset, bytes_read_so_far_, result_text_);
281  } else {
282    // On error, the caller expects empty string for bytes.
283    result_text_->clear();
284  }
285
286  int result_code = result_code_;
287  CompletionCallback* callback = callback_;
288
289  // Hold a reference to the URLRequestContext to prevent re-entrancy from
290  // ~URLRequestContext.
291  scoped_refptr<URLRequestContext> context(cur_request_->context());
292  ResetCurRequestState();
293
294  callback->Run(result_code);
295}
296
297void ProxyScriptFetcherImpl::ResetCurRequestState() {
298  cur_request_.reset();
299  cur_request_id_ = 0;
300  callback_ = NULL;
301  result_code_ = OK;
302  result_text_ = NULL;
303}
304
305void ProxyScriptFetcherImpl::OnTimeout(int id) {
306  // Timeout tasks may outlive the URLRequest they reference. Make sure it
307  // is still applicable.
308  if (cur_request_id_ != id)
309    return;
310
311  DCHECK(cur_request_.get());
312  result_code_ = ERR_TIMED_OUT;
313  cur_request_->Cancel();
314}
315
316}  // namespace net
317