1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "google_apis/gcm/engine/connection_factory_impl.h"
6
7#include "base/message_loop/message_loop.h"
8#include "base/metrics/histogram.h"
9#include "base/metrics/sparse_histogram.h"
10#include "google_apis/gcm/engine/connection_handler_impl.h"
11#include "google_apis/gcm/monitoring/gcm_stats_recorder.h"
12#include "google_apis/gcm/protocol/mcs.pb.h"
13#include "net/base/load_flags.h"
14#include "net/base/net_errors.h"
15#include "net/http/http_network_session.h"
16#include "net/http/http_request_headers.h"
17#include "net/proxy/proxy_info.h"
18#include "net/socket/client_socket_handle.h"
19#include "net/socket/client_socket_pool_manager.h"
20#include "net/ssl/ssl_config_service.h"
21
22namespace gcm {
23
24namespace {
25
26// The amount of time a Socket read should wait before timing out.
27const int kReadTimeoutMs = 30000;  // 30 seconds.
28
29// If a connection is reset after succeeding within this window of time,
30// the previous backoff entry is restored (and the connection success is treated
31// as if it was transient).
32const int kConnectionResetWindowSecs = 10;  // 10 seconds.
33
34// Decides whether the last login was within kConnectionResetWindowSecs of now
35// or not.
36bool ShouldRestorePreviousBackoff(const base::TimeTicks& login_time,
37                                  const base::TimeTicks& now_ticks) {
38  return !login_time.is_null() &&
39      now_ticks - login_time <=
40          base::TimeDelta::FromSeconds(kConnectionResetWindowSecs);
41}
42
43}  // namespace
44
45ConnectionFactoryImpl::ConnectionFactoryImpl(
46    const std::vector<GURL>& mcs_endpoints,
47    const net::BackoffEntry::Policy& backoff_policy,
48    const scoped_refptr<net::HttpNetworkSession>& gcm_network_session,
49    const scoped_refptr<net::HttpNetworkSession>& http_network_session,
50    net::NetLog* net_log,
51    GCMStatsRecorder* recorder)
52  : mcs_endpoints_(mcs_endpoints),
53    next_endpoint_(0),
54    last_successful_endpoint_(0),
55    backoff_policy_(backoff_policy),
56    gcm_network_session_(gcm_network_session),
57    http_network_session_(http_network_session),
58    bound_net_log_(
59        net::BoundNetLog::Make(net_log, net::NetLog::SOURCE_SOCKET)),
60    pac_request_(NULL),
61    connecting_(false),
62    waiting_for_backoff_(false),
63    waiting_for_network_online_(false),
64    logging_in_(false),
65    recorder_(recorder),
66    listener_(NULL),
67    weak_ptr_factory_(this) {
68  DCHECK_GE(mcs_endpoints_.size(), 1U);
69  DCHECK(!http_network_session_.get() ||
70         (gcm_network_session_.get() != http_network_session_.get()));
71}
72
73ConnectionFactoryImpl::~ConnectionFactoryImpl() {
74  CloseSocket();
75  net::NetworkChangeNotifier::RemoveNetworkChangeObserver(this);
76  if (pac_request_) {
77    gcm_network_session_->proxy_service()->CancelPacRequest(pac_request_);
78    pac_request_ = NULL;
79  }
80}
81
82void ConnectionFactoryImpl::Initialize(
83    const BuildLoginRequestCallback& request_builder,
84    const ConnectionHandler::ProtoReceivedCallback& read_callback,
85    const ConnectionHandler::ProtoSentCallback& write_callback) {
86  DCHECK(!connection_handler_);
87
88  previous_backoff_ = CreateBackoffEntry(&backoff_policy_);
89  backoff_entry_ = CreateBackoffEntry(&backoff_policy_);
90  request_builder_ = request_builder;
91
92  net::NetworkChangeNotifier::AddNetworkChangeObserver(this);
93  waiting_for_network_online_ = net::NetworkChangeNotifier::IsOffline();
94  connection_handler_ = CreateConnectionHandler(
95      base::TimeDelta::FromMilliseconds(kReadTimeoutMs),
96      read_callback,
97      write_callback,
98      base::Bind(&ConnectionFactoryImpl::ConnectionHandlerCallback,
99                 weak_ptr_factory_.GetWeakPtr())).Pass();
100}
101
102ConnectionHandler* ConnectionFactoryImpl::GetConnectionHandler() const {
103  return connection_handler_.get();
104}
105
106void ConnectionFactoryImpl::Connect() {
107  DCHECK(connection_handler_);
108
109  if (connecting_ || waiting_for_backoff_)
110    return;  // Connection attempt already in progress or pending.
111
112  if (IsEndpointReachable())
113    return;  // Already connected.
114
115  ConnectWithBackoff();
116}
117
118void ConnectionFactoryImpl::ConnectWithBackoff() {
119  // If a canary managed to connect while a backoff expiration was pending,
120  // just cleanup the internal state.
121  if (connecting_ || logging_in_ || IsEndpointReachable()) {
122    waiting_for_backoff_ = false;
123    return;
124  }
125
126  if (backoff_entry_->ShouldRejectRequest()) {
127    DVLOG(1) << "Delaying MCS endpoint connection for "
128             << backoff_entry_->GetTimeUntilRelease().InMilliseconds()
129             << " milliseconds.";
130    waiting_for_backoff_ = true;
131    recorder_->RecordConnectionDelayedDueToBackoff(
132        backoff_entry_->GetTimeUntilRelease().InMilliseconds());
133    base::MessageLoop::current()->PostDelayedTask(
134        FROM_HERE,
135        base::Bind(&ConnectionFactoryImpl::ConnectWithBackoff,
136                   weak_ptr_factory_.GetWeakPtr()),
137        backoff_entry_->GetTimeUntilRelease());
138    return;
139  }
140
141  DVLOG(1) << "Attempting connection to MCS endpoint.";
142  waiting_for_backoff_ = false;
143  ConnectImpl();
144}
145
146bool ConnectionFactoryImpl::IsEndpointReachable() const {
147  return connection_handler_ && connection_handler_->CanSendMessage();
148}
149
150std::string ConnectionFactoryImpl::GetConnectionStateString() const {
151  if (IsEndpointReachable())
152    return "CONNECTED";
153  if (logging_in_)
154    return "LOGGING IN";
155  if (connecting_)
156    return "CONNECTING";
157  if (waiting_for_backoff_)
158    return "WAITING FOR BACKOFF";
159  if (waiting_for_network_online_)
160    return "WAITING FOR NETWORK CHANGE";
161  return "NOT CONNECTED";
162}
163
164void ConnectionFactoryImpl::SignalConnectionReset(
165    ConnectionResetReason reason) {
166  // A failure can trigger multiple resets, so no need to do anything if a
167  // connection is already in progress.
168  if (connecting_) {
169    DVLOG(1) << "Connection in progress, ignoring reset.";
170    return;
171  }
172
173  if (listener_)
174    listener_->OnDisconnected();
175
176  UMA_HISTOGRAM_ENUMERATION("GCM.ConnectionResetReason",
177                            reason,
178                            CONNECTION_RESET_COUNT);
179  recorder_->RecordConnectionResetSignaled(reason);
180  if (!last_login_time_.is_null()) {
181    UMA_HISTOGRAM_CUSTOM_TIMES("GCM.ConnectionUpTime",
182                               NowTicks() - last_login_time_,
183                               base::TimeDelta::FromSeconds(1),
184                               base::TimeDelta::FromHours(24),
185                               50);
186    // |last_login_time_| will be reset below, before attempting the new
187    // connection.
188  }
189
190  CloseSocket();
191  DCHECK(!IsEndpointReachable());
192
193  // TODO(zea): if the network is offline, don't attempt to connect.
194  // See crbug.com/396687
195
196  // Network changes get special treatment as they can trigger a one-off canary
197  // request that bypasses backoff (but does nothing if a connection is in
198  // progress). Other connection reset events can be ignored as a connection
199  // is already awaiting backoff expiration.
200  if (waiting_for_backoff_ && reason != NETWORK_CHANGE) {
201    DVLOG(1) << "Backoff expiration pending, ignoring reset.";
202    return;
203  }
204
205  if (logging_in_) {
206    // Failures prior to login completion just reuse the existing backoff entry.
207    logging_in_ = false;
208    backoff_entry_->InformOfRequest(false);
209  } else if (reason == LOGIN_FAILURE ||
210             ShouldRestorePreviousBackoff(last_login_time_, NowTicks())) {
211    // Failures due to login, or within the reset window of a login, restore
212    // the backoff entry that was saved off at login completion time.
213    backoff_entry_.swap(previous_backoff_);
214    backoff_entry_->InformOfRequest(false);
215  } else if (reason == NETWORK_CHANGE) {
216    ConnectImpl();  // Canary attempts bypass backoff without resetting it.
217    return;
218  } else {
219    // We shouldn't be in backoff in thise case.
220    DCHECK_EQ(0, backoff_entry_->failure_count());
221  }
222
223  // At this point the last login time has been consumed or deemed irrelevant,
224  // reset it.
225  last_login_time_ = base::TimeTicks();
226
227  Connect();
228}
229
230void ConnectionFactoryImpl::SetConnectionListener(
231    ConnectionListener* listener) {
232  listener_ = listener;
233}
234
235base::TimeTicks ConnectionFactoryImpl::NextRetryAttempt() const {
236  if (!backoff_entry_)
237    return base::TimeTicks();
238  return backoff_entry_->GetReleaseTime();
239}
240
241void ConnectionFactoryImpl::OnNetworkChanged(
242    net::NetworkChangeNotifier::ConnectionType type) {
243  if (type == net::NetworkChangeNotifier::CONNECTION_NONE) {
244    DVLOG(1) << "Network lost, resettion connection.";
245    waiting_for_network_online_ = true;
246
247    // Will do nothing due to |waiting_for_network_online_ == true|.
248    // TODO(zea): make the above statement actually true. See crbug.com/396687
249    SignalConnectionReset(NETWORK_CHANGE);
250    return;
251  }
252
253  DVLOG(1) << "Connection type changed to " << type << ", reconnecting.";
254  waiting_for_network_online_ = false;
255  SignalConnectionReset(NETWORK_CHANGE);
256}
257
258GURL ConnectionFactoryImpl::GetCurrentEndpoint() const {
259  // Note that IsEndpointReachable() returns false anytime connecting_ is true,
260  // so while connecting this always uses |next_endpoint_|.
261  if (IsEndpointReachable())
262    return mcs_endpoints_[last_successful_endpoint_];
263  return mcs_endpoints_[next_endpoint_];
264}
265
266net::IPEndPoint ConnectionFactoryImpl::GetPeerIP() {
267  if (!socket_handle_.socket())
268    return net::IPEndPoint();
269
270  net::IPEndPoint ip_endpoint;
271  int result = socket_handle_.socket()->GetPeerAddress(&ip_endpoint);
272  if (result != net::OK)
273    return net::IPEndPoint();
274
275  return ip_endpoint;
276}
277
278void ConnectionFactoryImpl::ConnectImpl() {
279  DCHECK(!IsEndpointReachable());
280  DCHECK(!socket_handle_.socket());
281
282  // TODO(zea): if the network is offline, don't attempt to connect.
283  // See crbug.com/396687
284
285  connecting_ = true;
286  GURL current_endpoint = GetCurrentEndpoint();
287  recorder_->RecordConnectionInitiated(current_endpoint.host());
288  RebuildNetworkSessionAuthCache();
289  int status = gcm_network_session_->proxy_service()->ResolveProxy(
290      current_endpoint,
291      net::LOAD_NORMAL,
292      &proxy_info_,
293      base::Bind(&ConnectionFactoryImpl::OnProxyResolveDone,
294                 weak_ptr_factory_.GetWeakPtr()),
295      &pac_request_,
296      NULL,
297      bound_net_log_);
298  if (status != net::ERR_IO_PENDING)
299    OnProxyResolveDone(status);
300}
301
302void ConnectionFactoryImpl::InitHandler() {
303  // May be null in tests.
304  mcs_proto::LoginRequest login_request;
305  if (!request_builder_.is_null()) {
306    request_builder_.Run(&login_request);
307    DCHECK(login_request.IsInitialized());
308  }
309
310  connection_handler_->Init(login_request, socket_handle_.socket());
311}
312
313scoped_ptr<net::BackoffEntry> ConnectionFactoryImpl::CreateBackoffEntry(
314    const net::BackoffEntry::Policy* const policy) {
315  return scoped_ptr<net::BackoffEntry>(new net::BackoffEntry(policy));
316}
317
318scoped_ptr<ConnectionHandler> ConnectionFactoryImpl::CreateConnectionHandler(
319    base::TimeDelta read_timeout,
320    const ConnectionHandler::ProtoReceivedCallback& read_callback,
321    const ConnectionHandler::ProtoSentCallback& write_callback,
322    const ConnectionHandler::ConnectionChangedCallback& connection_callback) {
323  return make_scoped_ptr<ConnectionHandler>(
324      new ConnectionHandlerImpl(read_timeout,
325                                read_callback,
326                                write_callback,
327                                connection_callback));
328}
329
330base::TimeTicks ConnectionFactoryImpl::NowTicks() {
331  return base::TimeTicks::Now();
332}
333
334void ConnectionFactoryImpl::OnConnectDone(int result) {
335  if (result != net::OK) {
336    // If the connection fails, try another proxy.
337    result = ReconsiderProxyAfterError(result);
338    // ReconsiderProxyAfterError either returns an error (in which case it is
339    // not reconsidering a proxy) or returns ERR_IO_PENDING if it is considering
340    // another proxy.
341    DCHECK_NE(result, net::OK);
342    if (result == net::ERR_IO_PENDING)
343      return;  // Proxy reconsideration pending. Return.
344    LOG(ERROR) << "Failed to connect to MCS endpoint with error " << result;
345    UMA_HISTOGRAM_BOOLEAN("GCM.ConnectionSuccessRate", false);
346    recorder_->RecordConnectionFailure(result);
347    CloseSocket();
348    backoff_entry_->InformOfRequest(false);
349    UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionFailureErrorCode", result);
350
351    // If there are other endpoints available, use the next endpoint on the
352    // subsequent retry.
353    next_endpoint_++;
354    if (next_endpoint_ >= mcs_endpoints_.size())
355      next_endpoint_ = 0;
356    connecting_ = false;
357    Connect();
358    return;
359  }
360
361  UMA_HISTOGRAM_BOOLEAN("GCM.ConnectionSuccessRate", true);
362  UMA_HISTOGRAM_COUNTS("GCM.ConnectionEndpoint", next_endpoint_);
363  UMA_HISTOGRAM_BOOLEAN("GCM.ConnectedViaProxy",
364                        !(proxy_info_.is_empty() || proxy_info_.is_direct()));
365  ReportSuccessfulProxyConnection();
366  recorder_->RecordConnectionSuccess();
367
368  // Reset the endpoint back to the default.
369  // TODO(zea): consider prioritizing endpoints more intelligently based on
370  // which ones succeed most for this client? Although that will affect
371  // measuring the success rate of the default endpoint vs fallback.
372  last_successful_endpoint_ = next_endpoint_;
373  next_endpoint_ = 0;
374  connecting_ = false;
375  logging_in_ = true;
376  DVLOG(1) << "MCS endpoint socket connection success, starting login.";
377  InitHandler();
378}
379
380void ConnectionFactoryImpl::ConnectionHandlerCallback(int result) {
381  DCHECK(!connecting_);
382  if (result != net::OK) {
383    // TODO(zea): Consider how to handle errors that may require some sort of
384    // user intervention (login page, etc.).
385    UMA_HISTOGRAM_SPARSE_SLOWLY("GCM.ConnectionDisconnectErrorCode", result);
386    SignalConnectionReset(SOCKET_FAILURE);
387    return;
388  }
389
390  // Handshake complete, reset backoff. If the login failed with an error,
391  // the client should invoke SignalConnectionReset(LOGIN_FAILURE), which will
392  // restore the previous backoff.
393  DVLOG(1) << "Handshake complete.";
394  last_login_time_ = NowTicks();
395  previous_backoff_.swap(backoff_entry_);
396  backoff_entry_->Reset();
397  logging_in_ = false;
398
399  if (listener_)
400    listener_->OnConnected(GetCurrentEndpoint(), GetPeerIP());
401}
402
403// This has largely been copied from
404// HttpStreamFactoryImpl::Job::DoResolveProxyComplete. This should be
405// refactored into some common place.
406void ConnectionFactoryImpl::OnProxyResolveDone(int status) {
407  pac_request_ = NULL;
408  DVLOG(1) << "Proxy resolution status: " << status;
409
410  DCHECK_NE(status, net::ERR_IO_PENDING);
411  if (status == net::OK) {
412    // Remove unsupported proxies from the list.
413    proxy_info_.RemoveProxiesWithoutScheme(
414        net::ProxyServer::SCHEME_DIRECT |
415        net::ProxyServer::SCHEME_HTTP | net::ProxyServer::SCHEME_HTTPS |
416        net::ProxyServer::SCHEME_SOCKS4 | net::ProxyServer::SCHEME_SOCKS5);
417
418    if (proxy_info_.is_empty()) {
419      // No proxies/direct to choose from. This happens when we don't support
420      // any of the proxies in the returned list.
421      status = net::ERR_NO_SUPPORTED_PROXIES;
422    }
423  }
424
425  if (status != net::OK) {
426    // Failed to resolve proxy. Retry later.
427    OnConnectDone(status);
428    return;
429  }
430
431  DVLOG(1) << "Resolved proxy with PAC:" << proxy_info_.ToPacString();
432
433  net::SSLConfig ssl_config;
434  gcm_network_session_->ssl_config_service()->GetSSLConfig(&ssl_config);
435  status = net::InitSocketHandleForTlsConnect(
436      net::HostPortPair::FromURL(GetCurrentEndpoint()),
437      gcm_network_session_.get(),
438      proxy_info_,
439      ssl_config,
440      ssl_config,
441      net::PRIVACY_MODE_DISABLED,
442      bound_net_log_,
443      &socket_handle_,
444      base::Bind(&ConnectionFactoryImpl::OnConnectDone,
445                 weak_ptr_factory_.GetWeakPtr()));
446  if (status != net::ERR_IO_PENDING)
447    OnConnectDone(status);
448}
449
450// This has largely been copied from
451// HttpStreamFactoryImpl::Job::ReconsiderProxyAfterError. This should be
452// refactored into some common place.
453// This method reconsiders the proxy on certain errors. If it does reconsider
454// a proxy it always returns ERR_IO_PENDING and posts a call to
455// OnProxyResolveDone with the result of the reconsideration.
456int ConnectionFactoryImpl::ReconsiderProxyAfterError(int error) {
457  DCHECK(!pac_request_);
458  DCHECK_NE(error, net::OK);
459  DCHECK_NE(error, net::ERR_IO_PENDING);
460  // A failure to resolve the hostname or any error related to establishing a
461  // TCP connection could be grounds for trying a new proxy configuration.
462  //
463  // Why do this when a hostname cannot be resolved?  Some URLs only make sense
464  // to proxy servers.  The hostname in those URLs might fail to resolve if we
465  // are still using a non-proxy config.  We need to check if a proxy config
466  // now exists that corresponds to a proxy server that could load the URL.
467  //
468  switch (error) {
469    case net::ERR_PROXY_CONNECTION_FAILED:
470    case net::ERR_NAME_NOT_RESOLVED:
471    case net::ERR_INTERNET_DISCONNECTED:
472    case net::ERR_ADDRESS_UNREACHABLE:
473    case net::ERR_CONNECTION_CLOSED:
474    case net::ERR_CONNECTION_TIMED_OUT:
475    case net::ERR_CONNECTION_RESET:
476    case net::ERR_CONNECTION_REFUSED:
477    case net::ERR_CONNECTION_ABORTED:
478    case net::ERR_TIMED_OUT:
479    case net::ERR_TUNNEL_CONNECTION_FAILED:
480    case net::ERR_SOCKS_CONNECTION_FAILED:
481    // This can happen in the case of trying to talk to a proxy using SSL, and
482    // ending up talking to a captive portal that supports SSL instead.
483    case net::ERR_PROXY_CERTIFICATE_INVALID:
484    // This can happen when trying to talk SSL to a non-SSL server (Like a
485    // captive portal).
486    case net::ERR_SSL_PROTOCOL_ERROR:
487      break;
488    case net::ERR_SOCKS_CONNECTION_HOST_UNREACHABLE:
489      // Remap the SOCKS-specific "host unreachable" error to a more
490      // generic error code (this way consumers like the link doctor
491      // know to substitute their error page).
492      //
493      // Note that if the host resolving was done by the SOCKS5 proxy, we can't
494      // differentiate between a proxy-side "host not found" versus a proxy-side
495      // "address unreachable" error, and will report both of these failures as
496      // ERR_ADDRESS_UNREACHABLE.
497      return net::ERR_ADDRESS_UNREACHABLE;
498    default:
499      return error;
500  }
501
502  net::SSLConfig ssl_config;
503  gcm_network_session_->ssl_config_service()->GetSSLConfig(&ssl_config);
504  if (proxy_info_.is_https() && ssl_config.send_client_cert) {
505    gcm_network_session_->ssl_client_auth_cache()->Remove(
506        proxy_info_.proxy_server().host_port_pair());
507  }
508
509  int status = gcm_network_session_->proxy_service()->ReconsiderProxyAfterError(
510      GetCurrentEndpoint(), net::LOAD_NORMAL, error, &proxy_info_,
511      base::Bind(&ConnectionFactoryImpl::OnProxyResolveDone,
512                 weak_ptr_factory_.GetWeakPtr()),
513      &pac_request_,
514      NULL,
515      bound_net_log_);
516  if (status == net::OK || status == net::ERR_IO_PENDING) {
517    CloseSocket();
518  } else {
519    // If ReconsiderProxyAfterError() failed synchronously, it means
520    // there was nothing left to fall-back to, so fail the transaction
521    // with the last connection error we got.
522    status = error;
523  }
524
525  // If there is new proxy info, post OnProxyResolveDone to retry it. Otherwise,
526  // if there was an error falling back, fail synchronously.
527  if (status == net::OK) {
528    base::MessageLoop::current()->PostTask(
529        FROM_HERE,
530        base::Bind(&ConnectionFactoryImpl::OnProxyResolveDone,
531                   weak_ptr_factory_.GetWeakPtr(), status));
532    status = net::ERR_IO_PENDING;
533  }
534  return status;
535}
536
537void ConnectionFactoryImpl::ReportSuccessfulProxyConnection() {
538  if (gcm_network_session_.get() && gcm_network_session_->proxy_service())
539    gcm_network_session_->proxy_service()->ReportSuccess(proxy_info_, NULL);
540}
541
542void ConnectionFactoryImpl::CloseSocket() {
543  // The connection handler needs to be reset, else it'll attempt to keep using
544  // the destroyed socket.
545  if (connection_handler_)
546    connection_handler_->Reset();
547
548  if (socket_handle_.socket() && socket_handle_.socket()->IsConnected())
549    socket_handle_.socket()->Disconnect();
550  socket_handle_.Reset();
551}
552
553void ConnectionFactoryImpl::RebuildNetworkSessionAuthCache() {
554  if (!http_network_session_.get() || !http_network_session_->http_auth_cache())
555    return;
556
557  gcm_network_session_->http_auth_cache()->UpdateAllFrom(
558      *http_network_session_->http_auth_cache());
559}
560
561}  // namespace gcm
562