1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/socket/tcp_client_socket_win.h"
6
7#include <mstcpip.h>
8
9#include "base/basictypes.h"
10#include "base/compiler_specific.h"
11#include "base/metrics/stats_counters.h"
12#include "base/strings/string_util.h"
13#include "base/win/object_watcher.h"
14#include "base/win/windows_version.h"
15#include "net/base/connection_type_histograms.h"
16#include "net/base/io_buffer.h"
17#include "net/base/ip_endpoint.h"
18#include "net/base/net_errors.h"
19#include "net/base/net_log.h"
20#include "net/base/net_util.h"
21#include "net/base/network_change_notifier.h"
22#include "net/base/winsock_init.h"
23#include "net/base/winsock_util.h"
24#include "net/socket/socket_net_log_params.h"
25
26namespace net {
27
28namespace {
29
// Keep-alive delay, in seconds, that SetupSocket() hands to SetTCPKeepAlive()
// for every socket configured by this file.
const int kTCPKeepAliveSeconds = 45;

// Process-wide switch set by TCPClientSocketWin::DisableOverlappedReads().
// Each Core copies the value when it is constructed.
bool g_disable_overlapped_reads = false;
32
33bool SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
34  int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
35                      reinterpret_cast<const char*>(&size), sizeof(size));
36  DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError();
37  return rv == 0;
38}
39
40bool SetSocketSendBufferSize(SOCKET socket, int32 size) {
41  int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
42                      reinterpret_cast<const char*>(&size), sizeof(size));
43  DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError();
44  return rv == 0;
45}
46
47// Disable Nagle.
48// The Nagle implementation on windows is governed by RFC 896.  The idea
49// behind Nagle is to reduce small packets on the network.  When Nagle is
50// enabled, if a partial packet has been sent, the TCP stack will disallow
51// further *partial* packets until an ACK has been received from the other
52// side.  Good applications should always strive to send as much data as
53// possible and avoid partial-packet sends.  However, in most real world
54// applications, there are edge cases where this does not happen, and two
55// partial packets may be sent back to back.  For a browser, it is NEVER
56// a benefit to delay for an RTT before the second packet is sent.
57//
58// As a practical example in Chromium today, consider the case of a small
59// POST.  I have verified this:
60//     Client writes 649 bytes of header  (partial packet #1)
61//     Client writes 50 bytes of POST data (partial packet #2)
62// In the above example, with Nagle, a RTT delay is inserted between these
63// two sends due to nagle.  RTTs can easily be 100ms or more.  The best
64// fix is to make sure that for POSTing data, we write as much data as
65// possible and minimize partial packets.  We will fix that.  But disabling
// Nagle also ensures we don't run into this delay in other edge cases.
67// See also:
68//    http://technet.microsoft.com/en-us/library/bb726981.aspx
69bool DisableNagle(SOCKET socket, bool disable) {
70  BOOL val = disable ? TRUE : FALSE;
71  int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
72                      reinterpret_cast<const char*>(&val),
73                      sizeof(val));
74  DCHECK(!rv) << "Could not disable nagle";
75  return rv == 0;
76}
77
78// Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
79// connections. See http://crbug.com/27400 for details.
80bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
81  int delay = delay_secs * 1000;
82  struct tcp_keepalive keepalive_vals = {
83    enable ? 1 : 0,  // TCP keep-alive on.
84    delay,  // Delay seconds before sending first TCP keep-alive packet.
85    delay,  // Delay seconds between sending TCP keep-alive packets.
86  };
87  DWORD bytes_returned = 0xABAB;
88  int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
89                sizeof(keepalive_vals), NULL, 0,
90                &bytes_returned, NULL, NULL);
91  DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
92              << " [error: " << WSAGetLastError() << "].";
93
94  // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
95  return rv == 0;
96}
97
98// Sets socket parameters. Returns the OS error code (or 0 on
99// success).
100int SetupSocket(SOCKET socket) {
101  // Increase the socket buffer sizes from the default sizes for WinXP.  In
102  // performance testing, there is substantial benefit by increasing from 8KB
103  // to 64KB.
104  // See also:
105  //    http://support.microsoft.com/kb/823764/EN-US
106  // On Vista, if we manually set these sizes, Vista turns off its receive
107  // window auto-tuning feature.
108  //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
109  // Since Vista's auto-tune is better than any static value we can could set,
110  // only change these on pre-vista machines.
111  if (base::win::GetVersion() < base::win::VERSION_VISTA) {
112    const int32 kSocketBufferSize = 64 * 1024;
113    SetSocketReceiveBufferSize(socket, kSocketBufferSize);
114    SetSocketSendBufferSize(socket, kSocketBufferSize);
115  }
116
117  DisableNagle(socket, true);
118  SetTCPKeepAlive(socket, true, kTCPKeepAliveSeconds);
119  return 0;
120}
121
122// Creates a new socket and sets default parameters for it. Returns
123// the OS error code (or 0 on success).
124int CreateSocket(int family, SOCKET* socket) {
125  *socket = CreatePlatformSocket(family, SOCK_STREAM, IPPROTO_TCP);
126  if (*socket == INVALID_SOCKET) {
127    int os_error = WSAGetLastError();
128    LOG(ERROR) << "CreatePlatformSocket failed: " << os_error;
129    return os_error;
130  }
131  int error = SetupSocket(*socket);
132  if (error) {
133    if (closesocket(*socket) < 0)
134      PLOG(ERROR) << "closesocket";
135    *socket = INVALID_SOCKET;
136    return error;
137  }
138  return 0;
139}
140
141int MapConnectError(int os_error) {
142  switch (os_error) {
143    // connect fails with WSAEACCES when Windows Firewall blocks the
144    // connection.
145    case WSAEACCES:
146      return ERR_NETWORK_ACCESS_DENIED;
147    case WSAETIMEDOUT:
148      return ERR_CONNECTION_TIMED_OUT;
149    default: {
150      int net_error = MapSystemError(os_error);
151      if (net_error == ERR_FAILED)
152        return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
153
154      // Give a more specific error when the user is offline.
155      if (net_error == ERR_ADDRESS_UNREACHABLE &&
156          NetworkChangeNotifier::IsOffline()) {
157        return ERR_INTERNET_DISCONNECTED;
158      }
159
160      return net_error;
161    }
162  }
163}
164
165}  // namespace
166
167//-----------------------------------------------------------------------------
168
169// This class encapsulates all the state that has to be preserved as long as
170// there is a network IO operation in progress. If the owner TCPClientSocketWin
171// is destroyed while an operation is in progress, the Core is detached and it
172// lives until the operation completes and the OS doesn't reference any resource
173// declared on this class anymore.
174class TCPClientSocketWin::Core : public base::RefCounted<Core> {
175 public:
176  explicit Core(TCPClientSocketWin* socket);
177
178  // Start watching for the end of a read or write operation.
179  void WatchForRead();
180  void WatchForWrite();
181
182  // The TCPClientSocketWin is going away.
183  void Detach() { socket_ = NULL; }
184
185  // Throttle the read size based on our current slow start state.
186  // Returns the throttled read size.
187  int ThrottleReadSize(int size) {
188    if (slow_start_throttle_ < kMaxSlowStartThrottle) {
189      size = std::min(size, slow_start_throttle_);
190      slow_start_throttle_ *= 2;
191    }
192    return size;
193  }
194
195  // The separate OVERLAPPED variables for asynchronous operation.
196  // |read_overlapped_| is used for both Connect() and Read().
197  // |write_overlapped_| is only used for Write();
198  OVERLAPPED read_overlapped_;
199  OVERLAPPED write_overlapped_;
200
201  // The buffers used in Read() and Write().
202  scoped_refptr<IOBuffer> read_iobuffer_;
203  scoped_refptr<IOBuffer> write_iobuffer_;
204  int read_buffer_length_;
205  int write_buffer_length_;
206
207  // Remember the state of g_disable_overlapped_reads for the duration of the
208  // socket based on what it was when the socket was created.
209  bool disable_overlapped_reads_;
210  bool non_blocking_reads_initialized_;
211
212 private:
213  friend class base::RefCounted<Core>;
214
215  class ReadDelegate : public base::win::ObjectWatcher::Delegate {
216   public:
217    explicit ReadDelegate(Core* core) : core_(core) {}
218    virtual ~ReadDelegate() {}
219
220    // base::ObjectWatcher::Delegate methods:
221    virtual void OnObjectSignaled(HANDLE object);
222
223   private:
224    Core* const core_;
225  };
226
227  class WriteDelegate : public base::win::ObjectWatcher::Delegate {
228   public:
229    explicit WriteDelegate(Core* core) : core_(core) {}
230    virtual ~WriteDelegate() {}
231
232    // base::ObjectWatcher::Delegate methods:
233    virtual void OnObjectSignaled(HANDLE object);
234
235   private:
236    Core* const core_;
237  };
238
239  ~Core();
240
241  // The socket that created this object.
242  TCPClientSocketWin* socket_;
243
244  // |reader_| handles the signals from |read_watcher_|.
245  ReadDelegate reader_;
246  // |writer_| handles the signals from |write_watcher_|.
247  WriteDelegate writer_;
248
249  // |read_watcher_| watches for events from Connect() and Read().
250  base::win::ObjectWatcher read_watcher_;
251  // |write_watcher_| watches for events from Write();
252  base::win::ObjectWatcher write_watcher_;
253
254  // When doing reads from the socket, we try to mirror TCP's slow start.
255  // We do this because otherwise the async IO subsystem artifically delays
256  // returning data to the application.
257  static const int kInitialSlowStartThrottle = 1 * 1024;
258  static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle;
259  int slow_start_throttle_;
260
261  DISALLOW_COPY_AND_ASSIGN(Core);
262};
263
264TCPClientSocketWin::Core::Core(
265    TCPClientSocketWin* socket)
266    : read_buffer_length_(0),
267      write_buffer_length_(0),
268      disable_overlapped_reads_(g_disable_overlapped_reads),
269      non_blocking_reads_initialized_(false),
270      socket_(socket),
271      reader_(this),
272      writer_(this),
273      slow_start_throttle_(kInitialSlowStartThrottle) {
274  memset(&read_overlapped_, 0, sizeof(read_overlapped_));
275  memset(&write_overlapped_, 0, sizeof(write_overlapped_));
276
277  read_overlapped_.hEvent = WSACreateEvent();
278  write_overlapped_.hEvent = WSACreateEvent();
279}
280
281TCPClientSocketWin::Core::~Core() {
282  // Make sure the message loop is not watching this object anymore.
283  read_watcher_.StopWatching();
284  write_watcher_.StopWatching();
285
286  WSACloseEvent(read_overlapped_.hEvent);
287  memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
288  WSACloseEvent(write_overlapped_.hEvent);
289  memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
290}
291
292void TCPClientSocketWin::Core::WatchForRead() {
293  // We grab an extra reference because there is an IO operation in progress.
294  // Balanced in ReadDelegate::OnObjectSignaled().
295  AddRef();
296  read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
297}
298
299void TCPClientSocketWin::Core::WatchForWrite() {
300  // We grab an extra reference because there is an IO operation in progress.
301  // Balanced in WriteDelegate::OnObjectSignaled().
302  AddRef();
303  write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
304}
305
306void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled(
307    HANDLE object) {
308  DCHECK_EQ(object, core_->read_overlapped_.hEvent);
309  if (core_->socket_) {
310    if (core_->socket_->waiting_connect()) {
311      core_->socket_->DidCompleteConnect();
312    } else if (core_->disable_overlapped_reads_) {
313      core_->socket_->DidSignalRead();
314    } else {
315      core_->socket_->DidCompleteRead();
316    }
317  }
318
319  core_->Release();
320}
321
322void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled(
323    HANDLE object) {
324  DCHECK_EQ(object, core_->write_overlapped_.hEvent);
325  if (core_->socket_)
326    core_->socket_->DidCompleteWrite();
327
328  core_->Release();
329}
330
331//-----------------------------------------------------------------------------
332
333TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses,
334                                       net::NetLog* net_log,
335                                       const net::NetLog::Source& source)
336    : socket_(INVALID_SOCKET),
337      bound_socket_(INVALID_SOCKET),
338      addresses_(addresses),
339      current_address_index_(-1),
340      waiting_read_(false),
341      waiting_write_(false),
342      next_connect_state_(CONNECT_STATE_NONE),
343      connect_os_error_(0),
344      net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)),
345      previously_disconnected_(false) {
346  net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
347                      source.ToEventParametersCallback());
348  EnsureWinsockInit();
349}
350
351TCPClientSocketWin::~TCPClientSocketWin() {
352  Disconnect();
353  net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
354}
355
356int TCPClientSocketWin::AdoptSocket(SOCKET socket) {
357  DCHECK_EQ(socket_, INVALID_SOCKET);
358
359  int error = SetupSocket(socket);
360  if (error)
361    return MapSystemError(error);
362
363  socket_ = socket;
364  SetNonBlocking(socket_);
365
366  core_ = new Core(this);
367  current_address_index_ = 0;
368  use_history_.set_was_ever_connected();
369
370  return OK;
371}
372
373int TCPClientSocketWin::Bind(const IPEndPoint& address) {
374  if (current_address_index_ >= 0 || bind_address_.get()) {
375    // Cannot bind the socket if we are already connected or connecting.
376    return ERR_UNEXPECTED;
377  }
378
379  SockaddrStorage storage;
380  if (!address.ToSockAddr(storage.addr, &storage.addr_len))
381    return ERR_INVALID_ARGUMENT;
382
383  // Create |bound_socket_| and try to bind it to |address|.
384  int error = CreateSocket(address.GetSockAddrFamily(), &bound_socket_);
385  if (error)
386    return MapSystemError(error);
387
388  if (bind(bound_socket_, storage.addr, storage.addr_len)) {
389    error = errno;
390    if (closesocket(bound_socket_) < 0)
391      PLOG(ERROR) << "closesocket";
392    bound_socket_ = INVALID_SOCKET;
393    return MapSystemError(error);
394  }
395
396  bind_address_.reset(new IPEndPoint(address));
397
398  return 0;
399}
400
401
402int TCPClientSocketWin::Connect(const CompletionCallback& callback) {
403  DCHECK(CalledOnValidThread());
404
405  // If already connected, then just return OK.
406  if (socket_ != INVALID_SOCKET)
407    return OK;
408
409  base::StatsCounter connects("tcp.connect");
410  connects.Increment();
411
412  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
413                      addresses_.CreateNetLogCallback());
414
415  // We will try to connect to each address in addresses_. Start with the
416  // first one in the list.
417  next_connect_state_ = CONNECT_STATE_CONNECT;
418  current_address_index_ = 0;
419
420  int rv = DoConnectLoop(OK);
421  if (rv == ERR_IO_PENDING) {
422    // Synchronous operation not supported.
423    DCHECK(!callback.is_null());
424    // TODO(ajwong): Is setting read_callback_ the right thing to do here??
425    read_callback_ = callback;
426  } else {
427    LogConnectCompletion(rv);
428  }
429
430  return rv;
431}
432
433int TCPClientSocketWin::DoConnectLoop(int result) {
434  DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE);
435
436  int rv = result;
437  do {
438    ConnectState state = next_connect_state_;
439    next_connect_state_ = CONNECT_STATE_NONE;
440    switch (state) {
441      case CONNECT_STATE_CONNECT:
442        DCHECK_EQ(OK, rv);
443        rv = DoConnect();
444        break;
445      case CONNECT_STATE_CONNECT_COMPLETE:
446        rv = DoConnectComplete(rv);
447        break;
448      default:
449        LOG(DFATAL) << "bad state " << state;
450        rv = ERR_UNEXPECTED;
451        break;
452    }
453  } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE);
454
455  return rv;
456}
457
458int TCPClientSocketWin::DoConnect() {
459  DCHECK_GE(current_address_index_, 0);
460  DCHECK_LT(current_address_index_, static_cast<int>(addresses_.size()));
461  DCHECK_EQ(0, connect_os_error_);
462
463  const IPEndPoint& endpoint = addresses_[current_address_index_];
464
465  if (previously_disconnected_) {
466    use_history_.Reset();
467    previously_disconnected_ = false;
468  }
469
470  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
471                      CreateNetLogIPEndPointCallback(&endpoint));
472
473  next_connect_state_ = CONNECT_STATE_CONNECT_COMPLETE;
474
475  if (bound_socket_ != INVALID_SOCKET) {
476    DCHECK(bind_address_.get());
477    socket_ = bound_socket_;
478    bound_socket_ = INVALID_SOCKET;
479  } else {
480    connect_os_error_ = CreateSocket(endpoint.GetSockAddrFamily(), &socket_);
481    if (connect_os_error_ != 0)
482      return MapSystemError(connect_os_error_);
483
484    if (bind_address_.get()) {
485      SockaddrStorage storage;
486      if (!bind_address_->ToSockAddr(storage.addr, &storage.addr_len))
487        return ERR_INVALID_ARGUMENT;
488      if (bind(socket_, storage.addr, storage.addr_len))
489        return MapSystemError(errno);
490    }
491  }
492
493  DCHECK(!core_);
494  core_ = new Core(this);
495  // WSAEventSelect sets the socket to non-blocking mode as a side effect.
496  // Our connect() and recv() calls require that the socket be non-blocking.
497  WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
498
499  SockaddrStorage storage;
500  if (!endpoint.ToSockAddr(storage.addr, &storage.addr_len))
501    return ERR_INVALID_ARGUMENT;
502  if (!connect(socket_, storage.addr, storage.addr_len)) {
503    // Connected without waiting!
504    //
505    // The MSDN page for connect says:
506    //   With a nonblocking socket, the connection attempt cannot be completed
507    //   immediately. In this case, connect will return SOCKET_ERROR, and
508    //   WSAGetLastError will return WSAEWOULDBLOCK.
509    // which implies that for a nonblocking socket, connect never returns 0.
510    // It's not documented whether the event object will be signaled or not
511    // if connect does return 0.  So the code below is essentially dead code
512    // and we don't know if it's correct.
513    NOTREACHED();
514
515    if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
516      return OK;
517  } else {
518    int os_error = WSAGetLastError();
519    if (os_error != WSAEWOULDBLOCK) {
520      LOG(ERROR) << "connect failed: " << os_error;
521      connect_os_error_ = os_error;
522      return MapConnectError(os_error);
523    }
524  }
525
526  core_->WatchForRead();
527  return ERR_IO_PENDING;
528}
529
530int TCPClientSocketWin::DoConnectComplete(int result) {
531  // Log the end of this attempt (and any OS error it threw).
532  int os_error = connect_os_error_;
533  connect_os_error_ = 0;
534  if (result != OK) {
535    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
536                      NetLog::IntegerCallback("os_error", os_error));
537  } else {
538    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
539  }
540
541  if (result == OK) {
542    use_history_.set_was_ever_connected();
543    return OK;  // Done!
544  }
545
546  // Close whatever partially connected socket we currently have.
547  DoDisconnect();
548
549  // Try to fall back to the next address in the list.
550  if (current_address_index_ + 1 < static_cast<int>(addresses_.size())) {
551    next_connect_state_ = CONNECT_STATE_CONNECT;
552    ++current_address_index_;
553    return OK;
554  }
555
556  // Otherwise there is nothing to fall back to, so give up.
557  return result;
558}
559
560void TCPClientSocketWin::Disconnect() {
561  DCHECK(CalledOnValidThread());
562
563  DoDisconnect();
564  current_address_index_ = -1;
565  bind_address_.reset();
566}
567
568void TCPClientSocketWin::DoDisconnect() {
569  DCHECK(CalledOnValidThread());
570
571  if (socket_ == INVALID_SOCKET)
572    return;
573
574  // Note: don't use CancelIo to cancel pending IO because it doesn't work
575  // when there is a Winsock layered service provider.
576
577  // In most socket implementations, closing a socket results in a graceful
578  // connection shutdown, but in Winsock we have to call shutdown explicitly.
579  // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
580  // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
581  shutdown(socket_, SD_SEND);
582
583  // This cancels any pending IO.
584  closesocket(socket_);
585  socket_ = INVALID_SOCKET;
586
587  if (waiting_connect()) {
588    // We closed the socket, so this notification will never come.
589    // From MSDN' WSAEventSelect documentation:
590    // "Closing a socket with closesocket also cancels the association and
591    // selection of network events specified in WSAEventSelect for the socket".
592    core_->Release();
593  }
594
595  waiting_read_ = false;
596  waiting_write_ = false;
597
598  core_->Detach();
599  core_ = NULL;
600
601  previously_disconnected_ = true;
602}
603
604bool TCPClientSocketWin::IsConnected() const {
605  DCHECK(CalledOnValidThread());
606
607  if (socket_ == INVALID_SOCKET || waiting_connect())
608    return false;
609
610  if (waiting_read_)
611    return true;
612
613  // Check if connection is alive.
614  char c;
615  int rv = recv(socket_, &c, 1, MSG_PEEK);
616  if (rv == 0)
617    return false;
618  if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
619    return false;
620
621  return true;
622}
623
624bool TCPClientSocketWin::IsConnectedAndIdle() const {
625  DCHECK(CalledOnValidThread());
626
627  if (socket_ == INVALID_SOCKET || waiting_connect())
628    return false;
629
630  if (waiting_read_)
631    return true;
632
633  // Check if connection is alive and we haven't received any data
634  // unexpectedly.
635  char c;
636  int rv = recv(socket_, &c, 1, MSG_PEEK);
637  if (rv >= 0)
638    return false;
639  if (WSAGetLastError() != WSAEWOULDBLOCK)
640    return false;
641
642  return true;
643}
644
645int TCPClientSocketWin::GetPeerAddress(IPEndPoint* address) const {
646  DCHECK(CalledOnValidThread());
647  DCHECK(address);
648  if (!IsConnected())
649    return ERR_SOCKET_NOT_CONNECTED;
650  *address = addresses_[current_address_index_];
651  return OK;
652}
653
654int TCPClientSocketWin::GetLocalAddress(IPEndPoint* address) const {
655  DCHECK(CalledOnValidThread());
656  DCHECK(address);
657  if (socket_ == INVALID_SOCKET) {
658    if (bind_address_.get()) {
659      *address = *bind_address_;
660      return OK;
661    }
662    return ERR_SOCKET_NOT_CONNECTED;
663  }
664
665  struct sockaddr_storage addr_storage;
666  socklen_t addr_len = sizeof(addr_storage);
667  struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage);
668  if (getsockname(socket_, addr, &addr_len))
669    return MapSystemError(WSAGetLastError());
670  if (!address->FromSockAddr(addr, addr_len))
671    return ERR_FAILED;
672  return OK;
673}
674
675void TCPClientSocketWin::SetSubresourceSpeculation() {
676  use_history_.set_subresource_speculation();
677}
678
679void TCPClientSocketWin::SetOmniboxSpeculation() {
680  use_history_.set_omnibox_speculation();
681}
682
683bool TCPClientSocketWin::WasEverUsed() const {
684  return use_history_.was_used_to_convey_data();
685}
686
687bool TCPClientSocketWin::UsingTCPFastOpen() const {
688  // Not supported on windows.
689  return false;
690}
691
692bool TCPClientSocketWin::WasNpnNegotiated() const {
693  return false;
694}
695
696NextProto TCPClientSocketWin::GetNegotiatedProtocol() const {
697  return kProtoUnknown;
698}
699
700bool TCPClientSocketWin::GetSSLInfo(SSLInfo* ssl_info) {
701  return false;
702}
703
704int TCPClientSocketWin::Read(IOBuffer* buf,
705                             int buf_len,
706                             const CompletionCallback& callback) {
707  DCHECK(CalledOnValidThread());
708  DCHECK_NE(socket_, INVALID_SOCKET);
709  DCHECK(!waiting_read_);
710  DCHECK(read_callback_.is_null());
711  DCHECK(!core_->read_iobuffer_);
712
713  return DoRead(buf, buf_len, callback);
714}
715
716int TCPClientSocketWin::Write(IOBuffer* buf,
717                              int buf_len,
718                              const CompletionCallback& callback) {
719  DCHECK(CalledOnValidThread());
720  DCHECK_NE(socket_, INVALID_SOCKET);
721  DCHECK(!waiting_write_);
722  DCHECK(write_callback_.is_null());
723  DCHECK_GT(buf_len, 0);
724  DCHECK(!core_->write_iobuffer_);
725
726  base::StatsCounter writes("tcp.writes");
727  writes.Increment();
728
729  WSABUF write_buffer;
730  write_buffer.len = buf_len;
731  write_buffer.buf = buf->data();
732
733  // TODO(wtc): Remove the assertion after enough testing.
734  AssertEventNotSignaled(core_->write_overlapped_.hEvent);
735  DWORD num;
736  int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
737                   &core_->write_overlapped_, NULL);
738  if (rv == 0) {
739    if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
740      rv = static_cast<int>(num);
741      if (rv > buf_len || rv < 0) {
742        // It seems that some winsock interceptors report that more was written
743        // than was available. Treat this as an error.  http://crbug.com/27870
744        LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
745                   << " bytes, but " << rv << " bytes reported.";
746        return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
747      }
748      base::StatsCounter write_bytes("tcp.write_bytes");
749      write_bytes.Add(rv);
750      if (rv > 0)
751        use_history_.set_was_used_to_convey_data();
752      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
753                                    buf->data());
754      return rv;
755    }
756  } else {
757    int os_error = WSAGetLastError();
758    if (os_error != WSA_IO_PENDING) {
759      int net_error = MapSystemError(os_error);
760      net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
761                        CreateNetLogSocketErrorCallback(net_error, os_error));
762      return net_error;
763    }
764  }
765  waiting_write_ = true;
766  write_callback_ = callback;
767  core_->write_iobuffer_ = buf;
768  core_->write_buffer_length_ = buf_len;
769  core_->WatchForWrite();
770  return ERR_IO_PENDING;
771}
772
773bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) {
774  DCHECK(CalledOnValidThread());
775  return SetSocketReceiveBufferSize(socket_, size);
776}
777
778bool TCPClientSocketWin::SetSendBufferSize(int32 size) {
779  DCHECK(CalledOnValidThread());
780  return SetSocketSendBufferSize(socket_, size);
781}
782
783bool TCPClientSocketWin::SetKeepAlive(bool enable, int delay) {
784  return SetTCPKeepAlive(socket_, enable, delay);
785}
786
787bool TCPClientSocketWin::SetNoDelay(bool no_delay) {
788  return DisableNagle(socket_, no_delay);
789}
790
791void TCPClientSocketWin::DisableOverlappedReads() {
792  g_disable_overlapped_reads = true;
793}
794
795void TCPClientSocketWin::LogConnectCompletion(int net_error) {
796  if (net_error == OK)
797    UpdateConnectionTypeHistograms(CONNECTION_ANY);
798
799  if (net_error != OK) {
800    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
801    return;
802  }
803
804  struct sockaddr_storage source_address;
805  socklen_t addrlen = sizeof(source_address);
806  int rv = getsockname(
807      socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
808  if (rv != 0) {
809    LOG(ERROR) << "getsockname() [rv: " << rv
810               << "] error: " << WSAGetLastError();
811    NOTREACHED();
812    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
813    return;
814  }
815
816  net_log_.EndEvent(
817      NetLog::TYPE_TCP_CONNECT,
818      CreateNetLogSourceAddressCallback(
819          reinterpret_cast<const struct sockaddr*>(&source_address),
820          sizeof(source_address)));
821}
822
823int TCPClientSocketWin::DoRead(IOBuffer* buf, int buf_len,
824                               const CompletionCallback& callback) {
825  if (core_->disable_overlapped_reads_) {
826    if (!core_->non_blocking_reads_initialized_) {
827      WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
828                     FD_READ | FD_CLOSE);
829      core_->non_blocking_reads_initialized_ = true;
830    }
831    int rv = recv(socket_, buf->data(), buf_len, 0);
832    if (rv == SOCKET_ERROR) {
833      int os_error = WSAGetLastError();
834      if (os_error != WSAEWOULDBLOCK) {
835        int net_error = MapSystemError(os_error);
836        net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
837            CreateNetLogSocketErrorCallback(net_error, os_error));
838        return net_error;
839      }
840    } else {
841      base::StatsCounter read_bytes("tcp.read_bytes");
842      if (rv > 0) {
843        use_history_.set_was_used_to_convey_data();
844        read_bytes.Add(rv);
845      }
846      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
847                                    buf->data());
848      return rv;
849    }
850  } else {
851    buf_len = core_->ThrottleReadSize(buf_len);
852
853    WSABUF read_buffer;
854    read_buffer.len = buf_len;
855    read_buffer.buf = buf->data();
856
857    // TODO(wtc): Remove the assertion after enough testing.
858    AssertEventNotSignaled(core_->read_overlapped_.hEvent);
859    DWORD num;
860    DWORD flags = 0;
861    int rv = WSARecv(socket_, &read_buffer, 1, &num, &flags,
862                     &core_->read_overlapped_, NULL);
863    if (rv == 0) {
864      if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) {
865        base::StatsCounter read_bytes("tcp.read_bytes");
866        if (num > 0) {
867          use_history_.set_was_used_to_convey_data();
868          read_bytes.Add(num);
869        }
870        net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, num,
871                                      buf->data());
872        return static_cast<int>(num);
873      }
874    } else {
875      int os_error = WSAGetLastError();
876      if (os_error != WSA_IO_PENDING) {
877        int net_error = MapSystemError(os_error);
878        net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
879            CreateNetLogSocketErrorCallback(net_error, os_error));
880        return net_error;
881      }
882    }
883  }
884
885  waiting_read_ = true;
886  read_callback_ = callback;
887  core_->read_iobuffer_ = buf;
888  core_->read_buffer_length_ = buf_len;
889  core_->WatchForRead();
890  return ERR_IO_PENDING;
891}
892
893void TCPClientSocketWin::DoReadCallback(int rv) {
894  DCHECK_NE(rv, ERR_IO_PENDING);
895  DCHECK(!read_callback_.is_null());
896
897  // Since Run may result in Read being called, clear read_callback_ up front.
898  CompletionCallback c = read_callback_;
899  read_callback_.Reset();
900  c.Run(rv);
901}
902
903void TCPClientSocketWin::DoWriteCallback(int rv) {
904  DCHECK_NE(rv, ERR_IO_PENDING);
905  DCHECK(!write_callback_.is_null());
906
907  // since Run may result in Write being called, clear write_callback_ up front.
908  CompletionCallback c = write_callback_;
909  write_callback_.Reset();
910  c.Run(rv);
911}
912
913void TCPClientSocketWin::DidCompleteConnect() {
914  DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE);
915  int result;
916
917  WSANETWORKEVENTS events;
918  int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
919                                &events);
920  int os_error = 0;
921  if (rv == SOCKET_ERROR) {
922    NOTREACHED();
923    os_error = WSAGetLastError();
924    result = MapSystemError(os_error);
925  } else if (events.lNetworkEvents & FD_CONNECT) {
926    os_error = events.iErrorCode[FD_CONNECT_BIT];
927    result = MapConnectError(os_error);
928  } else {
929    NOTREACHED();
930    result = ERR_UNEXPECTED;
931  }
932
933  connect_os_error_ = os_error;
934  rv = DoConnectLoop(result);
935  if (rv != ERR_IO_PENDING) {
936    LogConnectCompletion(rv);
937    DoReadCallback(rv);
938  }
939}
940
941void TCPClientSocketWin::DidCompleteRead() {
942  DCHECK(waiting_read_);
943  DWORD num_bytes, flags;
944  BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_,
945                                   &num_bytes, FALSE, &flags);
946  waiting_read_ = false;
947  int rv;
948  if (ok) {
949    base::StatsCounter read_bytes("tcp.read_bytes");
950    read_bytes.Add(num_bytes);
951    if (num_bytes > 0)
952      use_history_.set_was_used_to_convey_data();
953    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED,
954                                  num_bytes, core_->read_iobuffer_->data());
955    rv = static_cast<int>(num_bytes);
956  } else {
957    int os_error = WSAGetLastError();
958    rv = MapSystemError(os_error);
959    net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
960                      CreateNetLogSocketErrorCallback(rv, os_error));
961  }
962  WSAResetEvent(core_->read_overlapped_.hEvent);
963  core_->read_iobuffer_ = NULL;
964  core_->read_buffer_length_ = 0;
965  DoReadCallback(rv);
966}
967
968void TCPClientSocketWin::DidCompleteWrite() {
969  DCHECK(waiting_write_);
970
971  DWORD num_bytes, flags;
972  BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
973                                   &num_bytes, FALSE, &flags);
974  WSAResetEvent(core_->write_overlapped_.hEvent);
975  waiting_write_ = false;
976  int rv;
977  if (!ok) {
978    int os_error = WSAGetLastError();
979    rv = MapSystemError(os_error);
980    net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
981                      CreateNetLogSocketErrorCallback(rv, os_error));
982  } else {
983    rv = static_cast<int>(num_bytes);
984    if (rv > core_->write_buffer_length_ || rv < 0) {
985      // It seems that some winsock interceptors report that more was written
986      // than was available. Treat this as an error.  http://crbug.com/27870
987      LOG(ERROR) << "Detected broken LSP: Asked to write "
988                 << core_->write_buffer_length_ << " bytes, but " << rv
989                 << " bytes reported.";
990      rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
991    } else {
992      base::StatsCounter write_bytes("tcp.write_bytes");
993      write_bytes.Add(num_bytes);
994      if (num_bytes > 0)
995        use_history_.set_was_used_to_convey_data();
996      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
997                                    core_->write_iobuffer_->data());
998    }
999  }
1000  core_->write_iobuffer_ = NULL;
1001  DoWriteCallback(rv);
1002}
1003
1004void TCPClientSocketWin::DidSignalRead() {
1005  DCHECK(waiting_read_);
1006  int os_error = 0;
1007  WSANETWORKEVENTS network_events;
1008  int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
1009                                &network_events);
1010  if (rv == SOCKET_ERROR) {
1011    os_error = WSAGetLastError();
1012    rv = MapSystemError(os_error);
1013  } else if (network_events.lNetworkEvents) {
1014    DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
1015    // If network_events.lNetworkEvents is FD_CLOSE and
1016    // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
1017    // connection closure. It is tempting to directly set rv to 0 in
1018    // this case, but the MSDN pages for WSAEventSelect and
1019    // WSAAsyncSelect recommend we still call DoRead():
1020    //   FD_CLOSE should only be posted after all data is read from a
1021    //   socket, but an application should check for remaining data upon
1022    //   receipt of FD_CLOSE to avoid any possibility of losing data.
1023    //
1024    // If network_events.iErrorCode[FD_READ_BIT] or
1025    // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
1026    // DoRead() because recv() reports a more accurate error code
1027    // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
1028    // reset.
1029    rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
1030                read_callback_);
1031    if (rv == ERR_IO_PENDING)
1032      return;
1033  } else {
1034    // This may happen because Read() may succeed synchronously and
1035    // consume all the received data without resetting the event object.
1036    core_->WatchForRead();
1037    return;
1038  }
1039  waiting_read_ = false;
1040  core_->read_iobuffer_ = NULL;
1041  core_->read_buffer_length_ = 0;
1042  DoReadCallback(rv);
1043}
1044
1045}  // namespace net
1046