1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/socket/tcp_socket_win.h"
6
7#include <mstcpip.h>
8
9#include "base/callback_helpers.h"
10#include "base/logging.h"
11#include "base/metrics/stats_counters.h"
12#include "base/win/windows_version.h"
13#include "net/base/address_list.h"
14#include "net/base/connection_type_histograms.h"
15#include "net/base/io_buffer.h"
16#include "net/base/ip_endpoint.h"
17#include "net/base/net_errors.h"
18#include "net/base/net_util.h"
19#include "net/base/network_change_notifier.h"
20#include "net/base/winsock_init.h"
21#include "net/base/winsock_util.h"
22#include "net/socket/socket_descriptor.h"
23#include "net/socket/socket_net_log_params.h"
24
25namespace net {
26
27namespace {
28
// Keep-alive delay, in seconds, that SetDefaultOptionsForClient() passes to
// SetTCPKeepAlive().
const int kTCPKeepAliveSeconds = 45;
30
31bool SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
32  int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
33                      reinterpret_cast<const char*>(&size), sizeof(size));
34  DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError();
35  return rv == 0;
36}
37
38bool SetSocketSendBufferSize(SOCKET socket, int32 size) {
39  int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
40                      reinterpret_cast<const char*>(&size), sizeof(size));
41  DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError();
42  return rv == 0;
43}
44
45// Disable Nagle.
46// The Nagle implementation on windows is governed by RFC 896.  The idea
47// behind Nagle is to reduce small packets on the network.  When Nagle is
48// enabled, if a partial packet has been sent, the TCP stack will disallow
49// further *partial* packets until an ACK has been received from the other
50// side.  Good applications should always strive to send as much data as
51// possible and avoid partial-packet sends.  However, in most real world
52// applications, there are edge cases where this does not happen, and two
53// partial packets may be sent back to back.  For a browser, it is NEVER
54// a benefit to delay for an RTT before the second packet is sent.
55//
56// As a practical example in Chromium today, consider the case of a small
57// POST.  I have verified this:
58//     Client writes 649 bytes of header  (partial packet #1)
59//     Client writes 50 bytes of POST data (partial packet #2)
60// In the above example, with Nagle, a RTT delay is inserted between these
61// two sends due to nagle.  RTTs can easily be 100ms or more.  The best
62// fix is to make sure that for POSTing data, we write as much data as
63// possible and minimize partial packets.  We will fix that.  But disabling
// Nagle also ensures we don't run into this delay in other edge cases.
65// See also:
66//    http://technet.microsoft.com/en-us/library/bb726981.aspx
67bool DisableNagle(SOCKET socket, bool disable) {
68  BOOL val = disable ? TRUE : FALSE;
69  int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
70                      reinterpret_cast<const char*>(&val),
71                      sizeof(val));
72  DCHECK(!rv) << "Could not disable nagle";
73  return rv == 0;
74}
75
76// Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
77// connections. See http://crbug.com/27400 for details.
78bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
79  int delay = delay_secs * 1000;
80  struct tcp_keepalive keepalive_vals = {
81    enable ? 1 : 0,  // TCP keep-alive on.
82    delay,  // Delay seconds before sending first TCP keep-alive packet.
83    delay,  // Delay seconds between sending TCP keep-alive packets.
84  };
85  DWORD bytes_returned = 0xABAB;
86  int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
87                    sizeof(keepalive_vals), NULL, 0,
88                    &bytes_returned, NULL, NULL);
89  DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
90              << " [error: " << WSAGetLastError() << "].";
91
92  // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
93  return rv == 0;
94}
95
96int MapConnectError(int os_error) {
97  switch (os_error) {
98    // connect fails with WSAEACCES when Windows Firewall blocks the
99    // connection.
100    case WSAEACCES:
101      return ERR_NETWORK_ACCESS_DENIED;
102    case WSAETIMEDOUT:
103      return ERR_CONNECTION_TIMED_OUT;
104    default: {
105      int net_error = MapSystemError(os_error);
106      if (net_error == ERR_FAILED)
107        return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
108
109      // Give a more specific error when the user is offline.
110      if (net_error == ERR_ADDRESS_UNREACHABLE &&
111          NetworkChangeNotifier::IsOffline()) {
112        return ERR_INTERNET_DISCONNECTED;
113      }
114
115      return net_error;
116    }
117  }
118}
119
120}  // namespace
121
122//-----------------------------------------------------------------------------
123
124// This class encapsulates all the state that has to be preserved as long as
125// there is a network IO operation in progress. If the owner TCPSocketWin is
126// destroyed while an operation is in progress, the Core is detached and it
127// lives until the operation completes and the OS doesn't reference any resource
128// declared on this class anymore.
class TCPSocketWin::Core : public base::RefCounted<Core> {
 public:
  // |socket| is the TCPSocketWin that owns this Core; it is notified when a
  // watched event is signaled, until Detach() is called.
  explicit Core(TCPSocketWin* socket);

  // Start watching for the end of a read or write operation. Each call takes
  // an extra reference on this object, which is released by the matching
  // delegate's OnObjectSignaled().
  void WatchForRead();
  void WatchForWrite();

  // The TCPSocketWin is going away. After this call the delegates no longer
  // forward notifications to the socket.
  void Detach() { socket_ = NULL; }

  // The separate OVERLAPPED variables for asynchronous operation.
  // |read_overlapped_| is used for both Connect() and Read().
  // |write_overlapped_| is only used for Write().
  OVERLAPPED read_overlapped_;
  OVERLAPPED write_overlapped_;

  // The buffers used in Read() and Write(), and their lengths in bytes.
  scoped_refptr<IOBuffer> read_iobuffer_;
  scoped_refptr<IOBuffer> write_iobuffer_;
  int read_buffer_length_;
  int write_buffer_length_;

  // Set once DoRead() has registered the read event for FD_READ | FD_CLOSE.
  bool non_blocking_reads_initialized_;

 private:
  // The destructor is private; RefCounted needs access to it.
  friend class base::RefCounted<Core>;

  // Receives the signal for |read_overlapped_.hEvent| and forwards it to the
  // socket as either a connect completion or a read notification.
  class ReadDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit ReadDelegate(Core* core) : core_(core) {}
    virtual ~ReadDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    // The Core this delegate is a member of.
    Core* const core_;
  };

  // Receives the signal for |write_overlapped_.hEvent| and forwards it to the
  // socket as a write completion.
  class WriteDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit WriteDelegate(Core* core) : core_(core) {}
    virtual ~WriteDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    // The Core this delegate is a member of.
    Core* const core_;
  };

  // Stops both watchers and closes both events.
  ~Core();

  // The socket that created this object. NULL after Detach().
  TCPSocketWin* socket_;

  // |reader_| handles the signals from |read_watcher_|.
  ReadDelegate reader_;
  // |writer_| handles the signals from |write_watcher_|.
  WriteDelegate writer_;

  // |read_watcher_| watches for events from Connect() and Read().
  base::win::ObjectWatcher read_watcher_;
  // |write_watcher_| watches for events from Write().
  base::win::ObjectWatcher write_watcher_;

  DISALLOW_COPY_AND_ASSIGN(Core);
};
198
199TCPSocketWin::Core::Core(TCPSocketWin* socket)
200    : read_buffer_length_(0),
201      write_buffer_length_(0),
202      non_blocking_reads_initialized_(false),
203      socket_(socket),
204      reader_(this),
205      writer_(this) {
206  memset(&read_overlapped_, 0, sizeof(read_overlapped_));
207  memset(&write_overlapped_, 0, sizeof(write_overlapped_));
208
209  read_overlapped_.hEvent = WSACreateEvent();
210  write_overlapped_.hEvent = WSACreateEvent();
211}
212
213TCPSocketWin::Core::~Core() {
214  // Make sure the message loop is not watching this object anymore.
215  read_watcher_.StopWatching();
216  write_watcher_.StopWatching();
217
218  WSACloseEvent(read_overlapped_.hEvent);
219  memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
220  WSACloseEvent(write_overlapped_.hEvent);
221  memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
222}
223
224void TCPSocketWin::Core::WatchForRead() {
225  // We grab an extra reference because there is an IO operation in progress.
226  // Balanced in ReadDelegate::OnObjectSignaled().
227  AddRef();
228  read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
229}
230
231void TCPSocketWin::Core::WatchForWrite() {
232  // We grab an extra reference because there is an IO operation in progress.
233  // Balanced in WriteDelegate::OnObjectSignaled().
234  AddRef();
235  write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
236}
237
238void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
239  DCHECK_EQ(object, core_->read_overlapped_.hEvent);
240  if (core_->socket_) {
241    if (core_->socket_->waiting_connect_)
242      core_->socket_->DidCompleteConnect();
243    else
244      core_->socket_->DidSignalRead();
245  }
246
247  core_->Release();
248}
249
250void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
251    HANDLE object) {
252  DCHECK_EQ(object, core_->write_overlapped_.hEvent);
253  if (core_->socket_)
254    core_->socket_->DidCompleteWrite();
255
256  core_->Release();
257}
258
259//-----------------------------------------------------------------------------
260
261TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
262                           const net::NetLog::Source& source)
263    : socket_(INVALID_SOCKET),
264      accept_event_(WSA_INVALID_EVENT),
265      accept_socket_(NULL),
266      accept_address_(NULL),
267      waiting_connect_(false),
268      waiting_read_(false),
269      waiting_write_(false),
270      connect_os_error_(0),
271      logging_multiple_connect_attempts_(false),
272      net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
273  net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
274                      source.ToEventParametersCallback());
275  EnsureWinsockInit();
276}
277
278TCPSocketWin::~TCPSocketWin() {
279  Close();
280  net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
281}
282
283int TCPSocketWin::Open(AddressFamily family) {
284  DCHECK(CalledOnValidThread());
285  DCHECK_EQ(socket_, INVALID_SOCKET);
286
287  socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
288                                 IPPROTO_TCP);
289  if (socket_ == INVALID_SOCKET) {
290    PLOG(ERROR) << "CreatePlatformSocket() returned an error";
291    return MapSystemError(WSAGetLastError());
292  }
293
294  if (SetNonBlocking(socket_)) {
295    int result = MapSystemError(WSAGetLastError());
296    Close();
297    return result;
298  }
299
300  return OK;
301}
302
303int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
304                                       const IPEndPoint& peer_address) {
305  DCHECK(CalledOnValidThread());
306  DCHECK_EQ(socket_, INVALID_SOCKET);
307  DCHECK(!core_);
308
309  socket_ = socket;
310
311  if (SetNonBlocking(socket_)) {
312    int result = MapSystemError(WSAGetLastError());
313    Close();
314    return result;
315  }
316
317  core_ = new Core(this);
318  peer_address_.reset(new IPEndPoint(peer_address));
319
320  return OK;
321}
322
323int TCPSocketWin::Bind(const IPEndPoint& address) {
324  DCHECK(CalledOnValidThread());
325  DCHECK_NE(socket_, INVALID_SOCKET);
326
327  SockaddrStorage storage;
328  if (!address.ToSockAddr(storage.addr, &storage.addr_len))
329    return ERR_ADDRESS_INVALID;
330
331  int result = bind(socket_, storage.addr, storage.addr_len);
332  if (result < 0) {
333    PLOG(ERROR) << "bind() returned an error";
334    return MapSystemError(WSAGetLastError());
335  }
336
337  return OK;
338}
339
340int TCPSocketWin::Listen(int backlog) {
341  DCHECK(CalledOnValidThread());
342  DCHECK_GT(backlog, 0);
343  DCHECK_NE(socket_, INVALID_SOCKET);
344  DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
345
346  accept_event_ = WSACreateEvent();
347  if (accept_event_ == WSA_INVALID_EVENT) {
348    PLOG(ERROR) << "WSACreateEvent()";
349    return MapSystemError(WSAGetLastError());
350  }
351
352  int result = listen(socket_, backlog);
353  if (result < 0) {
354    PLOG(ERROR) << "listen() returned an error";
355    return MapSystemError(WSAGetLastError());
356  }
357
358  return OK;
359}
360
361int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
362                         IPEndPoint* address,
363                         const CompletionCallback& callback) {
364  DCHECK(CalledOnValidThread());
365  DCHECK(socket);
366  DCHECK(address);
367  DCHECK(!callback.is_null());
368  DCHECK(accept_callback_.is_null());
369
370  net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
371
372  int result = AcceptInternal(socket, address);
373
374  if (result == ERR_IO_PENDING) {
375    // Start watching.
376    WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
377    accept_watcher_.StartWatching(accept_event_, this);
378
379    accept_socket_ = socket;
380    accept_address_ = address;
381    accept_callback_ = callback;
382  }
383
384  return result;
385}
386
387int TCPSocketWin::Connect(const IPEndPoint& address,
388                          const CompletionCallback& callback) {
389  DCHECK(CalledOnValidThread());
390  DCHECK_NE(socket_, INVALID_SOCKET);
391  DCHECK(!waiting_connect_);
392
393  // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
394  // Unless Close() is called to reset the internal state, a second call to
395  // Connect() is not allowed.
396  // Please note that we enforce this even if the previous Connect() has
397  // completed and failed. Although it is allowed to connect the same |socket_|
398  // again after a connection attempt failed on Windows, it results in
399  // unspecified behavior according to POSIX. Therefore, we make it behave in
400  // the same way as TCPSocketLibevent.
401  DCHECK(!peer_address_ && !core_);
402
403  if (!logging_multiple_connect_attempts_)
404    LogConnectBegin(AddressList(address));
405
406  peer_address_.reset(new IPEndPoint(address));
407
408  int rv = DoConnect();
409  if (rv == ERR_IO_PENDING) {
410    // Synchronous operation not supported.
411    DCHECK(!callback.is_null());
412    read_callback_ = callback;
413    waiting_connect_ = true;
414  } else {
415    DoConnectComplete(rv);
416  }
417
418  return rv;
419}
420
421bool TCPSocketWin::IsConnected() const {
422  DCHECK(CalledOnValidThread());
423
424  if (socket_ == INVALID_SOCKET || waiting_connect_)
425    return false;
426
427  if (waiting_read_)
428    return true;
429
430  // Check if connection is alive.
431  char c;
432  int rv = recv(socket_, &c, 1, MSG_PEEK);
433  if (rv == 0)
434    return false;
435  if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
436    return false;
437
438  return true;
439}
440
441bool TCPSocketWin::IsConnectedAndIdle() const {
442  DCHECK(CalledOnValidThread());
443
444  if (socket_ == INVALID_SOCKET || waiting_connect_)
445    return false;
446
447  if (waiting_read_)
448    return true;
449
450  // Check if connection is alive and we haven't received any data
451  // unexpectedly.
452  char c;
453  int rv = recv(socket_, &c, 1, MSG_PEEK);
454  if (rv >= 0)
455    return false;
456  if (WSAGetLastError() != WSAEWOULDBLOCK)
457    return false;
458
459  return true;
460}
461
462int TCPSocketWin::Read(IOBuffer* buf,
463                       int buf_len,
464                       const CompletionCallback& callback) {
465  DCHECK(CalledOnValidThread());
466  DCHECK_NE(socket_, INVALID_SOCKET);
467  DCHECK(!waiting_read_);
468  DCHECK(read_callback_.is_null());
469  DCHECK(!core_->read_iobuffer_);
470
471  return DoRead(buf, buf_len, callback);
472}
473
474int TCPSocketWin::Write(IOBuffer* buf,
475                        int buf_len,
476                        const CompletionCallback& callback) {
477  DCHECK(CalledOnValidThread());
478  DCHECK_NE(socket_, INVALID_SOCKET);
479  DCHECK(!waiting_write_);
480  DCHECK(write_callback_.is_null());
481  DCHECK_GT(buf_len, 0);
482  DCHECK(!core_->write_iobuffer_);
483
484  base::StatsCounter writes("tcp.writes");
485  writes.Increment();
486
487  WSABUF write_buffer;
488  write_buffer.len = buf_len;
489  write_buffer.buf = buf->data();
490
491  // TODO(wtc): Remove the assertion after enough testing.
492  AssertEventNotSignaled(core_->write_overlapped_.hEvent);
493  DWORD num;
494  int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
495                   &core_->write_overlapped_, NULL);
496  if (rv == 0) {
497    if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
498      rv = static_cast<int>(num);
499      if (rv > buf_len || rv < 0) {
500        // It seems that some winsock interceptors report that more was written
501        // than was available. Treat this as an error.  http://crbug.com/27870
502        LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
503                   << " bytes, but " << rv << " bytes reported.";
504        return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
505      }
506      base::StatsCounter write_bytes("tcp.write_bytes");
507      write_bytes.Add(rv);
508      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
509                                    buf->data());
510      return rv;
511    }
512  } else {
513    int os_error = WSAGetLastError();
514    if (os_error != WSA_IO_PENDING) {
515      int net_error = MapSystemError(os_error);
516      net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
517                        CreateNetLogSocketErrorCallback(net_error, os_error));
518      return net_error;
519    }
520  }
521  waiting_write_ = true;
522  write_callback_ = callback;
523  core_->write_iobuffer_ = buf;
524  core_->write_buffer_length_ = buf_len;
525  core_->WatchForWrite();
526  return ERR_IO_PENDING;
527}
528
529int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
530  DCHECK(CalledOnValidThread());
531  DCHECK(address);
532
533  SockaddrStorage storage;
534  if (getsockname(socket_, storage.addr, &storage.addr_len))
535    return MapSystemError(WSAGetLastError());
536  if (!address->FromSockAddr(storage.addr, storage.addr_len))
537    return ERR_ADDRESS_INVALID;
538
539  return OK;
540}
541
542int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
543  DCHECK(CalledOnValidThread());
544  DCHECK(address);
545  if (!IsConnected())
546    return ERR_SOCKET_NOT_CONNECTED;
547  *address = *peer_address_;
548  return OK;
549}
550
551int TCPSocketWin::SetDefaultOptionsForServer() {
552  return SetExclusiveAddrUse();
553}
554
555void TCPSocketWin::SetDefaultOptionsForClient() {
556  // Increase the socket buffer sizes from the default sizes for WinXP.  In
557  // performance testing, there is substantial benefit by increasing from 8KB
558  // to 64KB.
559  // See also:
560  //    http://support.microsoft.com/kb/823764/EN-US
561  // On Vista, if we manually set these sizes, Vista turns off its receive
562  // window auto-tuning feature.
563  //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
564  // Since Vista's auto-tune is better than any static value we can could set,
565  // only change these on pre-vista machines.
566  if (base::win::GetVersion() < base::win::VERSION_VISTA) {
567    const int32 kSocketBufferSize = 64 * 1024;
568    SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
569    SetSocketSendBufferSize(socket_, kSocketBufferSize);
570  }
571
572  DisableNagle(socket_, true);
573  SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
574}
575
576int TCPSocketWin::SetExclusiveAddrUse() {
577  // On Windows, a bound end point can be hijacked by another process by
578  // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
579  // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
580  // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
581  // socket to forcibly bind to the end point until the end point is unbound.
582  // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
583  // MSDN: http://goo.gl/M6fjQ.
584  //
585  // Unlike on *nix, on Windows a TCP server socket can always bind to an end
586  // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
587  // needed here.
588  //
589  // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
590  // point in TIME_WAIT status. It does not have this effect for a TCP server
591  // socket.
592
593  BOOL true_value = 1;
594  int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
595                      reinterpret_cast<const char*>(&true_value),
596                      sizeof(true_value));
597  if (rv < 0)
598    return MapSystemError(errno);
599  return OK;
600}
601
602bool TCPSocketWin::SetReceiveBufferSize(int32 size) {
603  DCHECK(CalledOnValidThread());
604  return SetSocketReceiveBufferSize(socket_, size);
605}
606
607bool TCPSocketWin::SetSendBufferSize(int32 size) {
608  DCHECK(CalledOnValidThread());
609  return SetSocketSendBufferSize(socket_, size);
610}
611
612bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
613  return SetTCPKeepAlive(socket_, enable, delay);
614}
615
616bool TCPSocketWin::SetNoDelay(bool no_delay) {
617  return DisableNagle(socket_, no_delay);
618}
619
620void TCPSocketWin::Close() {
621  DCHECK(CalledOnValidThread());
622
623  if (socket_ != INVALID_SOCKET) {
624    // Note: don't use CancelIo to cancel pending IO because it doesn't work
625    // when there is a Winsock layered service provider.
626
627    // In most socket implementations, closing a socket results in a graceful
628    // connection shutdown, but in Winsock we have to call shutdown explicitly.
629    // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
630    // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
631    shutdown(socket_, SD_SEND);
632
633    // This cancels any pending IO.
634    if (closesocket(socket_) < 0)
635      PLOG(ERROR) << "closesocket";
636    socket_ = INVALID_SOCKET;
637  }
638
639  if (!accept_callback_.is_null()) {
640    accept_watcher_.StopWatching();
641    accept_socket_ = NULL;
642    accept_address_ = NULL;
643    accept_callback_.Reset();
644  }
645
646  if (accept_event_) {
647    WSACloseEvent(accept_event_);
648    accept_event_ = WSA_INVALID_EVENT;
649  }
650
651  if (core_) {
652    if (waiting_connect_) {
653      // We closed the socket, so this notification will never come.
654      // From MSDN' WSAEventSelect documentation:
655      // "Closing a socket with closesocket also cancels the association and
656      // selection of network events specified in WSAEventSelect for the
657      // socket".
658      core_->Release();
659    }
660    core_->Detach();
661    core_ = NULL;
662  }
663
664  waiting_connect_ = false;
665  waiting_read_ = false;
666  waiting_write_ = false;
667
668  read_callback_.Reset();
669  write_callback_.Reset();
670  peer_address_.reset();
671  connect_os_error_ = 0;
672}
673
674bool TCPSocketWin::UsingTCPFastOpen() const {
675  // Not supported on windows.
676  return false;
677}
678
679void TCPSocketWin::StartLoggingMultipleConnectAttempts(
680    const AddressList& addresses) {
681  if (!logging_multiple_connect_attempts_) {
682    logging_multiple_connect_attempts_ = true;
683    LogConnectBegin(addresses);
684  } else {
685    NOTREACHED();
686  }
687}
688
689void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
690  if (logging_multiple_connect_attempts_) {
691    LogConnectEnd(net_error);
692    logging_multiple_connect_attempts_ = false;
693  } else {
694    NOTREACHED();
695  }
696}
697
698int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
699                                 IPEndPoint* address) {
700  SockaddrStorage storage;
701  int new_socket = accept(socket_, storage.addr, &storage.addr_len);
702  if (new_socket < 0) {
703    int net_error = MapSystemError(WSAGetLastError());
704    if (net_error != ERR_IO_PENDING)
705      net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
706    return net_error;
707  }
708
709  IPEndPoint ip_end_point;
710  if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
711    NOTREACHED();
712    if (closesocket(new_socket) < 0)
713      PLOG(ERROR) << "closesocket";
714    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, ERR_FAILED);
715    return ERR_FAILED;
716  }
717  scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
718      net_log_.net_log(), net_log_.source()));
719  int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
720  if (adopt_result != OK) {
721    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
722    return adopt_result;
723  }
724  *socket = tcp_socket.Pass();
725  *address = ip_end_point;
726  net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
727                    CreateNetLogIPEndPointCallback(&ip_end_point));
728  return OK;
729}
730
731void TCPSocketWin::OnObjectSignaled(HANDLE object) {
732  WSANETWORKEVENTS ev;
733  if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
734    PLOG(ERROR) << "WSAEnumNetworkEvents()";
735    return;
736  }
737
738  if (ev.lNetworkEvents & FD_ACCEPT) {
739    int result = AcceptInternal(accept_socket_, accept_address_);
740    if (result != ERR_IO_PENDING) {
741      accept_socket_ = NULL;
742      accept_address_ = NULL;
743      base::ResetAndReturn(&accept_callback_).Run(result);
744    }
745  } else {
746    // This happens when a client opens a connection and closes it before we
747    // have a chance to accept it.
748    DCHECK(ev.lNetworkEvents == 0);
749
750    // Start watching the next FD_ACCEPT event.
751    WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
752    accept_watcher_.StartWatching(accept_event_, this);
753  }
754}
755
756int TCPSocketWin::DoConnect() {
757  DCHECK_EQ(connect_os_error_, 0);
758  DCHECK(!core_);
759
760  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
761                      CreateNetLogIPEndPointCallback(peer_address_.get()));
762
763  core_ = new Core(this);
764  // WSAEventSelect sets the socket to non-blocking mode as a side effect.
765  // Our connect() and recv() calls require that the socket be non-blocking.
766  WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
767
768  SockaddrStorage storage;
769  if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
770    return ERR_INVALID_ARGUMENT;
771  if (!connect(socket_, storage.addr, storage.addr_len)) {
772    // Connected without waiting!
773    //
774    // The MSDN page for connect says:
775    //   With a nonblocking socket, the connection attempt cannot be completed
776    //   immediately. In this case, connect will return SOCKET_ERROR, and
777    //   WSAGetLastError will return WSAEWOULDBLOCK.
778    // which implies that for a nonblocking socket, connect never returns 0.
779    // It's not documented whether the event object will be signaled or not
780    // if connect does return 0.  So the code below is essentially dead code
781    // and we don't know if it's correct.
782    NOTREACHED();
783
784    if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
785      return OK;
786  } else {
787    int os_error = WSAGetLastError();
788    if (os_error != WSAEWOULDBLOCK) {
789      LOG(ERROR) << "connect failed: " << os_error;
790      connect_os_error_ = os_error;
791      int rv = MapConnectError(os_error);
792      CHECK_NE(ERR_IO_PENDING, rv);
793      return rv;
794    }
795  }
796
797  core_->WatchForRead();
798  return ERR_IO_PENDING;
799}
800
801void TCPSocketWin::DoConnectComplete(int result) {
802  // Log the end of this attempt (and any OS error it threw).
803  int os_error = connect_os_error_;
804  connect_os_error_ = 0;
805  if (result != OK) {
806    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
807                      NetLog::IntegerCallback("os_error", os_error));
808  } else {
809    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
810  }
811
812  if (!logging_multiple_connect_attempts_)
813    LogConnectEnd(result);
814}
815
816void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
817  base::StatsCounter connects("tcp.connect");
818  connects.Increment();
819
820  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
821                      addresses.CreateNetLogCallback());
822}
823
824void TCPSocketWin::LogConnectEnd(int net_error) {
825  if (net_error == OK)
826    UpdateConnectionTypeHistograms(CONNECTION_ANY);
827
828  if (net_error != OK) {
829    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
830    return;
831  }
832
833  struct sockaddr_storage source_address;
834  socklen_t addrlen = sizeof(source_address);
835  int rv = getsockname(
836      socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
837  if (rv != 0) {
838    LOG(ERROR) << "getsockname() [rv: " << rv
839               << "] error: " << WSAGetLastError();
840    NOTREACHED();
841    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
842    return;
843  }
844
845  net_log_.EndEvent(
846      NetLog::TYPE_TCP_CONNECT,
847      CreateNetLogSourceAddressCallback(
848          reinterpret_cast<const struct sockaddr*>(&source_address),
849          sizeof(source_address)));
850}
851
852int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
853                         const CompletionCallback& callback) {
854  if (!core_->non_blocking_reads_initialized_) {
855    WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
856                   FD_READ | FD_CLOSE);
857    core_->non_blocking_reads_initialized_ = true;
858  }
859  int rv = recv(socket_, buf->data(), buf_len, 0);
860  if (rv == SOCKET_ERROR) {
861    int os_error = WSAGetLastError();
862    if (os_error != WSAEWOULDBLOCK) {
863      int net_error = MapSystemError(os_error);
864      net_log_.AddEvent(
865          NetLog::TYPE_SOCKET_READ_ERROR,
866          CreateNetLogSocketErrorCallback(net_error, os_error));
867      return net_error;
868    }
869  } else {
870    base::StatsCounter read_bytes("tcp.read_bytes");
871    if (rv > 0)
872      read_bytes.Add(rv);
873    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
874                                  buf->data());
875    return rv;
876  }
877
878  waiting_read_ = true;
879  read_callback_ = callback;
880  core_->read_iobuffer_ = buf;
881  core_->read_buffer_length_ = buf_len;
882  core_->WatchForRead();
883  return ERR_IO_PENDING;
884}
885
886void TCPSocketWin::DidCompleteConnect() {
887  DCHECK(waiting_connect_);
888  DCHECK(!read_callback_.is_null());
889  int result;
890
891  WSANETWORKEVENTS events;
892  int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
893                                &events);
894  int os_error = 0;
895  if (rv == SOCKET_ERROR) {
896    NOTREACHED();
897    os_error = WSAGetLastError();
898    result = MapSystemError(os_error);
899  } else if (events.lNetworkEvents & FD_CONNECT) {
900    os_error = events.iErrorCode[FD_CONNECT_BIT];
901    result = MapConnectError(os_error);
902  } else {
903    NOTREACHED();
904    result = ERR_UNEXPECTED;
905  }
906
907  connect_os_error_ = os_error;
908  DoConnectComplete(result);
909  waiting_connect_ = false;
910
911  DCHECK_NE(result, ERR_IO_PENDING);
912  base::ResetAndReturn(&read_callback_).Run(result);
913}
914
915void TCPSocketWin::DidCompleteWrite() {
916  DCHECK(waiting_write_);
917  DCHECK(!write_callback_.is_null());
918
919  DWORD num_bytes, flags;
920  BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
921                                   &num_bytes, FALSE, &flags);
922  WSAResetEvent(core_->write_overlapped_.hEvent);
923  waiting_write_ = false;
924  int rv;
925  if (!ok) {
926    int os_error = WSAGetLastError();
927    rv = MapSystemError(os_error);
928    net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
929                      CreateNetLogSocketErrorCallback(rv, os_error));
930  } else {
931    rv = static_cast<int>(num_bytes);
932    if (rv > core_->write_buffer_length_ || rv < 0) {
933      // It seems that some winsock interceptors report that more was written
934      // than was available. Treat this as an error.  http://crbug.com/27870
935      LOG(ERROR) << "Detected broken LSP: Asked to write "
936                 << core_->write_buffer_length_ << " bytes, but " << rv
937                 << " bytes reported.";
938      rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
939    } else {
940      base::StatsCounter write_bytes("tcp.write_bytes");
941      write_bytes.Add(num_bytes);
942      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
943                                    core_->write_iobuffer_->data());
944    }
945  }
946
947  core_->write_iobuffer_ = NULL;
948
949  DCHECK_NE(rv, ERR_IO_PENDING);
950  base::ResetAndReturn(&write_callback_).Run(rv);
951}
952
953void TCPSocketWin::DidSignalRead() {
954  DCHECK(waiting_read_);
955  DCHECK(!read_callback_.is_null());
956
957  int os_error = 0;
958  WSANETWORKEVENTS network_events;
959  int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
960                                &network_events);
961  if (rv == SOCKET_ERROR) {
962    os_error = WSAGetLastError();
963    rv = MapSystemError(os_error);
964  } else if (network_events.lNetworkEvents) {
965    DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
966    // If network_events.lNetworkEvents is FD_CLOSE and
967    // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
968    // connection closure. It is tempting to directly set rv to 0 in
969    // this case, but the MSDN pages for WSAEventSelect and
970    // WSAAsyncSelect recommend we still call DoRead():
971    //   FD_CLOSE should only be posted after all data is read from a
972    //   socket, but an application should check for remaining data upon
973    //   receipt of FD_CLOSE to avoid any possibility of losing data.
974    //
975    // If network_events.iErrorCode[FD_READ_BIT] or
976    // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
977    // DoRead() because recv() reports a more accurate error code
978    // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
979    // reset.
980    rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
981                read_callback_);
982    if (rv == ERR_IO_PENDING)
983      return;
984  } else {
985    // This may happen because Read() may succeed synchronously and
986    // consume all the received data without resetting the event object.
987    core_->WatchForRead();
988    return;
989  }
990
991  waiting_read_ = false;
992  core_->read_iobuffer_ = NULL;
993  core_->read_buffer_length_ = 0;
994
995  DCHECK_NE(rv, ERR_IO_PENDING);
996  base::ResetAndReturn(&read_callback_).Run(rv);
997}
998
999}  // namespace net
1000
1001