1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "net/socket/tcp_socket.h"
6#include "net/socket/tcp_socket_win.h"
7
8#include <mstcpip.h>
9
10#include "base/callback_helpers.h"
11#include "base/logging.h"
12#include "base/metrics/stats_counters.h"
13#include "base/win/windows_version.h"
14#include "net/base/address_list.h"
15#include "net/base/connection_type_histograms.h"
16#include "net/base/io_buffer.h"
17#include "net/base/ip_endpoint.h"
18#include "net/base/net_errors.h"
19#include "net/base/net_util.h"
20#include "net/base/network_change_notifier.h"
21#include "net/base/winsock_init.h"
22#include "net/base/winsock_util.h"
23#include "net/socket/socket_descriptor.h"
24#include "net/socket/socket_net_log_params.h"
25
26namespace net {
27
28namespace {
29
// Keep-alive delay, in seconds, that SetDefaultOptionsForClient() passes to
// SetTCPKeepAlive().
const int kTCPKeepAliveSeconds = 45;
31
32int SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
33  int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
34                      reinterpret_cast<const char*>(&size), sizeof(size));
35  int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
36  DCHECK(!rv) << "Could not set socket receive buffer size: " << net_error;
37  return net_error;
38}
39
40int SetSocketSendBufferSize(SOCKET socket, int32 size) {
41  int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
42                      reinterpret_cast<const char*>(&size), sizeof(size));
43  int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
44  DCHECK(!rv) << "Could not set socket send buffer size: " << net_error;
45  return net_error;
46}
47
48// Disable Nagle.
49// The Nagle implementation on windows is governed by RFC 896.  The idea
50// behind Nagle is to reduce small packets on the network.  When Nagle is
51// enabled, if a partial packet has been sent, the TCP stack will disallow
52// further *partial* packets until an ACK has been received from the other
53// side.  Good applications should always strive to send as much data as
54// possible and avoid partial-packet sends.  However, in most real world
55// applications, there are edge cases where this does not happen, and two
56// partial packets may be sent back to back.  For a browser, it is NEVER
57// a benefit to delay for an RTT before the second packet is sent.
58//
59// As a practical example in Chromium today, consider the case of a small
60// POST.  I have verified this:
61//     Client writes 649 bytes of header  (partial packet #1)
62//     Client writes 50 bytes of POST data (partial packet #2)
63// In the above example, with Nagle, a RTT delay is inserted between these
64// two sends due to nagle.  RTTs can easily be 100ms or more.  The best
65// fix is to make sure that for POSTing data, we write as much data as
// possible and minimize partial packets.  We will fix that.  But disabling
// Nagle also ensures we don't run into this delay in other edge cases.
68// See also:
69//    http://technet.microsoft.com/en-us/library/bb726981.aspx
70bool DisableNagle(SOCKET socket, bool disable) {
71  BOOL val = disable ? TRUE : FALSE;
72  int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
73                      reinterpret_cast<const char*>(&val),
74                      sizeof(val));
75  DCHECK(!rv) << "Could not disable nagle";
76  return rv == 0;
77}
78
79// Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
80// connections. See http://crbug.com/27400 for details.
81bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
82  int delay = delay_secs * 1000;
83  struct tcp_keepalive keepalive_vals = {
84    enable ? 1 : 0,  // TCP keep-alive on.
85    delay,  // Delay seconds before sending first TCP keep-alive packet.
86    delay,  // Delay seconds between sending TCP keep-alive packets.
87  };
88  DWORD bytes_returned = 0xABAB;
89  int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
90                    sizeof(keepalive_vals), NULL, 0,
91                    &bytes_returned, NULL, NULL);
92  DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
93              << " [error: " << WSAGetLastError() << "].";
94
95  // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
96  return rv == 0;
97}
98
99int MapConnectError(int os_error) {
100  switch (os_error) {
101    // connect fails with WSAEACCES when Windows Firewall blocks the
102    // connection.
103    case WSAEACCES:
104      return ERR_NETWORK_ACCESS_DENIED;
105    case WSAETIMEDOUT:
106      return ERR_CONNECTION_TIMED_OUT;
107    default: {
108      int net_error = MapSystemError(os_error);
109      if (net_error == ERR_FAILED)
110        return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
111
112      // Give a more specific error when the user is offline.
113      if (net_error == ERR_ADDRESS_UNREACHABLE &&
114          NetworkChangeNotifier::IsOffline()) {
115        return ERR_INTERNET_DISCONNECTED;
116      }
117
118      return net_error;
119    }
120  }
121}
122
123}  // namespace
124
125//-----------------------------------------------------------------------------
126
// Nothing to do for Windows since it doesn't support TCP FastOpen.
// TODO(jri): Remove these along with the corresponding global variables.

// Always reports that TCP FastOpen is unsupported.
bool IsTCPFastOpenSupported() {
  return false;
}

// Always reports that TCP FastOpen is not user-enabled.
bool IsTCPFastOpenUserEnabled() {
  return false;
}

// No-op; |user_enabled| is ignored.
void CheckSupportAndMaybeEnableTCPFastOpen(bool user_enabled) {
}
132
// This class encapsulates all the state that has to be preserved as long as
// there is a network IO operation in progress. If the owner TCPSocketWin is
// destroyed while an operation is in progress, the Core is detached and it
// lives until the operation completes and the OS doesn't reference any resource
// declared on this class anymore.
//
// The object is reference counted: WatchForRead() and WatchForWrite() each
// take an extra reference, which the matching delegate's OnObjectSignaled()
// releases.
class TCPSocketWin::Core : public base::RefCounted<Core> {
 public:
  // Records |socket| as the owner and creates the read and write events.
  explicit Core(TCPSocketWin* socket);

  // Start watching for the end of a read or write operation.
  void WatchForRead();
  void WatchForWrite();

  // The TCPSocketWin is going away.
  // After this call the delegates no longer forward signals to the socket.
  void Detach() { socket_ = NULL; }

  // The separate OVERLAPPED variables for asynchronous operation.
  // |read_overlapped_| is used for both Connect() and Read().
  // |write_overlapped_| is only used for Write();
  OVERLAPPED read_overlapped_;
  OVERLAPPED write_overlapped_;

  // The buffers used in Read() and Write().
  scoped_refptr<IOBuffer> read_iobuffer_;
  scoped_refptr<IOBuffer> write_iobuffer_;
  int read_buffer_length_;
  int write_buffer_length_;

  // Set by DoRead() once it has called WSAEventSelect() for
  // FD_READ | FD_CLOSE on the read event, so that is done only once.
  bool non_blocking_reads_initialized_;

 private:
  friend class base::RefCounted<Core>;

  // Receives the signal for |read_overlapped_.hEvent| and dispatches it to
  // the owning socket's connect or read completion handler.
  class ReadDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit ReadDelegate(Core* core) : core_(core) {}
    virtual ~ReadDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    // The Core this delegate belongs to.
    Core* const core_;
  };

  // Receives the signal for |write_overlapped_.hEvent| and dispatches it to
  // the owning socket's write completion handler.
  class WriteDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit WriteDelegate(Core* core) : core_(core) {}
    virtual ~WriteDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    // The Core this delegate belongs to.
    Core* const core_;
  };

  // Private: destruction happens through base::RefCounted<Core>.
  ~Core();

  // The socket that created this object.
  // NULL after Detach().
  TCPSocketWin* socket_;

  // |reader_| handles the signals from |read_watcher_|.
  ReadDelegate reader_;
  // |writer_| handles the signals from |write_watcher_|.
  WriteDelegate writer_;

  // |read_watcher_| watches for events from Connect() and Read().
  base::win::ObjectWatcher read_watcher_;
  // |write_watcher_| watches for events from Write();
  base::win::ObjectWatcher write_watcher_;

  DISALLOW_COPY_AND_ASSIGN(Core);
};
207
208TCPSocketWin::Core::Core(TCPSocketWin* socket)
209    : read_buffer_length_(0),
210      write_buffer_length_(0),
211      non_blocking_reads_initialized_(false),
212      socket_(socket),
213      reader_(this),
214      writer_(this) {
215  memset(&read_overlapped_, 0, sizeof(read_overlapped_));
216  memset(&write_overlapped_, 0, sizeof(write_overlapped_));
217
218  read_overlapped_.hEvent = WSACreateEvent();
219  write_overlapped_.hEvent = WSACreateEvent();
220}
221
222TCPSocketWin::Core::~Core() {
223  // Make sure the message loop is not watching this object anymore.
224  read_watcher_.StopWatching();
225  write_watcher_.StopWatching();
226
227  WSACloseEvent(read_overlapped_.hEvent);
228  memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
229  WSACloseEvent(write_overlapped_.hEvent);
230  memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
231}
232
233void TCPSocketWin::Core::WatchForRead() {
234  // We grab an extra reference because there is an IO operation in progress.
235  // Balanced in ReadDelegate::OnObjectSignaled().
236  AddRef();
237  read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
238}
239
240void TCPSocketWin::Core::WatchForWrite() {
241  // We grab an extra reference because there is an IO operation in progress.
242  // Balanced in WriteDelegate::OnObjectSignaled().
243  AddRef();
244  write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
245}
246
247void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
248  DCHECK_EQ(object, core_->read_overlapped_.hEvent);
249  if (core_->socket_) {
250    if (core_->socket_->waiting_connect_)
251      core_->socket_->DidCompleteConnect();
252    else
253      core_->socket_->DidSignalRead();
254  }
255
256  core_->Release();
257}
258
259void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
260    HANDLE object) {
261  DCHECK_EQ(object, core_->write_overlapped_.hEvent);
262  if (core_->socket_)
263    core_->socket_->DidCompleteWrite();
264
265  core_->Release();
266}
267
268//-----------------------------------------------------------------------------
269
// Creates a socket object with no underlying OS socket yet. Binds a new
// SOURCE_SOCKET net log, begins its SOCKET_ALIVE event with |source| as
// parameter, and calls EnsureWinsockInit().
TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
                           const net::NetLog::Source& source)
    : socket_(INVALID_SOCKET),
      accept_event_(WSA_INVALID_EVENT),
      accept_socket_(NULL),
      accept_address_(NULL),
      waiting_connect_(false),
      waiting_read_(false),
      waiting_write_(false),
      connect_os_error_(0),
      logging_multiple_connect_attempts_(false),
      net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
  net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
                      source.ToEventParametersCallback());
  EnsureWinsockInit();
}
286
// Closes the socket and ends the SOCKET_ALIVE event begun in the constructor.
TCPSocketWin::~TCPSocketWin() {
  Close();
  net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
}
291
292int TCPSocketWin::Open(AddressFamily family) {
293  DCHECK(CalledOnValidThread());
294  DCHECK_EQ(socket_, INVALID_SOCKET);
295
296  socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
297                                 IPPROTO_TCP);
298  if (socket_ == INVALID_SOCKET) {
299    PLOG(ERROR) << "CreatePlatformSocket() returned an error";
300    return MapSystemError(WSAGetLastError());
301  }
302
303  if (SetNonBlocking(socket_)) {
304    int result = MapSystemError(WSAGetLastError());
305    Close();
306    return result;
307  }
308
309  return OK;
310}
311
312int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
313                                       const IPEndPoint& peer_address) {
314  DCHECK(CalledOnValidThread());
315  DCHECK_EQ(socket_, INVALID_SOCKET);
316  DCHECK(!core_);
317
318  socket_ = socket;
319
320  if (SetNonBlocking(socket_)) {
321    int result = MapSystemError(WSAGetLastError());
322    Close();
323    return result;
324  }
325
326  core_ = new Core(this);
327  peer_address_.reset(new IPEndPoint(peer_address));
328
329  return OK;
330}
331
332int TCPSocketWin::AdoptListenSocket(SOCKET socket) {
333  DCHECK(CalledOnValidThread());
334  DCHECK_EQ(socket_, INVALID_SOCKET);
335
336  socket_ = socket;
337
338  if (SetNonBlocking(socket_)) {
339    int result = MapSystemError(WSAGetLastError());
340    Close();
341    return result;
342  }
343
344  // |core_| is not needed for sockets that are used to accept connections.
345  // The operation here is more like Open but with an existing socket.
346
347  return OK;
348}
349
350int TCPSocketWin::Bind(const IPEndPoint& address) {
351  DCHECK(CalledOnValidThread());
352  DCHECK_NE(socket_, INVALID_SOCKET);
353
354  SockaddrStorage storage;
355  if (!address.ToSockAddr(storage.addr, &storage.addr_len))
356    return ERR_ADDRESS_INVALID;
357
358  int result = bind(socket_, storage.addr, storage.addr_len);
359  if (result < 0) {
360    PLOG(ERROR) << "bind() returned an error";
361    return MapSystemError(WSAGetLastError());
362  }
363
364  return OK;
365}
366
367int TCPSocketWin::Listen(int backlog) {
368  DCHECK(CalledOnValidThread());
369  DCHECK_GT(backlog, 0);
370  DCHECK_NE(socket_, INVALID_SOCKET);
371  DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
372
373  accept_event_ = WSACreateEvent();
374  if (accept_event_ == WSA_INVALID_EVENT) {
375    PLOG(ERROR) << "WSACreateEvent()";
376    return MapSystemError(WSAGetLastError());
377  }
378
379  int result = listen(socket_, backlog);
380  if (result < 0) {
381    PLOG(ERROR) << "listen() returned an error";
382    return MapSystemError(WSAGetLastError());
383  }
384
385  return OK;
386}
387
388int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
389                         IPEndPoint* address,
390                         const CompletionCallback& callback) {
391  DCHECK(CalledOnValidThread());
392  DCHECK(socket);
393  DCHECK(address);
394  DCHECK(!callback.is_null());
395  DCHECK(accept_callback_.is_null());
396
397  net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
398
399  int result = AcceptInternal(socket, address);
400
401  if (result == ERR_IO_PENDING) {
402    // Start watching.
403    WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
404    accept_watcher_.StartWatching(accept_event_, this);
405
406    accept_socket_ = socket;
407    accept_address_ = address;
408    accept_callback_ = callback;
409  }
410
411  return result;
412}
413
414int TCPSocketWin::Connect(const IPEndPoint& address,
415                          const CompletionCallback& callback) {
416  DCHECK(CalledOnValidThread());
417  DCHECK_NE(socket_, INVALID_SOCKET);
418  DCHECK(!waiting_connect_);
419
420  // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
421  // Unless Close() is called to reset the internal state, a second call to
422  // Connect() is not allowed.
423  // Please note that we enforce this even if the previous Connect() has
424  // completed and failed. Although it is allowed to connect the same |socket_|
425  // again after a connection attempt failed on Windows, it results in
426  // unspecified behavior according to POSIX. Therefore, we make it behave in
427  // the same way as TCPSocketLibevent.
428  DCHECK(!peer_address_ && !core_);
429
430  if (!logging_multiple_connect_attempts_)
431    LogConnectBegin(AddressList(address));
432
433  peer_address_.reset(new IPEndPoint(address));
434
435  int rv = DoConnect();
436  if (rv == ERR_IO_PENDING) {
437    // Synchronous operation not supported.
438    DCHECK(!callback.is_null());
439    read_callback_ = callback;
440    waiting_connect_ = true;
441  } else {
442    DoConnectComplete(rv);
443  }
444
445  return rv;
446}
447
448bool TCPSocketWin::IsConnected() const {
449  DCHECK(CalledOnValidThread());
450
451  if (socket_ == INVALID_SOCKET || waiting_connect_)
452    return false;
453
454  if (waiting_read_)
455    return true;
456
457  // Check if connection is alive.
458  char c;
459  int rv = recv(socket_, &c, 1, MSG_PEEK);
460  if (rv == 0)
461    return false;
462  if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
463    return false;
464
465  return true;
466}
467
468bool TCPSocketWin::IsConnectedAndIdle() const {
469  DCHECK(CalledOnValidThread());
470
471  if (socket_ == INVALID_SOCKET || waiting_connect_)
472    return false;
473
474  if (waiting_read_)
475    return true;
476
477  // Check if connection is alive and we haven't received any data
478  // unexpectedly.
479  char c;
480  int rv = recv(socket_, &c, 1, MSG_PEEK);
481  if (rv >= 0)
482    return false;
483  if (WSAGetLastError() != WSAEWOULDBLOCK)
484    return false;
485
486  return true;
487}
488
489int TCPSocketWin::Read(IOBuffer* buf,
490                       int buf_len,
491                       const CompletionCallback& callback) {
492  DCHECK(CalledOnValidThread());
493  DCHECK_NE(socket_, INVALID_SOCKET);
494  DCHECK(!waiting_read_);
495  DCHECK(read_callback_.is_null());
496  DCHECK(!core_->read_iobuffer_);
497
498  return DoRead(buf, buf_len, callback);
499}
500
// Sends up to |buf_len| bytes from |buf| with an overlapped WSASend().
// Returns the number of bytes written when the send completes immediately, a
// net error on failure, or ERR_IO_PENDING when completion will be reported
// later; in the pending case |callback| and |buf| are stored until the write
// event is signaled.
int TCPSocketWin::Write(IOBuffer* buf,
                        int buf_len,
                        const CompletionCallback& callback) {
  DCHECK(CalledOnValidThread());
  DCHECK_NE(socket_, INVALID_SOCKET);
  DCHECK(!waiting_write_);
  DCHECK(write_callback_.is_null());
  DCHECK_GT(buf_len, 0);
  DCHECK(!core_->write_iobuffer_);

  base::StatsCounter writes("tcp.writes");
  writes.Increment();

  WSABUF write_buffer;
  write_buffer.len = buf_len;
  write_buffer.buf = buf->data();

  // TODO(wtc): Remove the assertion after enough testing.
  AssertEventNotSignaled(core_->write_overlapped_.hEvent);
  DWORD num;
  int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
                   &core_->write_overlapped_, NULL);
  if (rv == 0) {
    // WSASend() reported immediate success. Only treat the write as finished
    // if the event was signaled (and is now reset); otherwise fall through
    // and wait for it like a pending write.
    if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
      rv = static_cast<int>(num);
      if (rv > buf_len || rv < 0) {
        // It seems that some winsock interceptors report that more was written
        // than was available. Treat this as an error.  http://crbug.com/27870
        LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
                   << " bytes, but " << rv << " bytes reported.";
        return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
      }
      base::StatsCounter write_bytes("tcp.write_bytes");
      write_bytes.Add(rv);
      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
                                    buf->data());
      return rv;
    }
  } else {
    int os_error = WSAGetLastError();
    // WSA_IO_PENDING is not a failure; anything else is logged and returned.
    if (os_error != WSA_IO_PENDING) {
      int net_error = MapSystemError(os_error);
      net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
                        CreateNetLogSocketErrorCallback(net_error, os_error));
      return net_error;
    }
  }
  // The write is still in flight: keep the buffer and callback until
  // DidCompleteWrite() runs.
  waiting_write_ = true;
  write_callback_ = callback;
  core_->write_iobuffer_ = buf;
  core_->write_buffer_length_ = buf_len;
  core_->WatchForWrite();
  return ERR_IO_PENDING;
}
555
556int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
557  DCHECK(CalledOnValidThread());
558  DCHECK(address);
559
560  SockaddrStorage storage;
561  if (getsockname(socket_, storage.addr, &storage.addr_len))
562    return MapSystemError(WSAGetLastError());
563  if (!address->FromSockAddr(storage.addr, storage.addr_len))
564    return ERR_ADDRESS_INVALID;
565
566  return OK;
567}
568
569int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
570  DCHECK(CalledOnValidThread());
571  DCHECK(address);
572  if (!IsConnected())
573    return ERR_SOCKET_NOT_CONNECTED;
574  *address = *peer_address_;
575  return OK;
576}
577
578int TCPSocketWin::SetDefaultOptionsForServer() {
579  return SetExclusiveAddrUse();
580}
581
582void TCPSocketWin::SetDefaultOptionsForClient() {
583  // Increase the socket buffer sizes from the default sizes for WinXP.  In
584  // performance testing, there is substantial benefit by increasing from 8KB
585  // to 64KB.
586  // See also:
587  //    http://support.microsoft.com/kb/823764/EN-US
588  // On Vista, if we manually set these sizes, Vista turns off its receive
589  // window auto-tuning feature.
590  //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
591  // Since Vista's auto-tune is better than any static value we can could set,
592  // only change these on pre-vista machines.
593  if (base::win::GetVersion() < base::win::VERSION_VISTA) {
594    const int32 kSocketBufferSize = 64 * 1024;
595    SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
596    SetSocketSendBufferSize(socket_, kSocketBufferSize);
597  }
598
599  DisableNagle(socket_, true);
600  SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
601}
602
603int TCPSocketWin::SetExclusiveAddrUse() {
604  // On Windows, a bound end point can be hijacked by another process by
605  // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
606  // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
607  // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
608  // socket to forcibly bind to the end point until the end point is unbound.
609  // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
610  // MSDN: http://goo.gl/M6fjQ.
611  //
612  // Unlike on *nix, on Windows a TCP server socket can always bind to an end
613  // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
614  // needed here.
615  //
616  // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
617  // point in TIME_WAIT status. It does not have this effect for a TCP server
618  // socket.
619
620  BOOL true_value = 1;
621  int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
622                      reinterpret_cast<const char*>(&true_value),
623                      sizeof(true_value));
624  if (rv < 0)
625    return MapSystemError(errno);
626  return OK;
627}
628
629int TCPSocketWin::SetReceiveBufferSize(int32 size) {
630  DCHECK(CalledOnValidThread());
631  return SetSocketReceiveBufferSize(socket_, size);
632}
633
634int TCPSocketWin::SetSendBufferSize(int32 size) {
635  DCHECK(CalledOnValidThread());
636  return SetSocketSendBufferSize(socket_, size);
637}
638
639bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
640  return SetTCPKeepAlive(socket_, enable, delay);
641}
642
643bool TCPSocketWin::SetNoDelay(bool no_delay) {
644  return DisableNagle(socket_, no_delay);
645}
646
647void TCPSocketWin::Close() {
648  DCHECK(CalledOnValidThread());
649
650  if (socket_ != INVALID_SOCKET) {
651    // Only log the close event if there's actually a socket to close.
652    net_log_.AddEvent(NetLog::EventType::TYPE_SOCKET_CLOSED);
653
654    // Note: don't use CancelIo to cancel pending IO because it doesn't work
655    // when there is a Winsock layered service provider.
656
657    // In most socket implementations, closing a socket results in a graceful
658    // connection shutdown, but in Winsock we have to call shutdown explicitly.
659    // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
660    // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
661    shutdown(socket_, SD_SEND);
662
663    // This cancels any pending IO.
664    if (closesocket(socket_) < 0)
665      PLOG(ERROR) << "closesocket";
666    socket_ = INVALID_SOCKET;
667  }
668
669  if (!accept_callback_.is_null()) {
670    accept_watcher_.StopWatching();
671    accept_socket_ = NULL;
672    accept_address_ = NULL;
673    accept_callback_.Reset();
674  }
675
676  if (accept_event_) {
677    WSACloseEvent(accept_event_);
678    accept_event_ = WSA_INVALID_EVENT;
679  }
680
681  if (core_) {
682    if (waiting_connect_) {
683      // We closed the socket, so this notification will never come.
684      // From MSDN' WSAEventSelect documentation:
685      // "Closing a socket with closesocket also cancels the association and
686      // selection of network events specified in WSAEventSelect for the
687      // socket".
688      core_->Release();
689    }
690    core_->Detach();
691    core_ = NULL;
692  }
693
694  waiting_connect_ = false;
695  waiting_read_ = false;
696  waiting_write_ = false;
697
698  read_callback_.Reset();
699  write_callback_.Reset();
700  peer_address_.reset();
701  connect_os_error_ = 0;
702}
703
704void TCPSocketWin::StartLoggingMultipleConnectAttempts(
705    const AddressList& addresses) {
706  if (!logging_multiple_connect_attempts_) {
707    logging_multiple_connect_attempts_ = true;
708    LogConnectBegin(addresses);
709  } else {
710    NOTREACHED();
711  }
712}
713
714void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
715  if (logging_multiple_connect_attempts_) {
716    LogConnectEnd(net_error);
717    logging_multiple_connect_attempts_ = false;
718  } else {
719    NOTREACHED();
720  }
721}
722
723int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
724                                 IPEndPoint* address) {
725  SockaddrStorage storage;
726  int new_socket = accept(socket_, storage.addr, &storage.addr_len);
727  if (new_socket < 0) {
728    int net_error = MapSystemError(WSAGetLastError());
729    if (net_error != ERR_IO_PENDING)
730      net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
731    return net_error;
732  }
733
734  IPEndPoint ip_end_point;
735  if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
736    NOTREACHED();
737    if (closesocket(new_socket) < 0)
738      PLOG(ERROR) << "closesocket";
739    int net_error = ERR_ADDRESS_INVALID;
740    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
741    return net_error;
742  }
743  scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
744      net_log_.net_log(), net_log_.source()));
745  int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
746  if (adopt_result != OK) {
747    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
748    return adopt_result;
749  }
750  *socket = tcp_socket.Pass();
751  *address = ip_end_point;
752  net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
753                    CreateNetLogIPEndPointCallback(&ip_end_point));
754  return OK;
755}
756
757void TCPSocketWin::OnObjectSignaled(HANDLE object) {
758  WSANETWORKEVENTS ev;
759  if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
760    PLOG(ERROR) << "WSAEnumNetworkEvents()";
761    return;
762  }
763
764  if (ev.lNetworkEvents & FD_ACCEPT) {
765    int result = AcceptInternal(accept_socket_, accept_address_);
766    if (result != ERR_IO_PENDING) {
767      accept_socket_ = NULL;
768      accept_address_ = NULL;
769      base::ResetAndReturn(&accept_callback_).Run(result);
770    }
771  } else {
772    // This happens when a client opens a connection and closes it before we
773    // have a chance to accept it.
774    DCHECK(ev.lNetworkEvents == 0);
775
776    // Start watching the next FD_ACCEPT event.
777    WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
778    accept_watcher_.StartWatching(accept_event_, this);
779  }
780}
781
// Starts a non-blocking connect to |peer_address_|. Creates the Core, begins
// the TCP_CONNECT_ATTEMPT net log event, and returns ERR_IO_PENDING when the
// result will be delivered through the read event, or a net error when the
// attempt fails right away (the OS error is kept in |connect_os_error_|).
int TCPSocketWin::DoConnect() {
  DCHECK_EQ(connect_os_error_, 0);
  DCHECK(!core_);

  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
                      CreateNetLogIPEndPointCallback(peer_address_.get()));

  core_ = new Core(this);
  // WSAEventSelect sets the socket to non-blocking mode as a side effect.
  // Our connect() and recv() calls require that the socket be non-blocking.
  WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);

  SockaddrStorage storage;
  if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
    return ERR_ADDRESS_INVALID;
  if (!connect(socket_, storage.addr, storage.addr_len)) {
    // Connected without waiting!
    //
    // The MSDN page for connect says:
    //   With a nonblocking socket, the connection attempt cannot be completed
    //   immediately. In this case, connect will return SOCKET_ERROR, and
    //   WSAGetLastError will return WSAEWOULDBLOCK.
    // which implies that for a nonblocking socket, connect never returns 0.
    // It's not documented whether the event object will be signaled or not
    // if connect does return 0.  So the code below is essentially dead code
    // and we don't know if it's correct.
    NOTREACHED();

    if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
      return OK;
  } else {
    int os_error = WSAGetLastError();
    // WSAEWOULDBLOCK is the expected "in progress" result; anything else is a
    // real failure.
    if (os_error != WSAEWOULDBLOCK) {
      LOG(ERROR) << "connect failed: " << os_error;
      connect_os_error_ = os_error;
      int rv = MapConnectError(os_error);
      CHECK_NE(ERR_IO_PENDING, rv);
      return rv;
    }
  }

  // The connect is in progress; the read event is signaled when it finishes.
  core_->WatchForRead();
  return ERR_IO_PENDING;
}
826
827void TCPSocketWin::DoConnectComplete(int result) {
828  // Log the end of this attempt (and any OS error it threw).
829  int os_error = connect_os_error_;
830  connect_os_error_ = 0;
831  if (result != OK) {
832    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
833                      NetLog::IntegerCallback("os_error", os_error));
834  } else {
835    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
836  }
837
838  if (!logging_multiple_connect_attempts_)
839    LogConnectEnd(result);
840}
841
842void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
843  base::StatsCounter connects("tcp.connect");
844  connects.Increment();
845
846  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
847                      addresses.CreateNetLogCallback());
848}
849
850void TCPSocketWin::LogConnectEnd(int net_error) {
851  if (net_error == OK)
852    UpdateConnectionTypeHistograms(CONNECTION_ANY);
853
854  if (net_error != OK) {
855    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
856    return;
857  }
858
859  struct sockaddr_storage source_address;
860  socklen_t addrlen = sizeof(source_address);
861  int rv = getsockname(
862      socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
863  if (rv != 0) {
864    LOG(ERROR) << "getsockname() [rv: " << rv
865               << "] error: " << WSAGetLastError();
866    NOTREACHED();
867    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
868    return;
869  }
870
871  net_log_.EndEvent(
872      NetLog::TYPE_TCP_CONNECT,
873      CreateNetLogSourceAddressCallback(
874          reinterpret_cast<const struct sockaddr*>(&source_address),
875          sizeof(source_address)));
876}
877
878int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
879                         const CompletionCallback& callback) {
880  if (!core_->non_blocking_reads_initialized_) {
881    WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
882                   FD_READ | FD_CLOSE);
883    core_->non_blocking_reads_initialized_ = true;
884  }
885  int rv = recv(socket_, buf->data(), buf_len, 0);
886  if (rv == SOCKET_ERROR) {
887    int os_error = WSAGetLastError();
888    if (os_error != WSAEWOULDBLOCK) {
889      int net_error = MapSystemError(os_error);
890      net_log_.AddEvent(
891          NetLog::TYPE_SOCKET_READ_ERROR,
892          CreateNetLogSocketErrorCallback(net_error, os_error));
893      return net_error;
894    }
895  } else {
896    base::StatsCounter read_bytes("tcp.read_bytes");
897    if (rv > 0)
898      read_bytes.Add(rv);
899    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
900                                  buf->data());
901    return rv;
902  }
903
904  waiting_read_ = true;
905  read_callback_ = callback;
906  core_->read_iobuffer_ = buf;
907  core_->read_buffer_length_ = buf_len;
908  core_->WatchForRead();
909  return ERR_IO_PENDING;
910}
911
// Runs when the read event is signaled while a connect is pending. Works out
// the connect result from the recorded network events, logs it through
// DoConnectComplete(), clears |waiting_connect_| and runs the stored connect
// callback with the result.
void TCPSocketWin::DidCompleteConnect() {
  DCHECK(waiting_connect_);
  DCHECK(!read_callback_.is_null());
  int result;

  WSANETWORKEVENTS events;
  int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
                                &events);
  int os_error = 0;
  if (rv == SOCKET_ERROR) {
    NOTREACHED();
    os_error = WSAGetLastError();
    result = MapSystemError(os_error);
  } else if (events.lNetworkEvents & FD_CONNECT) {
    // The per-event error slot carries the outcome of the connect attempt.
    os_error = events.iErrorCode[FD_CONNECT_BIT];
    result = MapConnectError(os_error);
  } else {
    NOTREACHED();
    result = ERR_UNEXPECTED;
  }

  // DoConnectComplete() reads the OS error from |connect_os_error_|.
  connect_os_error_ = os_error;
  DoConnectComplete(result);
  waiting_connect_ = false;

  DCHECK_NE(result, ERR_IO_PENDING);
  // The callback is cleared before it runs.
  base::ResetAndReturn(&read_callback_).Run(result);
}
940
941void TCPSocketWin::DidCompleteWrite() {
942  DCHECK(waiting_write_);
943  DCHECK(!write_callback_.is_null());
944
945  DWORD num_bytes, flags;
946  BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
947                                   &num_bytes, FALSE, &flags);
948  WSAResetEvent(core_->write_overlapped_.hEvent);
949  waiting_write_ = false;
950  int rv;
951  if (!ok) {
952    int os_error = WSAGetLastError();
953    rv = MapSystemError(os_error);
954    net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
955                      CreateNetLogSocketErrorCallback(rv, os_error));
956  } else {
957    rv = static_cast<int>(num_bytes);
958    if (rv > core_->write_buffer_length_ || rv < 0) {
959      // It seems that some winsock interceptors report that more was written
960      // than was available. Treat this as an error.  http://crbug.com/27870
961      LOG(ERROR) << "Detected broken LSP: Asked to write "
962                 << core_->write_buffer_length_ << " bytes, but " << rv
963                 << " bytes reported.";
964      rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
965    } else {
966      base::StatsCounter write_bytes("tcp.write_bytes");
967      write_bytes.Add(num_bytes);
968      net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
969                                    core_->write_iobuffer_->data());
970    }
971  }
972
973  core_->write_iobuffer_ = NULL;
974
975  DCHECK_NE(rv, ERR_IO_PENDING);
976  base::ResetAndReturn(&write_callback_).Run(rv);
977}
978
979void TCPSocketWin::DidSignalRead() {
980  DCHECK(waiting_read_);
981  DCHECK(!read_callback_.is_null());
982
983  int os_error = 0;
984  WSANETWORKEVENTS network_events;
985  int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
986                                &network_events);
987  if (rv == SOCKET_ERROR) {
988    os_error = WSAGetLastError();
989    rv = MapSystemError(os_error);
990  } else if (network_events.lNetworkEvents) {
991    DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
992    // If network_events.lNetworkEvents is FD_CLOSE and
993    // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
994    // connection closure. It is tempting to directly set rv to 0 in
995    // this case, but the MSDN pages for WSAEventSelect and
996    // WSAAsyncSelect recommend we still call DoRead():
997    //   FD_CLOSE should only be posted after all data is read from a
998    //   socket, but an application should check for remaining data upon
999    //   receipt of FD_CLOSE to avoid any possibility of losing data.
1000    //
1001    // If network_events.iErrorCode[FD_READ_BIT] or
1002    // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
1003    // DoRead() because recv() reports a more accurate error code
1004    // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
1005    // reset.
1006    rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
1007                read_callback_);
1008    if (rv == ERR_IO_PENDING)
1009      return;
1010  } else {
1011    // This may happen because Read() may succeed synchronously and
1012    // consume all the received data without resetting the event object.
1013    core_->WatchForRead();
1014    return;
1015  }
1016
1017  waiting_read_ = false;
1018  core_->read_iobuffer_ = NULL;
1019  core_->read_buffer_length_ = 0;
1020
1021  DCHECK_NE(rv, ERR_IO_PENDING);
1022  base::ResetAndReturn(&read_callback_).Run(rv);
1023}
1024
1025}  // namespace net
1026