1// Copyright 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "net/socket/tcp_socket_win.h" 6 7#include <mstcpip.h> 8 9#include "base/callback_helpers.h" 10#include "base/logging.h" 11#include "base/metrics/stats_counters.h" 12#include "base/win/windows_version.h" 13#include "net/base/address_list.h" 14#include "net/base/connection_type_histograms.h" 15#include "net/base/io_buffer.h" 16#include "net/base/ip_endpoint.h" 17#include "net/base/net_errors.h" 18#include "net/base/net_util.h" 19#include "net/base/network_change_notifier.h" 20#include "net/base/winsock_init.h" 21#include "net/base/winsock_util.h" 22#include "net/socket/socket_descriptor.h" 23#include "net/socket/socket_net_log_params.h" 24 25namespace net { 26 27namespace { 28 29const int kTCPKeepAliveSeconds = 45; 30 31bool SetSocketReceiveBufferSize(SOCKET socket, int32 size) { 32 int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF, 33 reinterpret_cast<const char*>(&size), sizeof(size)); 34 DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError(); 35 return rv == 0; 36} 37 38bool SetSocketSendBufferSize(SOCKET socket, int32 size) { 39 int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF, 40 reinterpret_cast<const char*>(&size), sizeof(size)); 41 DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError(); 42 return rv == 0; 43} 44 45// Disable Nagle. 46// The Nagle implementation on windows is governed by RFC 896. The idea 47// behind Nagle is to reduce small packets on the network. When Nagle is 48// enabled, if a partial packet has been sent, the TCP stack will disallow 49// further *partial* packets until an ACK has been received from the other 50// side. Good applications should always strive to send as much data as 51// possible and avoid partial-packet sends. However, in most real world 52// applications, there are edge cases where this does not happen, and two 53// partial packets may be sent back to back. For a browser, it is NEVER 54// a benefit to delay for an RTT before the second packet is sent. 55// 56// As a practical example in Chromium today, consider the case of a small 57// POST. I have verified this: 58// Client writes 649 bytes of header (partial packet #1) 59// Client writes 50 bytes of POST data (partial packet #2) 60// In the above example, with Nagle, a RTT delay is inserted between these 61// two sends due to nagle. RTTs can easily be 100ms or more. The best 62// fix is to make sure that for POSTing data, we write as much data as 63// possible and minimize partial packets. We will fix that. But disabling 64// Nagle also ensure we don't run into this delay in other edge cases. 65// See also: 66// http://technet.microsoft.com/en-us/library/bb726981.aspx 67bool DisableNagle(SOCKET socket, bool disable) { 68 BOOL val = disable ? TRUE : FALSE; 69 int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, 70 reinterpret_cast<const char*>(&val), 71 sizeof(val)); 72 DCHECK(!rv) << "Could not disable nagle"; 73 return rv == 0; 74} 75 76// Enable TCP Keep-Alive to prevent NAT routers from timing out TCP 77// connections. See http://crbug.com/27400 for details. 78bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) { 79 int delay = delay_secs * 1000; 80 struct tcp_keepalive keepalive_vals = { 81 enable ? 1 : 0, // TCP keep-alive on. 82 delay, // Delay seconds before sending first TCP keep-alive packet. 83 delay, // Delay seconds between sending TCP keep-alive packets. 84 }; 85 DWORD bytes_returned = 0xABAB; 86 int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals, 87 sizeof(keepalive_vals), NULL, 0, 88 &bytes_returned, NULL, NULL); 89 DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket 90 << " [error: " << WSAGetLastError() << "]."; 91 92 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive. 93 return rv == 0; 94} 95 96int MapConnectError(int os_error) { 97 switch (os_error) { 98 // connect fails with WSAEACCES when Windows Firewall blocks the 99 // connection. 100 case WSAEACCES: 101 return ERR_NETWORK_ACCESS_DENIED; 102 case WSAETIMEDOUT: 103 return ERR_CONNECTION_TIMED_OUT; 104 default: { 105 int net_error = MapSystemError(os_error); 106 if (net_error == ERR_FAILED) 107 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED. 108 109 // Give a more specific error when the user is offline. 110 if (net_error == ERR_ADDRESS_UNREACHABLE && 111 NetworkChangeNotifier::IsOffline()) { 112 return ERR_INTERNET_DISCONNECTED; 113 } 114 115 return net_error; 116 } 117 } 118} 119 120} // namespace 121 122//----------------------------------------------------------------------------- 123 124// This class encapsulates all the state that has to be preserved as long as 125// there is a network IO operation in progress. If the owner TCPSocketWin is 126// destroyed while an operation is in progress, the Core is detached and it 127// lives until the operation completes and the OS doesn't reference any resource 128// declared on this class anymore. 129class TCPSocketWin::Core : public base::RefCounted<Core> { 130 public: 131 explicit Core(TCPSocketWin* socket); 132 133 // Start watching for the end of a read or write operation. 134 void WatchForRead(); 135 void WatchForWrite(); 136 137 // The TCPSocketWin is going away. 138 void Detach() { socket_ = NULL; } 139 140 // The separate OVERLAPPED variables for asynchronous operation. 141 // |read_overlapped_| is used for both Connect() and Read(). 142 // |write_overlapped_| is only used for Write(); 143 OVERLAPPED read_overlapped_; 144 OVERLAPPED write_overlapped_; 145 146 // The buffers used in Read() and Write(). 147 scoped_refptr<IOBuffer> read_iobuffer_; 148 scoped_refptr<IOBuffer> write_iobuffer_; 149 int read_buffer_length_; 150 int write_buffer_length_; 151 152 bool non_blocking_reads_initialized_; 153 154 private: 155 friend class base::RefCounted<Core>; 156 157 class ReadDelegate : public base::win::ObjectWatcher::Delegate { 158 public: 159 explicit ReadDelegate(Core* core) : core_(core) {} 160 virtual ~ReadDelegate() {} 161 162 // base::ObjectWatcher::Delegate methods: 163 virtual void OnObjectSignaled(HANDLE object); 164 165 private: 166 Core* const core_; 167 }; 168 169 class WriteDelegate : public base::win::ObjectWatcher::Delegate { 170 public: 171 explicit WriteDelegate(Core* core) : core_(core) {} 172 virtual ~WriteDelegate() {} 173 174 // base::ObjectWatcher::Delegate methods: 175 virtual void OnObjectSignaled(HANDLE object); 176 177 private: 178 Core* const core_; 179 }; 180 181 ~Core(); 182 183 // The socket that created this object. 184 TCPSocketWin* socket_; 185 186 // |reader_| handles the signals from |read_watcher_|. 187 ReadDelegate reader_; 188 // |writer_| handles the signals from |write_watcher_|. 189 WriteDelegate writer_; 190 191 // |read_watcher_| watches for events from Connect() and Read(). 192 base::win::ObjectWatcher read_watcher_; 193 // |write_watcher_| watches for events from Write(); 194 base::win::ObjectWatcher write_watcher_; 195 196 DISALLOW_COPY_AND_ASSIGN(Core); 197}; 198 199TCPSocketWin::Core::Core(TCPSocketWin* socket) 200 : read_buffer_length_(0), 201 write_buffer_length_(0), 202 non_blocking_reads_initialized_(false), 203 socket_(socket), 204 reader_(this), 205 writer_(this) { 206 memset(&read_overlapped_, 0, sizeof(read_overlapped_)); 207 memset(&write_overlapped_, 0, sizeof(write_overlapped_)); 208 209 read_overlapped_.hEvent = WSACreateEvent(); 210 write_overlapped_.hEvent = WSACreateEvent(); 211} 212 213TCPSocketWin::Core::~Core() { 214 // Make sure the message loop is not watching this object anymore. 215 read_watcher_.StopWatching(); 216 write_watcher_.StopWatching(); 217 218 WSACloseEvent(read_overlapped_.hEvent); 219 memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_)); 220 WSACloseEvent(write_overlapped_.hEvent); 221 memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_)); 222} 223 224void TCPSocketWin::Core::WatchForRead() { 225 // We grab an extra reference because there is an IO operation in progress. 226 // Balanced in ReadDelegate::OnObjectSignaled(). 227 AddRef(); 228 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_); 229} 230 231void TCPSocketWin::Core::WatchForWrite() { 232 // We grab an extra reference because there is an IO operation in progress. 233 // Balanced in WriteDelegate::OnObjectSignaled(). 234 AddRef(); 235 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_); 236} 237 238void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) { 239 DCHECK_EQ(object, core_->read_overlapped_.hEvent); 240 if (core_->socket_) { 241 if (core_->socket_->waiting_connect_) 242 core_->socket_->DidCompleteConnect(); 243 else 244 core_->socket_->DidSignalRead(); 245 } 246 247 core_->Release(); 248} 249 250void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled( 251 HANDLE object) { 252 DCHECK_EQ(object, core_->write_overlapped_.hEvent); 253 if (core_->socket_) 254 core_->socket_->DidCompleteWrite(); 255 256 core_->Release(); 257} 258 259//----------------------------------------------------------------------------- 260 261TCPSocketWin::TCPSocketWin(net::NetLog* net_log, 262 const net::NetLog::Source& source) 263 : socket_(INVALID_SOCKET), 264 accept_event_(WSA_INVALID_EVENT), 265 accept_socket_(NULL), 266 accept_address_(NULL), 267 waiting_connect_(false), 268 waiting_read_(false), 269 waiting_write_(false), 270 connect_os_error_(0), 271 logging_multiple_connect_attempts_(false), 272 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) { 273 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, 274 source.ToEventParametersCallback()); 275 EnsureWinsockInit(); 276} 277 278TCPSocketWin::~TCPSocketWin() { 279 Close(); 280 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE); 281} 282 283int TCPSocketWin::Open(AddressFamily family) { 284 DCHECK(CalledOnValidThread()); 285 DCHECK_EQ(socket_, INVALID_SOCKET); 286 287 socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM, 288 IPPROTO_TCP); 289 if (socket_ == INVALID_SOCKET) { 290 PLOG(ERROR) << "CreatePlatformSocket() returned an error"; 291 return MapSystemError(WSAGetLastError()); 292 } 293 294 if (SetNonBlocking(socket_)) { 295 int result = MapSystemError(WSAGetLastError()); 296 Close(); 297 return result; 298 } 299 300 return OK; 301} 302 303int TCPSocketWin::AdoptConnectedSocket(SOCKET socket, 304 const IPEndPoint& peer_address) { 305 DCHECK(CalledOnValidThread()); 306 DCHECK_EQ(socket_, INVALID_SOCKET); 307 DCHECK(!core_); 308 309 socket_ = socket; 310 311 if (SetNonBlocking(socket_)) { 312 int result = MapSystemError(WSAGetLastError()); 313 Close(); 314 return result; 315 } 316 317 core_ = new Core(this); 318 peer_address_.reset(new IPEndPoint(peer_address)); 319 320 return OK; 321} 322 323int TCPSocketWin::Bind(const IPEndPoint& address) { 324 DCHECK(CalledOnValidThread()); 325 DCHECK_NE(socket_, INVALID_SOCKET); 326 327 SockaddrStorage storage; 328 if (!address.ToSockAddr(storage.addr, &storage.addr_len)) 329 return ERR_ADDRESS_INVALID; 330 331 int result = bind(socket_, storage.addr, storage.addr_len); 332 if (result < 0) { 333 PLOG(ERROR) << "bind() returned an error"; 334 return MapSystemError(WSAGetLastError()); 335 } 336 337 return OK; 338} 339 340int TCPSocketWin::Listen(int backlog) { 341 DCHECK(CalledOnValidThread()); 342 DCHECK_GT(backlog, 0); 343 DCHECK_NE(socket_, INVALID_SOCKET); 344 DCHECK_EQ(accept_event_, WSA_INVALID_EVENT); 345 346 accept_event_ = WSACreateEvent(); 347 if (accept_event_ == WSA_INVALID_EVENT) { 348 PLOG(ERROR) << "WSACreateEvent()"; 349 return MapSystemError(WSAGetLastError()); 350 } 351 352 int result = listen(socket_, backlog); 353 if (result < 0) { 354 PLOG(ERROR) << "listen() returned an error"; 355 return MapSystemError(WSAGetLastError()); 356 } 357 358 return OK; 359} 360 361int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket, 362 IPEndPoint* address, 363 const CompletionCallback& callback) { 364 DCHECK(CalledOnValidThread()); 365 DCHECK(socket); 366 DCHECK(address); 367 DCHECK(!callback.is_null()); 368 DCHECK(accept_callback_.is_null()); 369 370 net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT); 371 372 int result = AcceptInternal(socket, address); 373 374 if (result == ERR_IO_PENDING) { 375 // Start watching. 376 WSAEventSelect(socket_, accept_event_, FD_ACCEPT); 377 accept_watcher_.StartWatching(accept_event_, this); 378 379 accept_socket_ = socket; 380 accept_address_ = address; 381 accept_callback_ = callback; 382 } 383 384 return result; 385} 386 387int TCPSocketWin::Connect(const IPEndPoint& address, 388 const CompletionCallback& callback) { 389 DCHECK(CalledOnValidThread()); 390 DCHECK_NE(socket_, INVALID_SOCKET); 391 DCHECK(!waiting_connect_); 392 393 // |peer_address_| and |core_| will be non-NULL if Connect() has been called. 394 // Unless Close() is called to reset the internal state, a second call to 395 // Connect() is not allowed. 396 // Please note that we enforce this even if the previous Connect() has 397 // completed and failed. Although it is allowed to connect the same |socket_| 398 // again after a connection attempt failed on Windows, it results in 399 // unspecified behavior according to POSIX. Therefore, we make it behave in 400 // the same way as TCPSocketLibevent. 401 DCHECK(!peer_address_ && !core_); 402 403 if (!logging_multiple_connect_attempts_) 404 LogConnectBegin(AddressList(address)); 405 406 peer_address_.reset(new IPEndPoint(address)); 407 408 int rv = DoConnect(); 409 if (rv == ERR_IO_PENDING) { 410 // Synchronous operation not supported. 411 DCHECK(!callback.is_null()); 412 read_callback_ = callback; 413 waiting_connect_ = true; 414 } else { 415 DoConnectComplete(rv); 416 } 417 418 return rv; 419} 420 421bool TCPSocketWin::IsConnected() const { 422 DCHECK(CalledOnValidThread()); 423 424 if (socket_ == INVALID_SOCKET || waiting_connect_) 425 return false; 426 427 if (waiting_read_) 428 return true; 429 430 // Check if connection is alive. 431 char c; 432 int rv = recv(socket_, &c, 1, MSG_PEEK); 433 if (rv == 0) 434 return false; 435 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) 436 return false; 437 438 return true; 439} 440 441bool TCPSocketWin::IsConnectedAndIdle() const { 442 DCHECK(CalledOnValidThread()); 443 444 if (socket_ == INVALID_SOCKET || waiting_connect_) 445 return false; 446 447 if (waiting_read_) 448 return true; 449 450 // Check if connection is alive and we haven't received any data 451 // unexpectedly. 452 char c; 453 int rv = recv(socket_, &c, 1, MSG_PEEK); 454 if (rv >= 0) 455 return false; 456 if (WSAGetLastError() != WSAEWOULDBLOCK) 457 return false; 458 459 return true; 460} 461 462int TCPSocketWin::Read(IOBuffer* buf, 463 int buf_len, 464 const CompletionCallback& callback) { 465 DCHECK(CalledOnValidThread()); 466 DCHECK_NE(socket_, INVALID_SOCKET); 467 DCHECK(!waiting_read_); 468 DCHECK(read_callback_.is_null()); 469 DCHECK(!core_->read_iobuffer_); 470 471 return DoRead(buf, buf_len, callback); 472} 473 474int TCPSocketWin::Write(IOBuffer* buf, 475 int buf_len, 476 const CompletionCallback& callback) { 477 DCHECK(CalledOnValidThread()); 478 DCHECK_NE(socket_, INVALID_SOCKET); 479 DCHECK(!waiting_write_); 480 DCHECK(write_callback_.is_null()); 481 DCHECK_GT(buf_len, 0); 482 DCHECK(!core_->write_iobuffer_); 483 484 base::StatsCounter writes("tcp.writes"); 485 writes.Increment(); 486 487 WSABUF write_buffer; 488 write_buffer.len = buf_len; 489 write_buffer.buf = buf->data(); 490 491 // TODO(wtc): Remove the assertion after enough testing. 492 AssertEventNotSignaled(core_->write_overlapped_.hEvent); 493 DWORD num; 494 int rv = WSASend(socket_, &write_buffer, 1, &num, 0, 495 &core_->write_overlapped_, NULL); 496 if (rv == 0) { 497 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) { 498 rv = static_cast<int>(num); 499 if (rv > buf_len || rv < 0) { 500 // It seems that some winsock interceptors report that more was written 501 // than was available. Treat this as an error. http://crbug.com/27870 502 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len 503 << " bytes, but " << rv << " bytes reported."; 504 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 505 } 506 base::StatsCounter write_bytes("tcp.write_bytes"); 507 write_bytes.Add(rv); 508 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv, 509 buf->data()); 510 return rv; 511 } 512 } else { 513 int os_error = WSAGetLastError(); 514 if (os_error != WSA_IO_PENDING) { 515 int net_error = MapSystemError(os_error); 516 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR, 517 CreateNetLogSocketErrorCallback(net_error, os_error)); 518 return net_error; 519 } 520 } 521 waiting_write_ = true; 522 write_callback_ = callback; 523 core_->write_iobuffer_ = buf; 524 core_->write_buffer_length_ = buf_len; 525 core_->WatchForWrite(); 526 return ERR_IO_PENDING; 527} 528 529int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const { 530 DCHECK(CalledOnValidThread()); 531 DCHECK(address); 532 533 SockaddrStorage storage; 534 if (getsockname(socket_, storage.addr, &storage.addr_len)) 535 return MapSystemError(WSAGetLastError()); 536 if (!address->FromSockAddr(storage.addr, storage.addr_len)) 537 return ERR_ADDRESS_INVALID; 538 539 return OK; 540} 541 542int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const { 543 DCHECK(CalledOnValidThread()); 544 DCHECK(address); 545 if (!IsConnected()) 546 return ERR_SOCKET_NOT_CONNECTED; 547 *address = *peer_address_; 548 return OK; 549} 550 551int TCPSocketWin::SetDefaultOptionsForServer() { 552 return SetExclusiveAddrUse(); 553} 554 555void TCPSocketWin::SetDefaultOptionsForClient() { 556 // Increase the socket buffer sizes from the default sizes for WinXP. In 557 // performance testing, there is substantial benefit by increasing from 8KB 558 // to 64KB. 559 // See also: 560 // http://support.microsoft.com/kb/823764/EN-US 561 // On Vista, if we manually set these sizes, Vista turns off its receive 562 // window auto-tuning feature. 563 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx 564 // Since Vista's auto-tune is better than any static value we can could set, 565 // only change these on pre-vista machines. 566 if (base::win::GetVersion() < base::win::VERSION_VISTA) { 567 const int32 kSocketBufferSize = 64 * 1024; 568 SetSocketReceiveBufferSize(socket_, kSocketBufferSize); 569 SetSocketSendBufferSize(socket_, kSocketBufferSize); 570 } 571 572 DisableNagle(socket_, true); 573 SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds); 574} 575 576int TCPSocketWin::SetExclusiveAddrUse() { 577 // On Windows, a bound end point can be hijacked by another process by 578 // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE 579 // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the 580 // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another 581 // socket to forcibly bind to the end point until the end point is unbound. 582 // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE. 583 // MSDN: http://goo.gl/M6fjQ. 584 // 585 // Unlike on *nix, on Windows a TCP server socket can always bind to an end 586 // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not 587 // needed here. 588 // 589 // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end 590 // point in TIME_WAIT status. It does not have this effect for a TCP server 591 // socket. 592 593 BOOL true_value = 1; 594 int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, 595 reinterpret_cast<const char*>(&true_value), 596 sizeof(true_value)); 597 if (rv < 0) 598 return MapSystemError(errno); 599 return OK; 600} 601 602bool TCPSocketWin::SetReceiveBufferSize(int32 size) { 603 DCHECK(CalledOnValidThread()); 604 return SetSocketReceiveBufferSize(socket_, size); 605} 606 607bool TCPSocketWin::SetSendBufferSize(int32 size) { 608 DCHECK(CalledOnValidThread()); 609 return SetSocketSendBufferSize(socket_, size); 610} 611 612bool TCPSocketWin::SetKeepAlive(bool enable, int delay) { 613 return SetTCPKeepAlive(socket_, enable, delay); 614} 615 616bool TCPSocketWin::SetNoDelay(bool no_delay) { 617 return DisableNagle(socket_, no_delay); 618} 619 620void TCPSocketWin::Close() { 621 DCHECK(CalledOnValidThread()); 622 623 if (socket_ != INVALID_SOCKET) { 624 // Note: don't use CancelIo to cancel pending IO because it doesn't work 625 // when there is a Winsock layered service provider. 626 627 // In most socket implementations, closing a socket results in a graceful 628 // connection shutdown, but in Winsock we have to call shutdown explicitly. 629 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure" 630 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx 631 shutdown(socket_, SD_SEND); 632 633 // This cancels any pending IO. 634 if (closesocket(socket_) < 0) 635 PLOG(ERROR) << "closesocket"; 636 socket_ = INVALID_SOCKET; 637 } 638 639 if (!accept_callback_.is_null()) { 640 accept_watcher_.StopWatching(); 641 accept_socket_ = NULL; 642 accept_address_ = NULL; 643 accept_callback_.Reset(); 644 } 645 646 if (accept_event_) { 647 WSACloseEvent(accept_event_); 648 accept_event_ = WSA_INVALID_EVENT; 649 } 650 651 if (core_) { 652 if (waiting_connect_) { 653 // We closed the socket, so this notification will never come. 654 // From MSDN' WSAEventSelect documentation: 655 // "Closing a socket with closesocket also cancels the association and 656 // selection of network events specified in WSAEventSelect for the 657 // socket". 658 core_->Release(); 659 } 660 core_->Detach(); 661 core_ = NULL; 662 } 663 664 waiting_connect_ = false; 665 waiting_read_ = false; 666 waiting_write_ = false; 667 668 read_callback_.Reset(); 669 write_callback_.Reset(); 670 peer_address_.reset(); 671 connect_os_error_ = 0; 672} 673 674bool TCPSocketWin::UsingTCPFastOpen() const { 675 // Not supported on windows. 676 return false; 677} 678 679void TCPSocketWin::StartLoggingMultipleConnectAttempts( 680 const AddressList& addresses) { 681 if (!logging_multiple_connect_attempts_) { 682 logging_multiple_connect_attempts_ = true; 683 LogConnectBegin(addresses); 684 } else { 685 NOTREACHED(); 686 } 687} 688 689void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) { 690 if (logging_multiple_connect_attempts_) { 691 LogConnectEnd(net_error); 692 logging_multiple_connect_attempts_ = false; 693 } else { 694 NOTREACHED(); 695 } 696} 697 698int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket, 699 IPEndPoint* address) { 700 SockaddrStorage storage; 701 int new_socket = accept(socket_, storage.addr, &storage.addr_len); 702 if (new_socket < 0) { 703 int net_error = MapSystemError(WSAGetLastError()); 704 if (net_error != ERR_IO_PENDING) 705 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error); 706 return net_error; 707 } 708 709 IPEndPoint ip_end_point; 710 if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) { 711 NOTREACHED(); 712 if (closesocket(new_socket) < 0) 713 PLOG(ERROR) << "closesocket"; 714 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, ERR_FAILED); 715 return ERR_FAILED; 716 } 717 scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin( 718 net_log_.net_log(), net_log_.source())); 719 int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point); 720 if (adopt_result != OK) { 721 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result); 722 return adopt_result; 723 } 724 *socket = tcp_socket.Pass(); 725 *address = ip_end_point; 726 net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT, 727 CreateNetLogIPEndPointCallback(&ip_end_point)); 728 return OK; 729} 730 731void TCPSocketWin::OnObjectSignaled(HANDLE object) { 732 WSANETWORKEVENTS ev; 733 if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) { 734 PLOG(ERROR) << "WSAEnumNetworkEvents()"; 735 return; 736 } 737 738 if (ev.lNetworkEvents & FD_ACCEPT) { 739 int result = AcceptInternal(accept_socket_, accept_address_); 740 if (result != ERR_IO_PENDING) { 741 accept_socket_ = NULL; 742 accept_address_ = NULL; 743 base::ResetAndReturn(&accept_callback_).Run(result); 744 } 745 } else { 746 // This happens when a client opens a connection and closes it before we 747 // have a chance to accept it. 748 DCHECK(ev.lNetworkEvents == 0); 749 750 // Start watching the next FD_ACCEPT event. 751 WSAEventSelect(socket_, accept_event_, FD_ACCEPT); 752 accept_watcher_.StartWatching(accept_event_, this); 753 } 754} 755 756int TCPSocketWin::DoConnect() { 757 DCHECK_EQ(connect_os_error_, 0); 758 DCHECK(!core_); 759 760 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 761 CreateNetLogIPEndPointCallback(peer_address_.get())); 762 763 core_ = new Core(this); 764 // WSAEventSelect sets the socket to non-blocking mode as a side effect. 765 // Our connect() and recv() calls require that the socket be non-blocking. 766 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT); 767 768 SockaddrStorage storage; 769 if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len)) 770 return ERR_INVALID_ARGUMENT; 771 if (!connect(socket_, storage.addr, storage.addr_len)) { 772 // Connected without waiting! 773 // 774 // The MSDN page for connect says: 775 // With a nonblocking socket, the connection attempt cannot be completed 776 // immediately. In this case, connect will return SOCKET_ERROR, and 777 // WSAGetLastError will return WSAEWOULDBLOCK. 778 // which implies that for a nonblocking socket, connect never returns 0. 779 // It's not documented whether the event object will be signaled or not 780 // if connect does return 0. So the code below is essentially dead code 781 // and we don't know if it's correct. 782 NOTREACHED(); 783 784 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) 785 return OK; 786 } else { 787 int os_error = WSAGetLastError(); 788 if (os_error != WSAEWOULDBLOCK) { 789 LOG(ERROR) << "connect failed: " << os_error; 790 connect_os_error_ = os_error; 791 int rv = MapConnectError(os_error); 792 CHECK_NE(ERR_IO_PENDING, rv); 793 return rv; 794 } 795 } 796 797 core_->WatchForRead(); 798 return ERR_IO_PENDING; 799} 800 801void TCPSocketWin::DoConnectComplete(int result) { 802 // Log the end of this attempt (and any OS error it threw). 803 int os_error = connect_os_error_; 804 connect_os_error_ = 0; 805 if (result != OK) { 806 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 807 NetLog::IntegerCallback("os_error", os_error)); 808 } else { 809 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT); 810 } 811 812 if (!logging_multiple_connect_attempts_) 813 LogConnectEnd(result); 814} 815 816void TCPSocketWin::LogConnectBegin(const AddressList& addresses) { 817 base::StatsCounter connects("tcp.connect"); 818 connects.Increment(); 819 820 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT, 821 addresses.CreateNetLogCallback()); 822} 823 824void TCPSocketWin::LogConnectEnd(int net_error) { 825 if (net_error == OK) 826 UpdateConnectionTypeHistograms(CONNECTION_ANY); 827 828 if (net_error != OK) { 829 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error); 830 return; 831 } 832 833 struct sockaddr_storage source_address; 834 socklen_t addrlen = sizeof(source_address); 835 int rv = getsockname( 836 socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen); 837 if (rv != 0) { 838 LOG(ERROR) << "getsockname() [rv: " << rv 839 << "] error: " << WSAGetLastError(); 840 NOTREACHED(); 841 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv); 842 return; 843 } 844 845 net_log_.EndEvent( 846 NetLog::TYPE_TCP_CONNECT, 847 CreateNetLogSourceAddressCallback( 848 reinterpret_cast<const struct sockaddr*>(&source_address), 849 sizeof(source_address))); 850} 851 852int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len, 853 const CompletionCallback& callback) { 854 if (!core_->non_blocking_reads_initialized_) { 855 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, 856 FD_READ | FD_CLOSE); 857 core_->non_blocking_reads_initialized_ = true; 858 } 859 int rv = recv(socket_, buf->data(), buf_len, 0); 860 if (rv == SOCKET_ERROR) { 861 int os_error = WSAGetLastError(); 862 if (os_error != WSAEWOULDBLOCK) { 863 int net_error = MapSystemError(os_error); 864 net_log_.AddEvent( 865 NetLog::TYPE_SOCKET_READ_ERROR, 866 CreateNetLogSocketErrorCallback(net_error, os_error)); 867 return net_error; 868 } 869 } else { 870 base::StatsCounter read_bytes("tcp.read_bytes"); 871 if (rv > 0) 872 read_bytes.Add(rv); 873 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv, 874 buf->data()); 875 return rv; 876 } 877 878 waiting_read_ = true; 879 read_callback_ = callback; 880 core_->read_iobuffer_ = buf; 881 core_->read_buffer_length_ = buf_len; 882 core_->WatchForRead(); 883 return ERR_IO_PENDING; 884} 885 886void TCPSocketWin::DidCompleteConnect() { 887 DCHECK(waiting_connect_); 888 DCHECK(!read_callback_.is_null()); 889 int result; 890 891 WSANETWORKEVENTS events; 892 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 893 &events); 894 int os_error = 0; 895 if (rv == SOCKET_ERROR) { 896 NOTREACHED(); 897 os_error = WSAGetLastError(); 898 result = MapSystemError(os_error); 899 } else if (events.lNetworkEvents & FD_CONNECT) { 900 os_error = events.iErrorCode[FD_CONNECT_BIT]; 901 result = MapConnectError(os_error); 902 } else { 903 NOTREACHED(); 904 result = ERR_UNEXPECTED; 905 } 906 907 connect_os_error_ = os_error; 908 DoConnectComplete(result); 909 waiting_connect_ = false; 910 911 DCHECK_NE(result, ERR_IO_PENDING); 912 base::ResetAndReturn(&read_callback_).Run(result); 913} 914 915void TCPSocketWin::DidCompleteWrite() { 916 DCHECK(waiting_write_); 917 DCHECK(!write_callback_.is_null()); 918 919 DWORD num_bytes, flags; 920 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_, 921 &num_bytes, FALSE, &flags); 922 WSAResetEvent(core_->write_overlapped_.hEvent); 923 waiting_write_ = false; 924 int rv; 925 if (!ok) { 926 int os_error = WSAGetLastError(); 927 rv = MapSystemError(os_error); 928 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR, 929 CreateNetLogSocketErrorCallback(rv, os_error)); 930 } else { 931 rv = static_cast<int>(num_bytes); 932 if (rv > core_->write_buffer_length_ || rv < 0) { 933 // It seems that some winsock interceptors report that more was written 934 // than was available. Treat this as an error. http://crbug.com/27870 935 LOG(ERROR) << "Detected broken LSP: Asked to write " 936 << core_->write_buffer_length_ << " bytes, but " << rv 937 << " bytes reported."; 938 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 939 } else { 940 base::StatsCounter write_bytes("tcp.write_bytes"); 941 write_bytes.Add(num_bytes); 942 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes, 943 core_->write_iobuffer_->data()); 944 } 945 } 946 947 core_->write_iobuffer_ = NULL; 948 949 DCHECK_NE(rv, ERR_IO_PENDING); 950 base::ResetAndReturn(&write_callback_).Run(rv); 951} 952 953void TCPSocketWin::DidSignalRead() { 954 DCHECK(waiting_read_); 955 DCHECK(!read_callback_.is_null()); 956 957 int os_error = 0; 958 WSANETWORKEVENTS network_events; 959 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 960 &network_events); 961 if (rv == SOCKET_ERROR) { 962 os_error = WSAGetLastError(); 963 rv = MapSystemError(os_error); 964 } else if (network_events.lNetworkEvents) { 965 DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0); 966 // If network_events.lNetworkEvents is FD_CLOSE and 967 // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful 968 // connection closure. It is tempting to directly set rv to 0 in 969 // this case, but the MSDN pages for WSAEventSelect and 970 // WSAAsyncSelect recommend we still call DoRead(): 971 // FD_CLOSE should only be posted after all data is read from a 972 // socket, but an application should check for remaining data upon 973 // receipt of FD_CLOSE to avoid any possibility of losing data. 974 // 975 // If network_events.iErrorCode[FD_READ_BIT] or 976 // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call 977 // DoRead() because recv() reports a more accurate error code 978 // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was 979 // reset. 980 rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_, 981 read_callback_); 982 if (rv == ERR_IO_PENDING) 983 return; 984 } else { 985 // This may happen because Read() may succeed synchronously and 986 // consume all the received data without resetting the event object. 987 core_->WatchForRead(); 988 return; 989 } 990 991 waiting_read_ = false; 992 core_->read_iobuffer_ = NULL; 993 core_->read_buffer_length_ = 0; 994 995 DCHECK_NE(rv, ERR_IO_PENDING); 996 base::ResetAndReturn(&read_callback_).Run(rv); 997} 998 999} // namespace net 1000 1001