tcp_client_socket_win.cc revision 7b9ca917061470268bf3395c8925d4b9cc52d8e1
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "net/socket/tcp_client_socket_win.h" 6 7#include "base/basictypes.h" 8#include "base/compiler_specific.h" 9#include "base/memory_debug.h" 10#include "base/metrics/stats_counters.h" 11#include "base/string_util.h" 12#include "base/sys_info.h" 13#include "net/base/address_list_net_log_param.h" 14#include "net/base/connection_type_histograms.h" 15#include "net/base/io_buffer.h" 16#include "net/base/net_errors.h" 17#include "net/base/net_log.h" 18#include "net/base/net_util.h" 19#include "net/base/network_change_notifier.h" 20#include "net/base/sys_addrinfo.h" 21#include "net/base/winsock_init.h" 22 23namespace net { 24 25namespace { 26 27// Assert that the (manual-reset) event object is not signaled. 28void AssertEventNotSignaled(WSAEVENT hEvent) { 29 DWORD wait_rv = WaitForSingleObject(hEvent, 0); 30 if (wait_rv != WAIT_TIMEOUT) { 31 DWORD err = ERROR_SUCCESS; 32 if (wait_rv == WAIT_FAILED) 33 err = GetLastError(); 34 CHECK(false); // Crash. 35 // This LOG statement is unreachable since we have already crashed, but it 36 // should prevent the compiler from optimizing away the |wait_rv| and 37 // |err| variables so they appear nicely on the stack in crash dumps. 38 VLOG(1) << "wait_rv=" << wait_rv << ", err=" << err; 39 } 40} 41 42// If the (manual-reset) event object is signaled, resets it and returns true. 43// Otherwise, does nothing and returns false. Called after a Winsock function 44// succeeds synchronously 45// 46// Our testing shows that except in rare cases (when running inside QEMU), 47// the event object is already signaled at this point, so we call this method 48// to avoid a context switch in common cases. This is just a performance 49// optimization. The code still works if this function simply returns false. 
bool ResetEventIfSignaled(WSAEVENT hEvent) {
  // TODO(wtc): Remove the CHECKs after enough testing.
  // A zero timeout polls the event state without blocking.
  DWORD wait_rv = WaitForSingleObject(hEvent, 0);
  if (wait_rv == WAIT_TIMEOUT)
    return false;  // The event object is not signaled.
  CHECK_EQ(WAIT_OBJECT_0, wait_rv);
  BOOL ok = WSAResetEvent(hEvent);
  CHECK(ok);
  return true;
}

//-----------------------------------------------------------------------------

// Translates a Winsock error code into a net error code.  Codes that are not
// listed below are logged and reported as ERR_FAILED.
int MapWinsockError(int os_error) {
  // There are numerous Winsock error codes, but these are the ones we thus far
  // find interesting.
  switch (os_error) {
    case WSAEACCES:
      return ERR_ACCESS_DENIED;
    case WSAENETDOWN:
      return ERR_INTERNET_DISCONNECTED;
    case WSAETIMEDOUT:
      return ERR_TIMED_OUT;
    case WSAECONNRESET:
    case WSAENETRESET:  // Related to keep-alive
      return ERR_CONNECTION_RESET;
    case WSAECONNABORTED:
      return ERR_CONNECTION_ABORTED;
    case WSAECONNREFUSED:
      return ERR_CONNECTION_REFUSED;
    case WSA_IO_INCOMPLETE:
    case WSAEDISCON:
      // WSAEDISCON is returned by WSARecv or WSARecvFrom for message-oriented
      // sockets (where a return value of zero means a zero-byte message) to
      // indicate graceful connection shutdown.  We should not ever see this
      // error code for TCP sockets, which are byte stream oriented.
      LOG(DFATAL) << "Unexpected error " << os_error
                  << " mapped to net::ERR_UNEXPECTED";
      return ERR_UNEXPECTED;
    case WSAEHOSTUNREACH:
    case WSAENETUNREACH:
      return ERR_ADDRESS_UNREACHABLE;
    case WSAEADDRNOTAVAIL:
      return ERR_ADDRESS_INVALID;
    case ERROR_SUCCESS:
      return OK;
    default:
      LOG(WARNING) << "Unknown error " << os_error
                   << " mapped to net::ERR_FAILED";
      return ERR_FAILED;
  }
}

// Translates a Winsock error produced by a connection attempt into a net
// error code, preferring connect-specific codes over the generic ones that
// MapWinsockError() returns.
int MapConnectError(int os_error) {
  switch (os_error) {
    // connect fails with WSAEACCES when Windows Firewall blocks the
    // connection.
    case WSAEACCES:
      return ERR_NETWORK_ACCESS_DENIED;
    case WSAETIMEDOUT:
      return ERR_CONNECTION_TIMED_OUT;
    default: {
      int net_error = MapWinsockError(os_error);
      if (net_error == ERR_FAILED)
        return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.

      // Give a more specific error when the user is offline.
      if (net_error == ERR_ADDRESS_UNREACHABLE &&
          NetworkChangeNotifier::IsOffline()) {
        return ERR_INTERNET_DISCONNECTED;
      }

      return net_error;
    }
  }
}

}  // namespace

//-----------------------------------------------------------------------------

// This class encapsulates all the state that has to be preserved as long as
// there is a network IO operation in progress. If the owner TCPClientSocketWin
// is destroyed while an operation is in progress, the Core is detached and it
// lives until the operation completes and the OS doesn't reference any resource
// declared on this class anymore.
class TCPClientSocketWin::Core : public base::RefCounted<Core> {
 public:
  explicit Core(TCPClientSocketWin* socket);

  // Start watching for the end of a read or write operation.  Each call takes
  // an extra reference on this object that the matching delegate releases
  // once the event has been signaled.
  void WatchForRead();
  void WatchForWrite();

  // The TCPClientSocketWin is going away.
  void Detach() { socket_ = NULL; }

  // The separate OVERLAPPED variables for asynchronous operation.
  // |read_overlapped_| is used for both Connect() and Read().
  // |write_overlapped_| is only used for Write();
  OVERLAPPED read_overlapped_;
  OVERLAPPED write_overlapped_;

  // The buffers used in Read() and Write().
  WSABUF read_buffer_;
  WSABUF write_buffer_;
  scoped_refptr<IOBuffer> read_iobuffer_;
  scoped_refptr<IOBuffer> write_iobuffer_;
  int write_buffer_length_;

  // Throttle the read size based on our current slow start state.
  // Returns the throttled read size.  While the throttle is still below
  // kMaxSlowStartThrottle, every call doubles it.
  int ThrottleReadSize(int size) {
    if (slow_start_throttle_ < kMaxSlowStartThrottle) {
      size = std::min(size, slow_start_throttle_);
      slow_start_throttle_ *= 2;
    }
    return size;
  }

 private:
  friend class base::RefCounted<Core>;

  // Forwards signals on the read event to the owning socket, if it is still
  // attached.
  class ReadDelegate : public base::ObjectWatcher::Delegate {
   public:
    explicit ReadDelegate(Core* core) : core_(core) {}
    virtual ~ReadDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    Core* const core_;
  };

  // Forwards signals on the write event to the owning socket, if it is still
  // attached.
  class WriteDelegate : public base::ObjectWatcher::Delegate {
   public:
    explicit WriteDelegate(Core* core) : core_(core) {}
    virtual ~WriteDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    Core* const core_;
  };

  ~Core();

  // The socket that created this object.  NULL after Detach().
  TCPClientSocketWin* socket_;

  // |reader_| handles the signals from |read_watcher_|.
  ReadDelegate reader_;
  // |writer_| handles the signals from |write_watcher_|.
  WriteDelegate writer_;

  // |read_watcher_| watches for events from Connect() and Read().
  base::ObjectWatcher read_watcher_;
  // |write_watcher_| watches for events from Write();
  base::ObjectWatcher write_watcher_;

  // When doing reads from the socket, we try to mirror TCP's slow start.
  // We do this because otherwise the async IO subsystem artificially delays
  // returning data to the application.
  static const int kInitialSlowStartThrottle = 1 * 1024;
  static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle;
  int slow_start_throttle_;

  DISALLOW_COPY_AND_ASSIGN(Core);
};

TCPClientSocketWin::Core::Core(
    TCPClientSocketWin* socket)
    : write_buffer_length_(0),
      socket_(socket),
      ALLOW_THIS_IN_INITIALIZER_LIST(reader_(this)),
      ALLOW_THIS_IN_INITIALIZER_LIST(writer_(this)),
      slow_start_throttle_(kInitialSlowStartThrottle) {
  // Zero all plain-data members so that event handles start out NULL and the
  // WSABUFs never hold indeterminate pointers or lengths.
  memset(&read_overlapped_, 0, sizeof(read_overlapped_));
  memset(&write_overlapped_, 0, sizeof(write_overlapped_));
  memset(&read_buffer_, 0, sizeof(read_buffer_));
  memset(&write_buffer_, 0, sizeof(write_buffer_));
}

TCPClientSocketWin::Core::~Core() {
  // Make sure the message loop is not watching this object anymore.
  read_watcher_.StopWatching();
  write_watcher_.StopWatching();

  // The event objects are created by the owning socket after construction, so
  // only close handles that were actually created.
  if (read_overlapped_.hEvent != WSA_INVALID_EVENT)
    WSACloseEvent(read_overlapped_.hEvent);
  memset(&read_overlapped_, 0, sizeof(read_overlapped_));
  if (write_overlapped_.hEvent != WSA_INVALID_EVENT)
    WSACloseEvent(write_overlapped_.hEvent);
  memset(&write_overlapped_, 0, sizeof(write_overlapped_));
}

void TCPClientSocketWin::Core::WatchForRead() {
  // We grab an extra reference because there is an IO operation in progress.
  // Balanced in ReadDelegate::OnObjectSignaled().
  AddRef();
  read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
}

void TCPClientSocketWin::Core::WatchForWrite() {
  // We grab an extra reference because there is an IO operation in progress.
  // Balanced in WriteDelegate::OnObjectSignaled().
  AddRef();
  write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
}

void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled(
    HANDLE object) {
  DCHECK_EQ(object, core_->read_overlapped_.hEvent);
  // |socket_| is NULL when the owner was destroyed or disconnected while the
  // operation was still pending; in that case only the reference is dropped.
  if (core_->socket_) {
    // The read event is shared by Connect() and Read().
    if (core_->socket_->waiting_connect()) {
      core_->socket_->DidCompleteConnect();
    } else {
      core_->socket_->DidCompleteRead();
    }
  }

  // Balances the AddRef() in Core::WatchForRead().
  core_->Release();
}

void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled(
    HANDLE object) {
  DCHECK_EQ(object, core_->write_overlapped_.hEvent);
  if (core_->socket_)
    core_->socket_->DidCompleteWrite();

  // Balances the AddRef() in Core::WatchForWrite().
  core_->Release();
}

//-----------------------------------------------------------------------------

// Creates an unconnected socket for |addresses|.  When |source| is valid it is
// recorded as the "source_dependency" of the SOCKET_ALIVE net log event.
TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses,
                                       net::NetLog* net_log,
                                       const net::NetLog::Source& source)
    : socket_(INVALID_SOCKET),
      addresses_(addresses),
      current_ai_(NULL),
      waiting_read_(false),
      waiting_write_(false),
      read_callback_(NULL),
      write_callback_(NULL),
      next_connect_state_(CONNECT_STATE_NONE),
      connect_os_error_(0),
      net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)),
      previously_disconnected_(false) {
  scoped_refptr<NetLog::EventParameters> params;
  if (source.is_valid())
    params = new NetLogSourceParameter("source_dependency", source);
  net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, params);
  EnsureWinsockInit();
}

TCPClientSocketWin::~TCPClientSocketWin() {
  Disconnect();
  net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE, NULL);
}

// Takes over an existing |socket| and treats it as connected to the first
// address in |addresses_|.
void TCPClientSocketWin::AdoptSocket(SOCKET socket) {
  DCHECK_EQ(socket_, INVALID_SOCKET);
  DCHECK(!core_);
  socket_ = socket;
  int error = SetupSocket();
  DCHECK_EQ(0, error);

  // Read(), Write() and DoDisconnect() all dereference |core_| and use its
  // event objects, so an adopted socket needs them just like a socket that
  // went through DoConnect().
  core_ = new Core(this);
  // WSACreateEvent creates a manual-reset event object.
  core_->read_overlapped_.hEvent = WSACreateEvent();
  core_->write_overlapped_.hEvent = WSACreateEvent();
  // NOTE(review): DoConnect() relies on WSAEventSelect to put the socket into
  // non-blocking mode.  That is not done here; this presumably assumes the
  // adopted socket is already non-blocking -- confirm against callers.

  current_ai_ = addresses_.head();
  use_history_.set_was_ever_connected();
}

#ifdef ANDROID
// TODO(kristianm): handle the case when wait_for_connect is true
320// (sync requests) 321#endif 322int TCPClientSocketWin::Connect(CompletionCallback* callback 323#ifdef ANDROID 324 , bool wait_for_connect 325#endif 326 ) { 327 DCHECK(CalledOnValidThread()); 328 329 // If already connected, then just return OK. 330 if (socket_ != INVALID_SOCKET) 331 return OK; 332 333 static base::StatsCounter connects("tcp.connect"); 334 connects.Increment(); 335 336 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT, 337 new AddressListNetLogParam(addresses_)); 338 339 // We will try to connect to each address in addresses_. Start with the 340 // first one in the list. 341 next_connect_state_ = CONNECT_STATE_CONNECT; 342 current_ai_ = addresses_.head(); 343 344 int rv = DoConnectLoop(OK); 345 if (rv == ERR_IO_PENDING) { 346 // Synchronous operation not supported. 347 DCHECK(callback); 348 read_callback_ = callback; 349 } else { 350 LogConnectCompletion(rv); 351 } 352 353 return rv; 354} 355 356int TCPClientSocketWin::DoConnectLoop(int result) { 357 DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE); 358 359 int rv = result; 360 do { 361 ConnectState state = next_connect_state_; 362 next_connect_state_ = CONNECT_STATE_NONE; 363 switch (state) { 364 case CONNECT_STATE_CONNECT: 365 DCHECK_EQ(OK, rv); 366 rv = DoConnect(); 367 break; 368 case CONNECT_STATE_CONNECT_COMPLETE: 369 rv = DoConnectComplete(rv); 370 break; 371 default: 372 LOG(DFATAL) << "bad state " << state; 373 rv = ERR_UNEXPECTED; 374 break; 375 } 376 } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE); 377 378 return rv; 379} 380 381int TCPClientSocketWin::DoConnect() { 382 const struct addrinfo* ai = current_ai_; 383 DCHECK(ai); 384 DCHECK_EQ(0, connect_os_error_); 385 386 if (previously_disconnected_) { 387 use_history_.Reset(); 388 previously_disconnected_ = false; 389 } 390 391 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 392 new NetLogStringParameter( 393 "address", NetAddressToStringWithPort(current_ai_))); 394 395 next_connect_state_ = 
CONNECT_STATE_CONNECT_COMPLETE; 396 397 connect_os_error_ = CreateSocket(ai); 398 if (connect_os_error_ != 0) 399 return MapWinsockError(connect_os_error_); 400 401 DCHECK(!core_); 402 core_ = new Core(this); 403 404 // WSACreateEvent creates a manual-reset event object. 405 core_->read_overlapped_.hEvent = WSACreateEvent(); 406 // WSAEventSelect sets the socket to non-blocking mode as a side effect. 407 // Our connect() and recv() calls require that the socket be non-blocking. 408 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT); 409 410 core_->write_overlapped_.hEvent = WSACreateEvent(); 411 412 if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) { 413 // Connected without waiting! 414 // 415 // The MSDN page for connect says: 416 // With a nonblocking socket, the connection attempt cannot be completed 417 // immediately. In this case, connect will return SOCKET_ERROR, and 418 // WSAGetLastError will return WSAEWOULDBLOCK. 419 // which implies that for a nonblocking socket, connect never returns 0. 420 // It's not documented whether the event object will be signaled or not 421 // if connect does return 0. So the code below is essentially dead code 422 // and we don't know if it's correct. 423 NOTREACHED(); 424 425 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) 426 return OK; 427 } else { 428 int os_error = WSAGetLastError(); 429 if (os_error != WSAEWOULDBLOCK) { 430 LOG(ERROR) << "connect failed: " << os_error; 431 connect_os_error_ = os_error; 432 return MapConnectError(os_error); 433 } 434 } 435 436 core_->WatchForRead(); 437 return ERR_IO_PENDING; 438} 439 440int TCPClientSocketWin::DoConnectComplete(int result) { 441 // Log the end of this attempt (and any OS error it threw). 
442 int os_error = connect_os_error_; 443 connect_os_error_ = 0; 444 scoped_refptr<NetLog::EventParameters> params; 445 if (result != OK) 446 params = new NetLogIntegerParameter("os_error", os_error); 447 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, params); 448 449 if (result == OK) { 450 use_history_.set_was_ever_connected(); 451 return OK; // Done! 452 } 453 454 // Close whatever partially connected socket we currently have. 455 DoDisconnect(); 456 457 // Try to fall back to the next address in the list. 458 if (current_ai_->ai_next) { 459 next_connect_state_ = CONNECT_STATE_CONNECT; 460 current_ai_ = current_ai_->ai_next; 461 return OK; 462 } 463 464 // Otherwise there is nothing to fall back to, so give up. 465 return result; 466} 467 468void TCPClientSocketWin::Disconnect() { 469 DoDisconnect(); 470 current_ai_ = NULL; 471} 472 473void TCPClientSocketWin::DoDisconnect() { 474 DCHECK(CalledOnValidThread()); 475 476 if (socket_ == INVALID_SOCKET) 477 return; 478 479 // Note: don't use CancelIo to cancel pending IO because it doesn't work 480 // when there is a Winsock layered service provider. 481 482 // In most socket implementations, closing a socket results in a graceful 483 // connection shutdown, but in Winsock we have to call shutdown explicitly. 484 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure" 485 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx 486 shutdown(socket_, SD_SEND); 487 488 // This cancels any pending IO. 489 closesocket(socket_); 490 socket_ = INVALID_SOCKET; 491 492 if (waiting_connect()) { 493 // We closed the socket, so this notification will never come. 494 // From MSDN' WSAEventSelect documentation: 495 // "Closing a socket with closesocket also cancels the association and 496 // selection of network events specified in WSAEventSelect for the socket". 
497 core_->Release(); 498 } 499 500 waiting_read_ = false; 501 waiting_write_ = false; 502 503 core_->Detach(); 504 core_ = NULL; 505 506 previously_disconnected_ = true; 507} 508 509bool TCPClientSocketWin::IsConnected() const { 510 DCHECK(CalledOnValidThread()); 511 512 if (socket_ == INVALID_SOCKET || waiting_connect()) 513 return false; 514 515 // Check if connection is alive. 516 char c; 517 int rv = recv(socket_, &c, 1, MSG_PEEK); 518 if (rv == 0) 519 return false; 520 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) 521 return false; 522 523 return true; 524} 525 526bool TCPClientSocketWin::IsConnectedAndIdle() const { 527 DCHECK(CalledOnValidThread()); 528 529 if (socket_ == INVALID_SOCKET || waiting_connect()) 530 return false; 531 532 // Check if connection is alive and we haven't received any data 533 // unexpectedly. 534 char c; 535 int rv = recv(socket_, &c, 1, MSG_PEEK); 536 if (rv >= 0) 537 return false; 538 if (WSAGetLastError() != WSAEWOULDBLOCK) 539 return false; 540 541 return true; 542} 543 544int TCPClientSocketWin::GetPeerAddress(AddressList* address) const { 545 DCHECK(CalledOnValidThread()); 546 DCHECK(address); 547 if (!IsConnected()) 548 return ERR_SOCKET_NOT_CONNECTED; 549 address->Copy(current_ai_, false); 550 return OK; 551} 552 553void TCPClientSocketWin::SetSubresourceSpeculation() { 554 use_history_.set_subresource_speculation(); 555} 556 557void TCPClientSocketWin::SetOmniboxSpeculation() { 558 use_history_.set_omnibox_speculation(); 559} 560 561bool TCPClientSocketWin::WasEverUsed() const { 562 return use_history_.was_used_to_convey_data(); 563} 564 565bool TCPClientSocketWin::UsingTCPFastOpen() const { 566 // Not supported on windows. 
567 return false; 568} 569 570int TCPClientSocketWin::Read(IOBuffer* buf, 571 int buf_len, 572 CompletionCallback* callback) { 573 DCHECK(CalledOnValidThread()); 574 DCHECK_NE(socket_, INVALID_SOCKET); 575 DCHECK(!waiting_read_); 576 DCHECK(!read_callback_); 577 DCHECK(!core_->read_iobuffer_); 578 579 buf_len = core_->ThrottleReadSize(buf_len); 580 581 core_->read_buffer_.len = buf_len; 582 core_->read_buffer_.buf = buf->data(); 583 584 // TODO(wtc): Remove the assertion after enough testing. 585 AssertEventNotSignaled(core_->read_overlapped_.hEvent); 586 DWORD num, flags = 0; 587 int rv = WSARecv(socket_, &core_->read_buffer_, 1, &num, &flags, 588 &core_->read_overlapped_, NULL); 589 if (rv == 0) { 590 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) { 591 // Because of how WSARecv fills memory when used asynchronously, Purify 592 // isn't able to detect that it's been initialized, so it scans for 0xcd 593 // in the buffer and reports UMRs (uninitialized memory reads) for those 594 // individual bytes. We override that in PURIFY builds to avoid the 595 // false error reports. 596 // See bug 5297. 
597 base::MemoryDebug::MarkAsInitialized(core_->read_buffer_.buf, num); 598 static base::StatsCounter read_bytes("tcp.read_bytes"); 599 read_bytes.Add(num); 600 if (num > 0) 601 use_history_.set_was_used_to_convey_data(); 602 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num, 603 core_->read_buffer_.buf); 604 return static_cast<int>(num); 605 } 606 } else { 607 int os_error = WSAGetLastError(); 608 if (os_error != WSA_IO_PENDING) 609 return MapWinsockError(os_error); 610 } 611 core_->WatchForRead(); 612 waiting_read_ = true; 613 read_callback_ = callback; 614 core_->read_iobuffer_ = buf; 615 return ERR_IO_PENDING; 616} 617 618int TCPClientSocketWin::Write(IOBuffer* buf, 619 int buf_len, 620 CompletionCallback* callback) { 621 DCHECK(CalledOnValidThread()); 622 DCHECK_NE(socket_, INVALID_SOCKET); 623 DCHECK(!waiting_write_); 624 DCHECK(!write_callback_); 625 DCHECK_GT(buf_len, 0); 626 DCHECK(!core_->write_iobuffer_); 627 628 static base::StatsCounter writes("tcp.writes"); 629 writes.Increment(); 630 631 core_->write_buffer_.len = buf_len; 632 core_->write_buffer_.buf = buf->data(); 633 core_->write_buffer_length_ = buf_len; 634 635 // TODO(wtc): Remove the assertion after enough testing. 636 AssertEventNotSignaled(core_->write_overlapped_.hEvent); 637 DWORD num; 638 int rv = WSASend(socket_, &core_->write_buffer_, 1, &num, 0, 639 &core_->write_overlapped_, NULL); 640 if (rv == 0) { 641 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) { 642 rv = static_cast<int>(num); 643 if (rv > buf_len || rv < 0) { 644 // It seems that some winsock interceptors report that more was written 645 // than was available. Treat this as an error. 
http://crbug.com/27870 646 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len 647 << " bytes, but " << rv << " bytes reported."; 648 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 649 } 650 static base::StatsCounter write_bytes("tcp.write_bytes"); 651 write_bytes.Add(rv); 652 if (rv > 0) 653 use_history_.set_was_used_to_convey_data(); 654 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, rv, 655 core_->write_buffer_.buf); 656 return rv; 657 } 658 } else { 659 int os_error = WSAGetLastError(); 660 if (os_error != WSA_IO_PENDING) 661 return MapWinsockError(os_error); 662 } 663 core_->WatchForWrite(); 664 waiting_write_ = true; 665 write_callback_ = callback; 666 core_->write_iobuffer_ = buf; 667 return ERR_IO_PENDING; 668} 669 670bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) { 671 DCHECK(CalledOnValidThread()); 672 int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF, 673 reinterpret_cast<const char*>(&size), sizeof(size)); 674 DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError(); 675 return rv == 0; 676} 677 678bool TCPClientSocketWin::SetSendBufferSize(int32 size) { 679 DCHECK(CalledOnValidThread()); 680 int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, 681 reinterpret_cast<const char*>(&size), sizeof(size)); 682 DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError(); 683 return rv == 0; 684} 685 686int TCPClientSocketWin::CreateSocket(const struct addrinfo* ai) { 687 socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0, 688 WSA_FLAG_OVERLAPPED); 689 if (socket_ == INVALID_SOCKET) { 690 int os_error = WSAGetLastError(); 691 LOG(ERROR) << "WSASocket failed: " << os_error; 692 return os_error; 693 } 694 return SetupSocket(); 695} 696 697int TCPClientSocketWin::SetupSocket() { 698 // Increase the socket buffer sizes from the default sizes for WinXP. In 699 // performance testing, there is substantial benefit by increasing from 8KB 700 // to 64KB. 
701 // See also: 702 // http://support.microsoft.com/kb/823764/EN-US 703 // On Vista, if we manually set these sizes, Vista turns off its receive 704 // window auto-tuning feature. 705 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx 706 // Since Vista's auto-tune is better than any static value we can could set, 707 // only change these on pre-vista machines. 708 int32 major_version, minor_version, fix_version; 709 base::SysInfo::OperatingSystemVersionNumbers(&major_version, &minor_version, 710 &fix_version); 711 if (major_version < 6) { 712 const int32 kSocketBufferSize = 64 * 1024; 713 SetReceiveBufferSize(kSocketBufferSize); 714 SetSendBufferSize(kSocketBufferSize); 715 } 716 717 // Disable Nagle. 718 // The Nagle implementation on windows is governed by RFC 896. The idea 719 // behind Nagle is to reduce small packets on the network. When Nagle is 720 // enabled, if a partial packet has been sent, the TCP stack will disallow 721 // further *partial* packets until an ACK has been received from the other 722 // side. Good applications should always strive to send as much data as 723 // possible and avoid partial-packet sends. However, in most real world 724 // applications, there are edge cases where this does not happen, and two 725 // partil packets may be sent back to back. For a browser, it is NEVER 726 // a benefit to delay for an RTT before the second packet is sent. 727 // 728 // As a practical example in Chromium today, consider the case of a small 729 // POST. I have verified this: 730 // Client writes 649 bytes of header (partial packet #1) 731 // Client writes 50 bytes of POST data (partial packet #2) 732 // In the above example, with Nagle, a RTT delay is inserted between these 733 // two sends due to nagle. RTTs can easily be 100ms or more. The best 734 // fix is to make sure that for POSTing data, we write as much data as 735 // possible and minimize partial packets. We will fix that. 
But disabling 736 // Nagle also ensure we don't run into this delay in other edge cases. 737 // See also: 738 // http://technet.microsoft.com/en-us/library/bb726981.aspx 739 const BOOL kDisableNagle = TRUE; 740 int rv = setsockopt(socket_, IPPROTO_TCP, TCP_NODELAY, 741 reinterpret_cast<const char*>(&kDisableNagle), sizeof(kDisableNagle)); 742 DCHECK(!rv) << "Could not disable nagle"; 743 744 // Disregard any failure in disabling nagle. 745 return 0; 746} 747 748void TCPClientSocketWin::LogConnectCompletion(int net_error) { 749 scoped_refptr<NetLog::EventParameters> params; 750 if (net_error != OK) 751 params = new NetLogIntegerParameter("net_error", net_error); 752 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT, params); 753 if (net_error == OK) 754 UpdateConnectionTypeHistograms(CONNECTION_ANY); 755} 756 757void TCPClientSocketWin::DoReadCallback(int rv) { 758 DCHECK_NE(rv, ERR_IO_PENDING); 759 DCHECK(read_callback_); 760 761 // since Run may result in Read being called, clear read_callback_ up front. 762 CompletionCallback* c = read_callback_; 763 read_callback_ = NULL; 764 c->Run(rv); 765} 766 767void TCPClientSocketWin::DoWriteCallback(int rv) { 768 DCHECK_NE(rv, ERR_IO_PENDING); 769 DCHECK(write_callback_); 770 771 // since Run may result in Write being called, clear write_callback_ up front. 
772 CompletionCallback* c = write_callback_; 773 write_callback_ = NULL; 774 c->Run(rv); 775} 776 777void TCPClientSocketWin::DidCompleteConnect() { 778 DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE); 779 int result; 780 781 WSANETWORKEVENTS events; 782 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 783 &events); 784 int os_error = 0; 785 if (rv == SOCKET_ERROR) { 786 NOTREACHED(); 787 os_error = WSAGetLastError(); 788 result = MapWinsockError(os_error); 789 } else if (events.lNetworkEvents & FD_CONNECT) { 790 os_error = events.iErrorCode[FD_CONNECT_BIT]; 791 result = MapConnectError(os_error); 792 } else { 793 NOTREACHED(); 794 result = ERR_UNEXPECTED; 795 } 796 797 connect_os_error_ = os_error; 798 rv = DoConnectLoop(result); 799 if (rv != ERR_IO_PENDING) { 800 LogConnectCompletion(rv); 801 DoReadCallback(rv); 802 } 803} 804 805void TCPClientSocketWin::DidCompleteRead() { 806 DCHECK(waiting_read_); 807 DWORD num_bytes, flags; 808 BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_, 809 &num_bytes, FALSE, &flags); 810 WSAResetEvent(core_->read_overlapped_.hEvent); 811 waiting_read_ = false; 812 core_->read_iobuffer_ = NULL; 813 if (ok) { 814 static base::StatsCounter read_bytes("tcp.read_bytes"); 815 read_bytes.Add(num_bytes); 816 if (num_bytes > 0) 817 use_history_.set_was_used_to_convey_data(); 818 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num_bytes, 819 core_->read_buffer_.buf); 820 } 821 DoReadCallback(ok ? 
num_bytes : MapWinsockError(WSAGetLastError())); 822} 823 824void TCPClientSocketWin::DidCompleteWrite() { 825 DCHECK(waiting_write_); 826 827 DWORD num_bytes, flags; 828 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_, 829 &num_bytes, FALSE, &flags); 830 WSAResetEvent(core_->write_overlapped_.hEvent); 831 waiting_write_ = false; 832 int rv; 833 if (!ok) { 834 rv = MapWinsockError(WSAGetLastError()); 835 } else { 836 rv = static_cast<int>(num_bytes); 837 if (rv > core_->write_buffer_length_ || rv < 0) { 838 // It seems that some winsock interceptors report that more was written 839 // than was available. Treat this as an error. http://crbug.com/27870 840 LOG(ERROR) << "Detected broken LSP: Asked to write " 841 << core_->write_buffer_length_ << " bytes, but " << rv 842 << " bytes reported."; 843 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 844 } else { 845 static base::StatsCounter write_bytes("tcp.write_bytes"); 846 write_bytes.Add(num_bytes); 847 if (num_bytes > 0) 848 use_history_.set_was_used_to_convey_data(); 849 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes, 850 core_->write_buffer_.buf); 851 } 852 } 853 core_->write_iobuffer_ = NULL; 854 DoWriteCallback(rv); 855} 856 857} // namespace net 858