// tcp_client_socket_win.cc (revision c407dc5cd9bdc5668497f21b26b09d988ab439de)
1804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// Use of this source code is governed by a BSD-style license that can be 3804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// found in the LICENSE file. 4804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 5804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/socket/tcp_client_socket_win.h" 6804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 7804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "base/basictypes.h" 8804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "base/compiler_specific.h" 9804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "base/memory_debug.h" 10804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "base/stats_counters.h" 11804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "base/string_util.h" 12804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "base/sys_info.h" 13804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/address_list_net_log_param.h" 14804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/connection_type_histograms.h" 15804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/io_buffer.h" 16804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/net_errors.h" 17804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/net_log.h" 18804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/net_util.h" 19804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/sys_addrinfo.h" 20804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar#include "net/base/winsock_init.h" 21804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 22804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbarnamespace net { 23804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 
24804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbarnamespace { 25804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 26804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// If the (manual-reset) event object is signaled, resets it and returns true. 27804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// Otherwise, does nothing and returns false. Called after a Winsock function 28804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// succeeds synchronously 29804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// 30804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// Our testing shows that except in rare cases (when running inside QEMU), 31804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// the event object is already signaled at this point, so we call this method 32804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// to avoid a context switch in common cases. This is just a performance 33804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar// optimization. The code still works if this function simply returns false. 34804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbarbool ResetEventIfSignaled(WSAEVENT hEvent) { 35804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar // TODO(wtc): Remove the CHECKs after enough testing. 36804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar DWORD wait_rv = WaitForSingleObject(hEvent, 0); 37804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar if (wait_rv == WAIT_TIMEOUT) 38804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar return false; // The event object is not signaled. 
39804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar CHECK_EQ(WAIT_OBJECT_0, wait_rv); 40804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar BOOL ok = WSAResetEvent(hEvent); 41804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar CHECK(ok); 42804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar return true; 43804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar} 44804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 45804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar//----------------------------------------------------------------------------- 46804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar 47804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbarint MapWinsockError(int os_error) { 48804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar // There are numerous Winsock error codes, but these are the ones we thus far 49804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar // find interesting. 50804ead0dd1713c3c8a02853fc0a5d898a46889a9Daniel Dunbar switch (os_error) { 51 // connect fails with WSAEACCES when Windows Firewall blocks the 52 // connection. 53 case WSAEACCES: 54 return ERR_ACCESS_DENIED; 55 case WSAENETDOWN: 56 return ERR_INTERNET_DISCONNECTED; 57 case WSAETIMEDOUT: 58 return ERR_TIMED_OUT; 59 case WSAECONNRESET: 60 case WSAENETRESET: // Related to keep-alive 61 return ERR_CONNECTION_RESET; 62 case WSAECONNABORTED: 63 return ERR_CONNECTION_ABORTED; 64 case WSAECONNREFUSED: 65 return ERR_CONNECTION_REFUSED; 66 case WSAEDISCON: 67 // Returned by WSARecv or WSARecvFrom for message-oriented sockets (where 68 // a return value of zero means a zero-byte message) to indicate graceful 69 // connection shutdown. We should not ever see this error code for TCP 70 // sockets, which are byte stream oriented. 
71 NOTREACHED(); 72 return ERR_CONNECTION_CLOSED; 73 case WSAEHOSTUNREACH: 74 case WSAENETUNREACH: 75 return ERR_ADDRESS_UNREACHABLE; 76 case WSAEADDRNOTAVAIL: 77 return ERR_ADDRESS_INVALID; 78 case WSA_IO_INCOMPLETE: 79 return ERR_UNEXPECTED; 80 case ERROR_SUCCESS: 81 return OK; 82 default: 83 LOG(WARNING) << "Unknown error " << os_error 84 << " mapped to net::ERR_FAILED"; 85 return ERR_FAILED; 86 } 87} 88 89int MapConnectError(int os_error) { 90 switch (os_error) { 91 case WSAETIMEDOUT: 92 return ERR_CONNECTION_TIMED_OUT; 93 default: { 94 int net_error = MapWinsockError(os_error); 95 if (net_error == ERR_FAILED) 96 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED. 97 return net_error; 98 } 99 } 100} 101 102} // namespace 103 104//----------------------------------------------------------------------------- 105 106// This class encapsulates all the state that has to be preserved as long as 107// there is a network IO operation in progress. If the owner TCPClientSocketWin 108// is destroyed while an operation is in progress, the Core is detached and it 109// lives until the operation completes and the OS doesn't reference any resource 110// declared on this class anymore. 111class TCPClientSocketWin::Core : public base::RefCounted<Core> { 112 public: 113 explicit Core(TCPClientSocketWin* socket); 114 115 // Start watching for the end of a read or write operation. 116 void WatchForRead(); 117 void WatchForWrite(); 118 119 // The TCPClientSocketWin is going away. 120 void Detach() { socket_ = NULL; } 121 122 // The separate OVERLAPPED variables for asynchronous operation. 123 // |read_overlapped_| is used for both Connect() and Read(). 124 // |write_overlapped_| is only used for Write(); 125 OVERLAPPED read_overlapped_; 126 OVERLAPPED write_overlapped_; 127 128 // The buffers used in Read() and Write(). 
129 WSABUF read_buffer_; 130 WSABUF write_buffer_; 131 scoped_refptr<IOBuffer> read_iobuffer_; 132 scoped_refptr<IOBuffer> write_iobuffer_; 133 int write_buffer_length_; 134 135 // Throttle the read size based on our current slow start state. 136 // Returns the throttled read size. 137 int ThrottleReadSize(int size) { 138 if (slow_start_throttle_ < kMaxSlowStartThrottle) { 139 size = std::min(size, slow_start_throttle_); 140 slow_start_throttle_ *= 2; 141 } 142 return size; 143 } 144 145 private: 146 friend class base::RefCounted<Core>; 147 148 class ReadDelegate : public base::ObjectWatcher::Delegate { 149 public: 150 explicit ReadDelegate(Core* core) : core_(core) {} 151 virtual ~ReadDelegate() {} 152 153 // base::ObjectWatcher::Delegate methods: 154 virtual void OnObjectSignaled(HANDLE object); 155 156 private: 157 Core* const core_; 158 }; 159 160 class WriteDelegate : public base::ObjectWatcher::Delegate { 161 public: 162 explicit WriteDelegate(Core* core) : core_(core) {} 163 virtual ~WriteDelegate() {} 164 165 // base::ObjectWatcher::Delegate methods: 166 virtual void OnObjectSignaled(HANDLE object); 167 168 private: 169 Core* const core_; 170 }; 171 172 ~Core(); 173 174 // The socket that created this object. 175 TCPClientSocketWin* socket_; 176 177 // |reader_| handles the signals from |read_watcher_|. 178 ReadDelegate reader_; 179 // |writer_| handles the signals from |write_watcher_|. 180 WriteDelegate writer_; 181 182 // |read_watcher_| watches for events from Connect() and Read(). 183 base::ObjectWatcher read_watcher_; 184 // |write_watcher_| watches for events from Write(); 185 base::ObjectWatcher write_watcher_; 186 187 // When doing reads from the socket, we try to mirror TCP's slow start. 188 // We do this because otherwise the async IO subsystem artifically delays 189 // returning data to the application. 
190 static const int kInitialSlowStartThrottle = 1 * 1024; 191 static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle; 192 int slow_start_throttle_; 193 194 DISALLOW_COPY_AND_ASSIGN(Core); 195}; 196 197TCPClientSocketWin::Core::Core( 198 TCPClientSocketWin* socket) 199 : write_buffer_length_(0), 200 socket_(socket), 201 ALLOW_THIS_IN_INITIALIZER_LIST(reader_(this)), 202 ALLOW_THIS_IN_INITIALIZER_LIST(writer_(this)), 203 slow_start_throttle_(kInitialSlowStartThrottle) { 204 memset(&read_overlapped_, 0, sizeof(read_overlapped_)); 205 memset(&write_overlapped_, 0, sizeof(write_overlapped_)); 206} 207 208TCPClientSocketWin::Core::~Core() { 209 // Make sure the message loop is not watching this object anymore. 210 read_watcher_.StopWatching(); 211 write_watcher_.StopWatching(); 212 213 WSACloseEvent(read_overlapped_.hEvent); 214 memset(&read_overlapped_, 0, sizeof(read_overlapped_)); 215 WSACloseEvent(write_overlapped_.hEvent); 216 memset(&write_overlapped_, 0, sizeof(write_overlapped_)); 217} 218 219void TCPClientSocketWin::Core::WatchForRead() { 220 // We grab an extra reference because there is an IO operation in progress. 221 // Balanced in ReadDelegate::OnObjectSignaled(). 222 AddRef(); 223 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_); 224} 225 226void TCPClientSocketWin::Core::WatchForWrite() { 227 // We grab an extra reference because there is an IO operation in progress. 228 // Balanced in WriteDelegate::OnObjectSignaled(). 
229 AddRef(); 230 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_); 231} 232 233void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled( 234 HANDLE object) { 235 DCHECK_EQ(object, core_->read_overlapped_.hEvent); 236 if (core_->socket_) { 237 if (core_->socket_->waiting_connect()) { 238 core_->socket_->DidCompleteConnect(); 239 } else { 240 core_->socket_->DidCompleteRead(); 241 } 242 } 243 244 core_->Release(); 245} 246 247void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled( 248 HANDLE object) { 249 DCHECK_EQ(object, core_->write_overlapped_.hEvent); 250 if (core_->socket_) 251 core_->socket_->DidCompleteWrite(); 252 253 core_->Release(); 254} 255 256//----------------------------------------------------------------------------- 257 258TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses, 259 net::NetLog* net_log) 260 : socket_(INVALID_SOCKET), 261 addresses_(addresses), 262 current_ai_(NULL), 263 waiting_read_(false), 264 waiting_write_(false), 265 read_callback_(NULL), 266 write_callback_(NULL), 267 next_connect_state_(CONNECT_STATE_NONE), 268 connect_os_error_(0), 269 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) { 270 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, NULL); 271 EnsureWinsockInit(); 272} 273 274TCPClientSocketWin::~TCPClientSocketWin() { 275 Disconnect(); 276 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE, NULL); 277} 278 279int TCPClientSocketWin::Connect(CompletionCallback* callback) { 280 DCHECK(CalledOnValidThread()); 281 282 // If already connected, then just return OK. 283 if (socket_ != INVALID_SOCKET) 284 return OK; 285 286 static StatsCounter connects("tcp.connect"); 287 connects.Increment(); 288 289 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT, 290 new AddressListNetLogParam(addresses_)); 291 292 // We will try to connect to each address in addresses_. Start with the 293 // first one in the list. 
294 next_connect_state_ = CONNECT_STATE_CONNECT; 295 current_ai_ = addresses_.head(); 296 297 int rv = DoConnectLoop(OK); 298 if (rv == ERR_IO_PENDING) { 299 // Synchronous operation not supported. 300 DCHECK(callback); 301 read_callback_ = callback; 302 } else { 303 LogConnectCompletion(rv); 304 } 305 306 return rv; 307} 308 309int TCPClientSocketWin::DoConnectLoop(int result) { 310 DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE); 311 312 int rv = result; 313 do { 314 ConnectState state = next_connect_state_; 315 next_connect_state_ = CONNECT_STATE_NONE; 316 switch (state) { 317 case CONNECT_STATE_CONNECT: 318 DCHECK_EQ(OK, rv); 319 rv = DoConnect(); 320 break; 321 case CONNECT_STATE_CONNECT_COMPLETE: 322 rv = DoConnectComplete(rv); 323 break; 324 default: 325 LOG(DFATAL) << "bad state"; 326 rv = ERR_UNEXPECTED; 327 break; 328 } 329 } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE); 330 331 return rv; 332} 333 334int TCPClientSocketWin::DoConnect() { 335 const struct addrinfo* ai = current_ai_; 336 DCHECK(ai); 337 DCHECK_EQ(0, connect_os_error_); 338 339 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 340 new NetLogStringParameter( 341 "address", NetAddressToStringWithPort(current_ai_))); 342 343 next_connect_state_ = CONNECT_STATE_CONNECT_COMPLETE; 344 345 connect_os_error_ = CreateSocket(ai); 346 if (connect_os_error_ != 0) 347 return MapWinsockError(connect_os_error_); 348 349 DCHECK(!core_); 350 core_ = new Core(this); 351 352 // WSACreateEvent creates a manual-reset event object. 353 core_->read_overlapped_.hEvent = WSACreateEvent(); 354 // WSAEventSelect sets the socket to non-blocking mode as a side effect. 355 // Our connect() and recv() calls require that the socket be non-blocking. 
356 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT); 357 358 core_->write_overlapped_.hEvent = WSACreateEvent(); 359 360 if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) { 361 // Connected without waiting! 362 // 363 // The MSDN page for connect says: 364 // With a nonblocking socket, the connection attempt cannot be completed 365 // immediately. In this case, connect will return SOCKET_ERROR, and 366 // WSAGetLastError will return WSAEWOULDBLOCK. 367 // which implies that for a nonblocking socket, connect never returns 0. 368 // It's not documented whether the event object will be signaled or not 369 // if connect does return 0. So the code below is essentially dead code 370 // and we don't know if it's correct. 371 NOTREACHED(); 372 373 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) 374 return OK; 375 } else { 376 int os_error = WSAGetLastError(); 377 if (os_error != WSAEWOULDBLOCK) { 378 LOG(ERROR) << "connect failed: " << os_error; 379 connect_os_error_ = os_error; 380 return MapConnectError(os_error); 381 } 382 } 383 384 core_->WatchForRead(); 385 return ERR_IO_PENDING; 386} 387 388int TCPClientSocketWin::DoConnectComplete(int result) { 389 // Log the end of this attempt (and any OS error it threw). 390 int os_error = connect_os_error_; 391 connect_os_error_ = 0; 392 scoped_refptr<NetLog::EventParameters> params; 393 if (result != OK) 394 params = new NetLogIntegerParameter("os_error", os_error); 395 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, params); 396 397 if (result == OK) 398 return OK; // Done! 399 400 // Close whatever partially connected socket we currently have. 401 DoDisconnect(); 402 403 // Try to fall back to the next address in the list. 404 if (current_ai_->ai_next) { 405 next_connect_state_ = CONNECT_STATE_CONNECT; 406 current_ai_ = current_ai_->ai_next; 407 return OK; 408 } 409 410 // Otherwise there is nothing to fall back to, so give up. 
411 return result; 412} 413 414void TCPClientSocketWin::Disconnect() { 415 DoDisconnect(); 416 current_ai_ = NULL; 417} 418 419void TCPClientSocketWin::DoDisconnect() { 420 DCHECK(CalledOnValidThread()); 421 422 if (socket_ == INVALID_SOCKET) 423 return; 424 425 // Note: don't use CancelIo to cancel pending IO because it doesn't work 426 // when there is a Winsock layered service provider. 427 428 // In most socket implementations, closing a socket results in a graceful 429 // connection shutdown, but in Winsock we have to call shutdown explicitly. 430 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure" 431 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx 432 shutdown(socket_, SD_SEND); 433 434 // This cancels any pending IO. 435 closesocket(socket_); 436 socket_ = INVALID_SOCKET; 437 438 if (waiting_connect()) { 439 // We closed the socket, so this notification will never come. 440 // From MSDN' WSAEventSelect documentation: 441 // "Closing a socket with closesocket also cancels the association and 442 // selection of network events specified in WSAEventSelect for the socket". 443 core_->Release(); 444 } 445 446 waiting_read_ = false; 447 waiting_write_ = false; 448 449 core_->Detach(); 450 core_ = NULL; 451} 452 453bool TCPClientSocketWin::IsConnected() const { 454 DCHECK(CalledOnValidThread()); 455 456 if (socket_ == INVALID_SOCKET || waiting_connect()) 457 return false; 458 459 // Check if connection is alive. 460 char c; 461 int rv = recv(socket_, &c, 1, MSG_PEEK); 462 if (rv == 0) 463 return false; 464 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) 465 return false; 466 467 return true; 468} 469 470bool TCPClientSocketWin::IsConnectedAndIdle() const { 471 DCHECK(CalledOnValidThread()); 472 473 if (socket_ == INVALID_SOCKET || waiting_connect()) 474 return false; 475 476 // Check if connection is alive and we haven't received any data 477 // unexpectedly. 
478 char c; 479 int rv = recv(socket_, &c, 1, MSG_PEEK); 480 if (rv >= 0) 481 return false; 482 if (WSAGetLastError() != WSAEWOULDBLOCK) 483 return false; 484 485 return true; 486} 487 488int TCPClientSocketWin::GetPeerAddress(AddressList* address) const { 489 DCHECK(CalledOnValidThread()); 490 DCHECK(address); 491 if (!current_ai_) 492 return ERR_FAILED; 493 address->Copy(current_ai_, false); 494 return OK; 495} 496 497int TCPClientSocketWin::Read(IOBuffer* buf, 498 int buf_len, 499 CompletionCallback* callback) { 500 DCHECK(CalledOnValidThread()); 501 DCHECK_NE(socket_, INVALID_SOCKET); 502 DCHECK(!waiting_read_); 503 DCHECK(!read_callback_); 504 DCHECK(!core_->read_iobuffer_); 505 506 buf_len = core_->ThrottleReadSize(buf_len); 507 508 core_->read_buffer_.len = buf_len; 509 core_->read_buffer_.buf = buf->data(); 510 511 // TODO(wtc): Remove the CHECK after enough testing. 512 CHECK_EQ(static_cast<DWORD>(WAIT_TIMEOUT), 513 WaitForSingleObject(core_->read_overlapped_.hEvent, 0)); 514 DWORD num, flags = 0; 515 int rv = WSARecv(socket_, &core_->read_buffer_, 1, &num, &flags, 516 &core_->read_overlapped_, NULL); 517 if (rv == 0) { 518 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) { 519 // Because of how WSARecv fills memory when used asynchronously, Purify 520 // isn't able to detect that it's been initialized, so it scans for 0xcd 521 // in the buffer and reports UMRs (uninitialized memory reads) for those 522 // individual bytes. We override that in PURIFY builds to avoid the 523 // false error reports. 524 // See bug 5297. 
525 base::MemoryDebug::MarkAsInitialized(core_->read_buffer_.buf, num); 526 static StatsCounter read_bytes("tcp.read_bytes"); 527 read_bytes.Add(num); 528 net_log_.AddEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, 529 new NetLogIntegerParameter("num_bytes", num)); 530 return static_cast<int>(num); 531 } 532 } else { 533 int os_error = WSAGetLastError(); 534 if (os_error != WSA_IO_PENDING) 535 return MapWinsockError(os_error); 536 } 537 core_->WatchForRead(); 538 waiting_read_ = true; 539 read_callback_ = callback; 540 core_->read_iobuffer_ = buf; 541 return ERR_IO_PENDING; 542} 543 544int TCPClientSocketWin::Write(IOBuffer* buf, 545 int buf_len, 546 CompletionCallback* callback) { 547 DCHECK(CalledOnValidThread()); 548 DCHECK_NE(socket_, INVALID_SOCKET); 549 DCHECK(!waiting_write_); 550 DCHECK(!write_callback_); 551 DCHECK_GT(buf_len, 0); 552 DCHECK(!core_->write_iobuffer_); 553 554 static StatsCounter reads("tcp.writes"); 555 reads.Increment(); 556 557 core_->write_buffer_.len = buf_len; 558 core_->write_buffer_.buf = buf->data(); 559 core_->write_buffer_length_ = buf_len; 560 561 // TODO(wtc): Remove the CHECK after enough testing. 562 CHECK_EQ(static_cast<DWORD>(WAIT_TIMEOUT), 563 WaitForSingleObject(core_->write_overlapped_.hEvent, 0)); 564 DWORD num; 565 int rv = WSASend(socket_, &core_->write_buffer_, 1, &num, 0, 566 &core_->write_overlapped_, NULL); 567 if (rv == 0) { 568 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) { 569 rv = static_cast<int>(num); 570 if (rv > buf_len || rv < 0) { 571 // It seems that some winsock interceptors report that more was written 572 // than was available. Treat this as an error. 
http://crbug.com/27870 573 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len 574 << " bytes, but " << rv << " bytes reported."; 575 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 576 } 577 static StatsCounter write_bytes("tcp.write_bytes"); 578 write_bytes.Add(rv); 579 net_log_.AddEvent(NetLog::TYPE_SOCKET_BYTES_SENT, 580 new NetLogIntegerParameter("num_bytes", rv)); 581 return rv; 582 } 583 } else { 584 int os_error = WSAGetLastError(); 585 if (os_error != WSA_IO_PENDING) 586 return MapWinsockError(os_error); 587 } 588 core_->WatchForWrite(); 589 waiting_write_ = true; 590 write_callback_ = callback; 591 core_->write_iobuffer_ = buf; 592 return ERR_IO_PENDING; 593} 594 595bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) { 596 DCHECK(CalledOnValidThread()); 597 int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF, 598 reinterpret_cast<const char*>(&size), sizeof(size)); 599 DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError(); 600 return rv == 0; 601} 602 603bool TCPClientSocketWin::SetSendBufferSize(int32 size) { 604 DCHECK(CalledOnValidThread()); 605 int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, 606 reinterpret_cast<const char*>(&size), sizeof(size)); 607 DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError(); 608 return rv == 0; 609} 610 611int TCPClientSocketWin::CreateSocket(const struct addrinfo* ai) { 612 socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0, 613 WSA_FLAG_OVERLAPPED); 614 if (socket_ == INVALID_SOCKET) { 615 int os_error = WSAGetLastError(); 616 LOG(ERROR) << "WSASocket failed: " << os_error; 617 return os_error; 618 } 619 620 // Increase the socket buffer sizes from the default sizes for WinXP. In 621 // performance testing, there is substantial benefit by increasing from 8KB 622 // to 64KB. 
623 // See also: 624 // http://support.microsoft.com/kb/823764/EN-US 625 // On Vista, if we manually set these sizes, Vista turns off its receive 626 // window auto-tuning feature. 627 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx 628 // Since Vista's auto-tune is better than any static value we can could set, 629 // only change these on pre-vista machines. 630 int32 major_version, minor_version, fix_version; 631 base::SysInfo::OperatingSystemVersionNumbers(&major_version, &minor_version, 632 &fix_version); 633 if (major_version < 6) { 634 const int32 kSocketBufferSize = 64 * 1024; 635 SetReceiveBufferSize(kSocketBufferSize); 636 SetSendBufferSize(kSocketBufferSize); 637 } 638 639 // Disable Nagle. 640 // The Nagle implementation on windows is governed by RFC 896. The idea 641 // behind Nagle is to reduce small packets on the network. When Nagle is 642 // enabled, if a partial packet has been sent, the TCP stack will disallow 643 // further *partial* packets until an ACK has been received from the other 644 // side. Good applications should always strive to send as much data as 645 // possible and avoid partial-packet sends. However, in most real world 646 // applications, there are edge cases where this does not happen, and two 647 // partil packets may be sent back to back. For a browser, it is NEVER 648 // a benefit to delay for an RTT before the second packet is sent. 649 // 650 // As a practical example in Chromium today, consider the case of a small 651 // POST. I have verified this: 652 // Client writes 649 bytes of header (partial packet #1) 653 // Client writes 50 bytes of POST data (partial packet #2) 654 // In the above example, with Nagle, a RTT delay is inserted between these 655 // two sends due to nagle. RTTs can easily be 100ms or more. The best 656 // fix is to make sure that for POSTing data, we write as much data as 657 // possible and minimize partial packets. We will fix that. 
But disabling 658 // Nagle also ensure we don't run into this delay in other edge cases. 659 // See also: 660 // http://technet.microsoft.com/en-us/library/bb726981.aspx 661 const BOOL kDisableNagle = TRUE; 662 int rv = setsockopt(socket_, IPPROTO_TCP, TCP_NODELAY, 663 reinterpret_cast<const char*>(&kDisableNagle), sizeof(kDisableNagle)); 664 DCHECK(!rv) << "Could not disable nagle"; 665 666 // Disregard any failure in disabling nagle. 667 return 0; 668} 669 670void TCPClientSocketWin::LogConnectCompletion(int net_error) { 671 scoped_refptr<NetLog::EventParameters> params; 672 if (net_error != OK) 673 params = new NetLogIntegerParameter("net_error", net_error); 674 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT, params); 675 if (net_error == OK) 676 UpdateConnectionTypeHistograms(CONNECTION_ANY); 677} 678 679void TCPClientSocketWin::DoReadCallback(int rv) { 680 DCHECK_NE(rv, ERR_IO_PENDING); 681 DCHECK(read_callback_); 682 683 static StatsCounter read_bytes("tcp.read_bytes"); 684 read_bytes.Add(rv); 685 686 // since Run may result in Read being called, clear read_callback_ up front. 687 CompletionCallback* c = read_callback_; 688 read_callback_ = NULL; 689 c->Run(rv); 690} 691 692void TCPClientSocketWin::DoWriteCallback(int rv) { 693 DCHECK_NE(rv, ERR_IO_PENDING); 694 DCHECK(write_callback_); 695 696 static StatsCounter write_bytes("tcp.write_bytes"); 697 write_bytes.Add(rv); 698 699 // since Run may result in Write being called, clear write_callback_ up front. 
700 CompletionCallback* c = write_callback_; 701 write_callback_ = NULL; 702 c->Run(rv); 703} 704 705void TCPClientSocketWin::DidCompleteConnect() { 706 DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE); 707 int result; 708 709 WSANETWORKEVENTS events; 710 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 711 &events); 712 int os_error = 0; 713 if (rv == SOCKET_ERROR) { 714 NOTREACHED(); 715 os_error = WSAGetLastError(); 716 result = MapWinsockError(os_error); 717 } else if (events.lNetworkEvents & FD_CONNECT) { 718 os_error = events.iErrorCode[FD_CONNECT_BIT]; 719 result = MapConnectError(os_error); 720 } else { 721 NOTREACHED(); 722 result = ERR_UNEXPECTED; 723 } 724 725 connect_os_error_ = os_error; 726 rv = DoConnectLoop(result); 727 if (rv != ERR_IO_PENDING) { 728 LogConnectCompletion(rv); 729 DoReadCallback(rv); 730 } 731} 732 733void TCPClientSocketWin::DidCompleteRead() { 734 DCHECK(waiting_read_); 735 DWORD num_bytes, flags; 736 BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_, 737 &num_bytes, FALSE, &flags); 738 WSAResetEvent(core_->read_overlapped_.hEvent); 739 waiting_read_ = false; 740 core_->read_iobuffer_ = NULL; 741 if (ok) { 742 net_log_.AddEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, 743 new NetLogIntegerParameter("num_bytes", num_bytes)); 744 } 745 DoReadCallback(ok ? num_bytes : MapWinsockError(WSAGetLastError())); 746} 747 748void TCPClientSocketWin::DidCompleteWrite() { 749 DCHECK(waiting_write_); 750 751 DWORD num_bytes, flags; 752 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_, 753 &num_bytes, FALSE, &flags); 754 WSAResetEvent(core_->write_overlapped_.hEvent); 755 waiting_write_ = false; 756 int rv; 757 if (!ok) { 758 rv = MapWinsockError(WSAGetLastError()); 759 } else { 760 rv = static_cast<int>(num_bytes); 761 if (rv > core_->write_buffer_length_ || rv < 0) { 762 // It seems that some winsock interceptors report that more was written 763 // than was available. 
Treat this as an error. http://crbug.com/27870 764 LOG(ERROR) << "Detected broken LSP: Asked to write " 765 << core_->write_buffer_length_ << " bytes, but " << rv 766 << " bytes reported."; 767 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 768 } else { 769 net_log_.AddEvent(NetLog::TYPE_SOCKET_BYTES_SENT, 770 new NetLogIntegerParameter("num_bytes", rv)); 771 } 772 } 773 core_->write_iobuffer_ = NULL; 774 DoWriteCallback(rv); 775} 776 777} // namespace net 778