1#include <string.h> 2#include <stdlib.h> 3#include <stdio.h> 4#include <assert.h> 5#include <errno.h> 6#include <byteswap.h> 7#include <gpxe/timer.h> 8#include <gpxe/iobuf.h> 9#include <gpxe/malloc.h> 10#include <gpxe/retry.h> 11#include <gpxe/refcnt.h> 12#include <gpxe/xfer.h> 13#include <gpxe/open.h> 14#include <gpxe/uri.h> 15#include <gpxe/tcpip.h> 16#include <gpxe/tcp.h> 17 18/** @file 19 * 20 * TCP protocol 21 * 22 */ 23 24FILE_LICENCE ( GPL2_OR_LATER ); 25 26/** A TCP connection */ 27struct tcp_connection { 28 /** Reference counter */ 29 struct refcnt refcnt; 30 /** List of TCP connections */ 31 struct list_head list; 32 33 /** Data transfer interface */ 34 struct xfer_interface xfer; 35 /** Data transfer interface closed flag */ 36 int xfer_closed; 37 38 /** Remote socket address */ 39 struct sockaddr_tcpip peer; 40 /** Local port, in network byte order */ 41 unsigned int local_port; 42 43 /** Current TCP state */ 44 unsigned int tcp_state; 45 /** Previous TCP state 46 * 47 * Maintained only for debug messages 48 */ 49 unsigned int prev_tcp_state; 50 /** Current sequence number 51 * 52 * Equivalent to SND.UNA in RFC 793 terminology. 53 */ 54 uint32_t snd_seq; 55 /** Unacknowledged sequence count 56 * 57 * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology. 58 */ 59 uint32_t snd_sent; 60 /** Send window 61 * 62 * Equivalent to SND.WND in RFC 793 terminology 63 */ 64 uint32_t snd_win; 65 /** Current acknowledgement number 66 * 67 * Equivalent to RCV.NXT in RFC 793 terminology. 68 */ 69 uint32_t rcv_ack; 70 /** Receive window 71 * 72 * Equivalent to RCV.WND in RFC 793 terminology. 73 */ 74 uint32_t rcv_win; 75 /** Most recent received timestamp 76 * 77 * Equivalent to TS.Recent in RFC 1323 terminology. 78 */ 79 uint32_t ts_recent; 80 /** Timestamps enabled */ 81 int timestamps; 82 83 /** Transmit queue */ 84 struct list_head queue; 85 /** Retransmission timer */ 86 struct retry_timer timer; 87}; 88 89/** 90 * List of registered TCP connections 91 */ 92static LIST_HEAD ( tcp_conns ); 93 94/* Forward declarations */ 95static struct xfer_interface_operations tcp_xfer_operations; 96static void tcp_expired ( struct retry_timer *timer, int over ); 97static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, 98 uint32_t win ); 99 100/** 101 * Name TCP state 102 * 103 * @v state TCP state 104 * @ret name Name of TCP state 105 */ 106static inline __attribute__ (( always_inline )) const char * 107tcp_state ( int state ) { 108 switch ( state ) { 109 case TCP_CLOSED: return "CLOSED"; 110 case TCP_LISTEN: return "LISTEN"; 111 case TCP_SYN_SENT: return "SYN_SENT"; 112 case TCP_SYN_RCVD: return "SYN_RCVD"; 113 case TCP_ESTABLISHED: return "ESTABLISHED"; 114 case TCP_FIN_WAIT_1: return "FIN_WAIT_1"; 115 case TCP_FIN_WAIT_2: return "FIN_WAIT_2"; 116 case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK"; 117 case TCP_TIME_WAIT: return "TIME_WAIT"; 118 case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; 119 default: return "INVALID"; 120 } 121} 122 123/** 124 * Dump TCP state transition 125 * 126 * @v tcp TCP connection 127 */ 128static inline __attribute__ (( always_inline )) void 129tcp_dump_state ( struct tcp_connection *tcp ) { 130 131 if ( tcp->tcp_state != tcp->prev_tcp_state ) { 132 DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp, 133 tcp_state ( tcp->prev_tcp_state ), 134 tcp_state ( tcp->tcp_state ) ); 135 } 136 tcp->prev_tcp_state = tcp->tcp_state; 137} 138 139/** 140 * Dump TCP flags 141 * 142 * @v flags TCP flags 143 */ 144static inline __attribute__ (( always_inline )) void 145tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { 146 if ( flags & TCP_RST ) 147 DBGC2 ( tcp, " RST" ); 148 if ( flags & TCP_SYN ) 149 DBGC2 ( tcp, " SYN" ); 150 if ( flags & TCP_PSH ) 151 DBGC2 ( tcp, " PSH" ); 152 if ( flags & TCP_FIN ) 153 DBGC2 ( tcp, " FIN" ); 154 if ( flags & TCP_ACK ) 155 DBGC2 ( tcp, " ACK" ); 156} 157 158/*************************************************************************** 159 * 160 * Open and close 161 * 162 *************************************************************************** 163 */ 164 165/** 166 * Bind TCP connection to local port 167 * 168 * @v tcp TCP connection 169 * @v port Local port number, in network-endian order 170 * @ret rc Return status code 171 * 172 * If the port is 0, the connection is assigned an available port 173 * between 1024 and 65535. 174 */ 175static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) { 176 struct tcp_connection *existing; 177 static uint16_t try_port = 1023; 178 179 /* If no port specified, find the first available port */ 180 if ( ! port ) { 181 while ( try_port ) { 182 try_port++; 183 if ( try_port < 1024 ) 184 continue; 185 if ( tcp_bind ( tcp, htons ( try_port ) ) == 0 ) 186 return 0; 187 } 188 DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp ); 189 return -EADDRINUSE; 190 } 191 192 /* Attempt bind to local port */ 193 list_for_each_entry ( existing, &tcp_conns, list ) { 194 if ( existing->local_port == port ) { 195 DBGC ( tcp, "TCP %p could not bind: port %d in use\n", 196 tcp, ntohs ( port ) ); 197 return -EADDRINUSE; 198 } 199 } 200 tcp->local_port = port; 201 202 DBGC ( tcp, "TCP %p bound to port %d\n", tcp, ntohs ( port ) ); 203 return 0; 204} 205 206/** 207 * Open a TCP connection 208 * 209 * @v xfer Data transfer interface 210 * @v peer Peer socket address 211 * @v local Local socket address, or NULL 212 * @ret rc Return status code 213 */ 214static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer, 215 struct sockaddr *local ) { 216 struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; 217 struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; 218 struct tcp_connection *tcp; 219 unsigned int bind_port; 220 int rc; 221 222 /* Allocate and initialise structure */ 223 tcp = zalloc ( sizeof ( *tcp ) ); 224 if ( ! tcp ) 225 return -ENOMEM; 226 DBGC ( tcp, "TCP %p allocated\n", tcp ); 227 xfer_init ( &tcp->xfer, &tcp_xfer_operations, &tcp->refcnt ); 228 tcp->prev_tcp_state = TCP_CLOSED; 229 tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN ); 230 tcp_dump_state ( tcp ); 231 tcp->snd_seq = random(); 232 INIT_LIST_HEAD ( &tcp->queue ); 233 tcp->timer.expired = tcp_expired; 234 memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) ); 235 236 /* Bind to local port */ 237 bind_port = ( st_local ? st_local->st_port : 0 ); 238 if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 ) 239 goto err; 240 241 /* Start timer to initiate SYN */ 242 start_timer_nodelay ( &tcp->timer ); 243 244 /* Attach parent interface, transfer reference to connection 245 * list and return 246 */ 247 xfer_plug_plug ( &tcp->xfer, xfer ); 248 list_add ( &tcp->list, &tcp_conns ); 249 return 0; 250 251 err: 252 ref_put ( &tcp->refcnt ); 253 return rc; 254} 255 256/** 257 * Close TCP connection 258 * 259 * @v tcp TCP connection 260 * @v rc Reason for close 261 * 262 * Closes the data transfer interface. If the TCP state machine is in 263 * a suitable state, the connection will be deleted. 264 */ 265static void tcp_close ( struct tcp_connection *tcp, int rc ) { 266 struct io_buffer *iobuf; 267 struct io_buffer *tmp; 268 269 /* Close data transfer interface */ 270 xfer_nullify ( &tcp->xfer ); 271 xfer_close ( &tcp->xfer, rc ); 272 tcp->xfer_closed = 1; 273 274 /* If we are in CLOSED, or have otherwise not yet received a 275 * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the 276 * connection. 277 */ 278 if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { 279 280 /* Transition to CLOSED for the sake of debugging messages */ 281 tcp->tcp_state = TCP_CLOSED; 282 tcp_dump_state ( tcp ); 283 284 /* Free any unsent I/O buffers */ 285 list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { 286 list_del ( &iobuf->list ); 287 free_iob ( iobuf ); 288 } 289 290 /* Remove from list and drop reference */ 291 stop_timer ( &tcp->timer ); 292 list_del ( &tcp->list ); 293 ref_put ( &tcp->refcnt ); 294 DBGC ( tcp, "TCP %p connection deleted\n", tcp ); 295 return; 296 } 297 298 /* If we have not had our SYN acknowledged (i.e. we are in 299 * SYN_RCVD), pretend that it has been acknowledged so that we 300 * can send a FIN without breaking things. 301 */ 302 if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) 303 tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 ); 304 305 /* If we have no data remaining to send, start sending FIN */ 306 if ( list_empty ( &tcp->queue ) ) { 307 tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); 308 tcp_dump_state ( tcp ); 309 } 310} 311 312/*************************************************************************** 313 * 314 * Transmit data path 315 * 316 *************************************************************************** 317 */ 318 319/** 320 * Calculate transmission window 321 * 322 * @v tcp TCP connection 323 * @ret len Maximum length that can be sent in a single packet 324 */ 325static size_t tcp_xmit_win ( struct tcp_connection *tcp ) { 326 size_t len; 327 328 /* Not ready if we're not in a suitable connection state */ 329 if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) 330 return 0; 331 332 /* Length is the minimum of the receiver's window and the path MTU */ 333 len = tcp->snd_win; 334 if ( len > TCP_PATH_MTU ) 335 len = TCP_PATH_MTU; 336 337 return len; 338} 339 340/** 341 * Process TCP transmit queue 342 * 343 * @v tcp TCP connection 344 * @v max_len Maximum length to process 345 * @v dest I/O buffer to fill with data, or NULL 346 * @v remove Remove data from queue 347 * @ret len Length of data processed 348 * 349 * This processes at most @c max_len bytes from the TCP connection's 350 * transmit queue. Data will be copied into the @c dest I/O buffer 351 * (if provided) and, if @c remove is true, removed from the transmit 352 * queue. 353 */ 354static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len, 355 struct io_buffer *dest, int remove ) { 356 struct io_buffer *iobuf; 357 struct io_buffer *tmp; 358 size_t frag_len; 359 size_t len = 0; 360 361 list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { 362 frag_len = iob_len ( iobuf ); 363 if ( frag_len > max_len ) 364 frag_len = max_len; 365 if ( dest ) { 366 memcpy ( iob_put ( dest, frag_len ), iobuf->data, 367 frag_len ); 368 } 369 if ( remove ) { 370 iob_pull ( iobuf, frag_len ); 371 if ( ! iob_len ( iobuf ) ) { 372 list_del ( &iobuf->list ); 373 free_iob ( iobuf ); 374 } 375 } 376 len += frag_len; 377 max_len -= frag_len; 378 } 379 return len; 380} 381 382/** 383 * Transmit any outstanding data 384 * 385 * @v tcp TCP connection 386 * @v force_send Force sending of packet 387 * 388 * Transmits any outstanding data on the connection. 389 * 390 * Note that even if an error is returned, the retransmission timer 391 * will have been started if necessary, and so the stack will 392 * eventually attempt to retransmit the failed packet. 393 */ 394static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { 395 struct io_buffer *iobuf; 396 struct tcp_header *tcphdr; 397 struct tcp_mss_option *mssopt; 398 struct tcp_timestamp_padded_option *tsopt; 399 void *payload; 400 unsigned int flags; 401 size_t len = 0; 402 uint32_t seq_len; 403 uint32_t app_win; 404 uint32_t max_rcv_win; 405 int rc; 406 407 /* If retransmission timer is already running, do nothing */ 408 if ( timer_running ( &tcp->timer ) ) 409 return 0; 410 411 /* Calculate both the actual (payload) and sequence space 412 * lengths that we wish to transmit. 413 */ 414 if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) { 415 len = tcp_process_queue ( tcp, tcp_xmit_win ( tcp ), 416 NULL, 0 ); 417 } 418 seq_len = len; 419 flags = TCP_FLAGS_SENDING ( tcp->tcp_state ); 420 if ( flags & ( TCP_SYN | TCP_FIN ) ) { 421 /* SYN or FIN consume one byte, and we can never send both */ 422 assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) ); 423 seq_len++; 424 } 425 tcp->snd_sent = seq_len; 426 427 /* If we have nothing to transmit, stop now */ 428 if ( ( seq_len == 0 ) && ! force_send ) 429 return 0; 430 431 /* If we are transmitting anything that requires 432 * acknowledgement (i.e. consumes sequence space), start the 433 * retransmission timer. Do this before attempting to 434 * allocate the I/O buffer, in case allocation itself fails. 435 */ 436 if ( seq_len ) 437 start_timer ( &tcp->timer ); 438 439 /* Allocate I/O buffer */ 440 iobuf = alloc_iob ( len + MAX_HDR_LEN ); 441 if ( ! iobuf ) { 442 DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " 443 "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), 444 tcp->rcv_ack ); 445 return -ENOMEM; 446 } 447 iob_reserve ( iobuf, MAX_HDR_LEN ); 448 449 /* Fill data payload from transmit queue */ 450 tcp_process_queue ( tcp, len, iobuf, 0 ); 451 452 /* Expand receive window if possible */ 453 max_rcv_win = ( ( freemem * 3 ) / 4 ); 454 if ( max_rcv_win > TCP_MAX_WINDOW_SIZE ) 455 max_rcv_win = TCP_MAX_WINDOW_SIZE; 456 app_win = xfer_window ( &tcp->xfer ); 457 if ( max_rcv_win > app_win ) 458 max_rcv_win = app_win; 459 max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ 460 if ( tcp->rcv_win < max_rcv_win ) 461 tcp->rcv_win = max_rcv_win; 462 463 /* Fill up the TCP header */ 464 payload = iobuf->data; 465 if ( flags & TCP_SYN ) { 466 mssopt = iob_push ( iobuf, sizeof ( *mssopt ) ); 467 mssopt->kind = TCP_OPTION_MSS; 468 mssopt->length = sizeof ( *mssopt ); 469 mssopt->mss = htons ( TCP_MSS ); 470 } 471 if ( ( flags & TCP_SYN ) || tcp->timestamps ) { 472 tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); 473 memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) ); 474 tsopt->tsopt.kind = TCP_OPTION_TS; 475 tsopt->tsopt.length = sizeof ( tsopt->tsopt ); 476 tsopt->tsopt.tsval = ntohl ( currticks() ); 477 tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent ); 478 } 479 if ( ! ( flags & TCP_SYN ) ) 480 flags |= TCP_PSH; 481 tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); 482 memset ( tcphdr, 0, sizeof ( *tcphdr ) ); 483 tcphdr->src = tcp->local_port; 484 tcphdr->dest = tcp->peer.st_port; 485 tcphdr->seq = htonl ( tcp->snd_seq ); 486 tcphdr->ack = htonl ( tcp->rcv_ack ); 487 tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); 488 tcphdr->flags = flags; 489 tcphdr->win = htons ( tcp->rcv_win ); 490 tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); 491 492 /* Dump header */ 493 DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", 494 tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), 495 ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), 496 ntohl ( tcphdr->ack ), len ); 497 tcp_dump_flags ( tcp, tcphdr->flags ); 498 DBGC2 ( tcp, "\n" ); 499 500 /* Transmit packet */ 501 if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, 502 &tcphdr->csum ) ) != 0 ) { 503 DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", 504 tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), 505 tcp->rcv_ack, strerror ( rc ) ); 506 return rc; 507 } 508 509 return 0; 510} 511 512/** 513 * Retransmission timer expired 514 * 515 * @v timer Retry timer 516 * @v over Failure indicator 517 */ 518static void tcp_expired ( struct retry_timer *timer, int over ) { 519 struct tcp_connection *tcp = 520 container_of ( timer, struct tcp_connection, timer ); 521 int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ); 522 523 DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, 524 ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), 525 tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); 526 527 assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || 528 ( tcp->tcp_state == TCP_SYN_RCVD ) || 529 ( tcp->tcp_state == TCP_ESTABLISHED ) || 530 ( tcp->tcp_state == TCP_FIN_WAIT_1 ) || 531 ( tcp->tcp_state == TCP_TIME_WAIT ) || 532 ( tcp->tcp_state == TCP_CLOSE_WAIT ) || 533 ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) ); 534 535 if ( over || graceful_close ) { 536 /* If we have finally timed out and given up, or if 537 * this is the result of a graceful close, terminate 538 * the connection 539 */ 540 tcp->tcp_state = TCP_CLOSED; 541 tcp_dump_state ( tcp ); 542 tcp_close ( tcp, -ETIMEDOUT ); 543 } else { 544 /* Otherwise, retransmit the packet */ 545 tcp_xmit ( tcp, 0 ); 546 } 547} 548 549/** 550 * Send RST response to incoming packet 551 * 552 * @v in_tcphdr TCP header of incoming packet 553 * @ret rc Return status code 554 */ 555static int tcp_xmit_reset ( struct tcp_connection *tcp, 556 struct sockaddr_tcpip *st_dest, 557 struct tcp_header *in_tcphdr ) { 558 struct io_buffer *iobuf; 559 struct tcp_header *tcphdr; 560 int rc; 561 562 /* Allocate space for dataless TX buffer */ 563 iobuf = alloc_iob ( MAX_HDR_LEN ); 564 if ( ! iobuf ) { 565 DBGC ( tcp, "TCP %p could not allocate iobuf for RST " 566 "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), 567 ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); 568 return -ENOMEM; 569 } 570 iob_reserve ( iobuf, MAX_HDR_LEN ); 571 572 /* Construct RST response */ 573 tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); 574 memset ( tcphdr, 0, sizeof ( *tcphdr ) ); 575 tcphdr->src = in_tcphdr->dest; 576 tcphdr->dest = in_tcphdr->src; 577 tcphdr->seq = in_tcphdr->ack; 578 tcphdr->ack = in_tcphdr->seq; 579 tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 ); 580 tcphdr->flags = ( TCP_RST | TCP_ACK ); 581 tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE ); 582 tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); 583 584 /* Dump header */ 585 DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", 586 tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), 587 ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), 588 ntohl ( tcphdr->ack ), 0 ); 589 tcp_dump_flags ( tcp, tcphdr->flags ); 590 DBGC2 ( tcp, "\n" ); 591 592 /* Transmit packet */ 593 if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, 594 NULL, &tcphdr->csum ) ) != 0 ) { 595 DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " 596 "%s\n", tcp, ntohl ( in_tcphdr->ack ), 597 ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), 598 strerror ( rc ) ); 599 return rc; 600 } 601 602 return 0; 603} 604 605/*************************************************************************** 606 * 607 * Receive data path 608 * 609 *************************************************************************** 610 */ 611 612/** 613 * Identify TCP connection by local port number 614 * 615 * @v local_port Local port (in network-endian order) 616 * @ret tcp TCP connection, or NULL 617 */ 618static struct tcp_connection * tcp_demux ( unsigned int local_port ) { 619 struct tcp_connection *tcp; 620 621 list_for_each_entry ( tcp, &tcp_conns, list ) { 622 if ( tcp->local_port == local_port ) 623 return tcp; 624 } 625 return NULL; 626} 627 628/** 629 * Parse TCP received options 630 * 631 * @v tcp TCP connection 632 * @v data Raw options data 633 * @v len Raw options length 634 * @v options Options structure to fill in 635 */ 636static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, 637 size_t len, struct tcp_options *options ) { 638 const void *end = ( data + len ); 639 const struct tcp_option *option; 640 unsigned int kind; 641 642 memset ( options, 0, sizeof ( *options ) ); 643 while ( data < end ) { 644 option = data; 645 kind = option->kind; 646 if ( kind == TCP_OPTION_END ) 647 return; 648 if ( kind == TCP_OPTION_NOP ) { 649 data++; 650 continue; 651 } 652 switch ( kind ) { 653 case TCP_OPTION_MSS: 654 options->mssopt = data; 655 break; 656 case TCP_OPTION_TS: 657 options->tsopt = data; 658 break; 659 default: 660 DBGC ( tcp, "TCP %p received unknown option %d\n", 661 tcp, kind ); 662 break; 663 } 664 data += option->length; 665 } 666} 667 668/** 669 * Consume received sequence space 670 * 671 * @v tcp TCP connection 672 * @v seq_len Sequence space length to consume 673 */ 674static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { 675 tcp->rcv_ack += seq_len; 676 if ( tcp->rcv_win > seq_len ) { 677 tcp->rcv_win -= seq_len; 678 } else { 679 tcp->rcv_win = 0; 680 } 681} 682 683/** 684 * Handle TCP received SYN 685 * 686 * @v tcp TCP connection 687 * @v seq SEQ value (in host-endian order) 688 * @v options TCP options 689 * @ret rc Return status code 690 */ 691static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, 692 struct tcp_options *options ) { 693 694 /* Synchronise sequence numbers on first SYN */ 695 if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { 696 tcp->rcv_ack = seq; 697 if ( options->tsopt ) 698 tcp->timestamps = 1; 699 } 700 701 /* Ignore duplicate SYN */ 702 if ( ( tcp->rcv_ack - seq ) > 0 ) 703 return 0; 704 705 /* Mark SYN as received and start sending ACKs with each packet */ 706 tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) | 707 TCP_STATE_RCVD ( TCP_SYN ) ); 708 709 /* Acknowledge SYN */ 710 tcp_rx_seq ( tcp, 1 ); 711 712 return 0; 713} 714 715/** 716 * Handle TCP received ACK 717 * 718 * @v tcp TCP connection 719 * @v ack ACK value (in host-endian order) 720 * @v win WIN value (in host-endian order) 721 * @ret rc Return status code 722 */ 723static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, 724 uint32_t win ) { 725 uint32_t ack_len = ( ack - tcp->snd_seq ); 726 size_t len; 727 unsigned int acked_flags; 728 729 /* Check for out-of-range or old duplicate ACKs */ 730 if ( ack_len > tcp->snd_sent ) { 731 DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " 732 "sent only %08x..%08x\n", tcp, tcp->snd_seq, 733 ( tcp->snd_seq + ack_len ), tcp->snd_seq, 734 ( tcp->snd_seq + tcp->snd_sent ) ); 735 736 if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { 737 /* Just ignore what might be old duplicate ACKs */ 738 return 0; 739 } else { 740 /* Send RST if an out-of-range ACK is received 741 * on a not-yet-established connection, as per 742 * RFC 793. 743 */ 744 return -EINVAL; 745 } 746 } 747 748 /* Ignore ACKs that don't actually acknowledge any new data. 749 * (In particular, do not stop the retransmission timer; this 750 * avoids creating a sorceror's apprentice syndrome when a 751 * duplicate ACK is received and we still have data in our 752 * transmit queue.) 753 */ 754 if ( ack_len == 0 ) 755 return 0; 756 757 /* Stop the retransmission timer */ 758 stop_timer ( &tcp->timer ); 759 760 /* Determine acknowledged flags and data length */ 761 len = ack_len; 762 acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & 763 ( TCP_SYN | TCP_FIN ) ); 764 if ( acked_flags ) 765 len--; 766 767 /* Update SEQ and sent counters, and window size */ 768 tcp->snd_seq = ack; 769 tcp->snd_sent = 0; 770 tcp->snd_win = win; 771 772 /* Remove any acknowledged data from transmit queue */ 773 tcp_process_queue ( tcp, len, NULL, 1 ); 774 775 /* Mark SYN/FIN as acknowledged if applicable. */ 776 if ( acked_flags ) 777 tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags ); 778 779 /* Start sending FIN if we've had all possible data ACKed */ 780 if ( list_empty ( &tcp->queue ) && tcp->xfer_closed ) 781 tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); 782 783 return 0; 784} 785 786/** 787 * Handle TCP received data 788 * 789 * @v tcp TCP connection 790 * @v seq SEQ value (in host-endian order) 791 * @v iobuf I/O buffer 792 * @ret rc Return status code 793 * 794 * This function takes ownership of the I/O buffer. 795 */ 796static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, 797 struct io_buffer *iobuf ) { 798 uint32_t already_rcvd; 799 uint32_t len; 800 int rc; 801 802 /* Ignore duplicate or out-of-order data */ 803 already_rcvd = ( tcp->rcv_ack - seq ); 804 len = iob_len ( iobuf ); 805 if ( already_rcvd >= len ) { 806 free_iob ( iobuf ); 807 return 0; 808 } 809 iob_pull ( iobuf, already_rcvd ); 810 len -= already_rcvd; 811 812 /* Deliver data to application */ 813 if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { 814 DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", 815 tcp, seq, ( seq + len ), strerror ( rc ) ); 816 return rc; 817 } 818 819 /* Acknowledge new data */ 820 tcp_rx_seq ( tcp, len ); 821 822 return 0; 823} 824 825/** 826 * Handle TCP received FIN 827 * 828 * @v tcp TCP connection 829 * @v seq SEQ value (in host-endian order) 830 * @ret rc Return status code 831 */ 832static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) { 833 834 /* Ignore duplicate or out-of-order FIN */ 835 if ( ( tcp->rcv_ack - seq ) > 0 ) 836 return 0; 837 838 /* Mark FIN as received and acknowledge it */ 839 tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN ); 840 tcp_rx_seq ( tcp, 1 ); 841 842 /* Close connection */ 843 tcp_close ( tcp, 0 ); 844 845 return 0; 846} 847 848/** 849 * Handle TCP received RST 850 * 851 * @v tcp TCP connection 852 * @v seq SEQ value (in host-endian order) 853 * @ret rc Return status code 854 */ 855static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { 856 857 /* Accept RST only if it falls within the window. If we have 858 * not yet received a SYN, then we have no window to test 859 * against, so fall back to checking that our SYN has been 860 * ACKed. 861 */ 862 if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) { 863 if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win ) 864 return 0; 865 } else { 866 if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) 867 return 0; 868 } 869 870 /* Abort connection */ 871 tcp->tcp_state = TCP_CLOSED; 872 tcp_dump_state ( tcp ); 873 tcp_close ( tcp, -ECONNRESET ); 874 875 DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); 876 return -ECONNRESET; 877} 878 879/** 880 * Process received packet 881 * 882 * @v iobuf I/O buffer 883 * @v st_src Partially-filled source address 884 * @v st_dest Partially-filled destination address 885 * @v pshdr_csum Pseudo-header checksum 886 * @ret rc Return status code 887 */ 888static int tcp_rx ( struct io_buffer *iobuf, 889 struct sockaddr_tcpip *st_src, 890 struct sockaddr_tcpip *st_dest __unused, 891 uint16_t pshdr_csum ) { 892 struct tcp_header *tcphdr = iobuf->data; 893 struct tcp_connection *tcp; 894 struct tcp_options options; 895 size_t hlen; 896 uint16_t csum; 897 uint32_t start_seq; 898 uint32_t seq; 899 uint32_t ack; 900 uint32_t win; 901 unsigned int flags; 902 size_t len; 903 int rc; 904 905 /* Sanity check packet */ 906 if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) { 907 DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n", 908 iob_len ( iobuf ), sizeof ( *tcphdr ) ); 909 rc = -EINVAL; 910 goto discard; 911 } 912 hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4; 913 if ( hlen < sizeof ( *tcphdr ) ) { 914 DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n", 915 hlen, sizeof ( *tcphdr ) ); 916 rc = -EINVAL; 917 goto discard; 918 } 919 if ( hlen > iob_len ( iobuf ) ) { 920 DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n", 921 hlen, iob_len ( iobuf ) ); 922 rc = -EINVAL; 923 goto discard; 924 } 925 csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, 926 iob_len ( iobuf ) ); 927 if ( csum != 0 ) { 928 DBG ( "TCP checksum incorrect (is %04x including checksum " 929 "field, should be 0000)\n", csum ); 930 rc = -EINVAL; 931 goto discard; 932 } 933 934 /* Parse parameters from header and strip header */ 935 tcp = tcp_demux ( tcphdr->dest ); 936 start_seq = seq = ntohl ( tcphdr->seq ); 937 ack = ntohl ( tcphdr->ack ); 938 win = ntohs ( tcphdr->win ); 939 flags = tcphdr->flags; 940 tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), 941 ( hlen - sizeof ( *tcphdr ) ), &options ); 942 iob_pull ( iobuf, hlen ); 943 len = iob_len ( iobuf ); 944 945 /* Dump header */ 946 DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08zx %4zd", 947 tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), 948 ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), 949 ( ntohl ( tcphdr->seq ) + len + 950 ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len); 951 tcp_dump_flags ( tcp, tcphdr->flags ); 952 DBGC2 ( tcp, "\n" ); 953 954 /* If no connection was found, send RST */ 955 if ( ! tcp ) { 956 tcp_xmit_reset ( tcp, st_src, tcphdr ); 957 rc = -ENOTCONN; 958 goto discard; 959 } 960 961 /* Handle ACK, if present */ 962 if ( flags & TCP_ACK ) { 963 if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) { 964 tcp_xmit_reset ( tcp, st_src, tcphdr ); 965 goto discard; 966 } 967 } 968 969 /* Handle SYN, if present */ 970 if ( flags & TCP_SYN ) { 971 tcp_rx_syn ( tcp, seq, &options ); 972 seq++; 973 } 974 975 /* Handle RST, if present */ 976 if ( flags & TCP_RST ) { 977 if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 ) 978 goto discard; 979 } 980 981 /* Handle new data, if any */ 982 tcp_rx_data ( tcp, seq, iobuf ); 983 seq += len; 984 985 /* Handle FIN, if present */ 986 if ( flags & TCP_FIN ) { 987 tcp_rx_fin ( tcp, seq ); 988 seq++; 989 } 990 991 /* Update timestamp, if present and applicable */ 992 if ( ( seq == tcp->rcv_ack ) && options.tsopt ) 993 tcp->ts_recent = ntohl ( options.tsopt->tsval ); 994 995 /* Dump out any state change as a result of the received packet */ 996 tcp_dump_state ( tcp ); 997 998 /* Send out any pending data. We force sending a reply if either 999 * 1000 * a) the peer is expecting an ACK (i.e. consumed sequence space), or 1001 * b) either end of the packet was outside the receive window 1002 * 1003 * Case (b) enables us to support TCP keepalives using 1004 * zero-length packets, which we would otherwise ignore. Note 1005 * that for case (b), we need *only* consider zero-length 1006 * packets, since non-zero-length packets will already be 1007 * caught by case (a). 1008 */ 1009 tcp_xmit ( tcp, ( ( start_seq != seq ) || 1010 ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) ); 1011 1012 /* If this packet was the last we expect to receive, set up 1013 * timer to expire and cause the connection to be freed. 1014 */ 1015 if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) { 1016 tcp->timer.timeout = ( 2 * TCP_MSL ); 1017 start_timer ( &tcp->timer ); 1018 } 1019 1020 return 0; 1021 1022 discard: 1023 /* Free received packet */ 1024 free_iob ( iobuf ); 1025 return rc; 1026} 1027 1028/** TCP protocol */ 1029struct tcpip_protocol tcp_protocol __tcpip_protocol = { 1030 .name = "TCP", 1031 .rx = tcp_rx, 1032 .tcpip_proto = IP_TCP, 1033}; 1034 1035/*************************************************************************** 1036 * 1037 * Data transfer interface 1038 * 1039 *************************************************************************** 1040 */ 1041 1042/** 1043 * Close interface 1044 * 1045 * @v xfer Data transfer interface 1046 * @v rc Reason for close 1047 */ 1048static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) { 1049 struct tcp_connection *tcp = 1050 container_of ( xfer, struct tcp_connection, xfer ); 1051 1052 /* Close data transfer interface */ 1053 tcp_close ( tcp, rc ); 1054 1055 /* Transmit FIN, if possible */ 1056 tcp_xmit ( tcp, 0 ); 1057} 1058 1059/** 1060 * Check flow control window 1061 * 1062 * @v xfer Data transfer interface 1063 * @ret len Length of window 1064 */ 1065static size_t tcp_xfer_window ( struct xfer_interface *xfer ) { 1066 struct tcp_connection *tcp = 1067 container_of ( xfer, struct tcp_connection, xfer ); 1068 1069 /* Not ready if data queue is non-empty. This imposes a limit 1070 * of only one unACKed packet in the TX queue at any time; we 1071 * do this to conserve memory usage. 1072 */ 1073 if ( ! list_empty ( &tcp->queue ) ) 1074 return 0; 1075 1076 /* Return TCP window length */ 1077 return tcp_xmit_win ( tcp ); 1078} 1079 1080/** 1081 * Deliver datagram as I/O buffer 1082 * 1083 * @v xfer Data transfer interface 1084 * @v iobuf Datagram I/O buffer 1085 * @v meta Data transfer metadata 1086 * @ret rc Return status code 1087 */ 1088static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer, 1089 struct io_buffer *iobuf, 1090 struct xfer_metadata *meta __unused ) { 1091 struct tcp_connection *tcp = 1092 container_of ( xfer, struct tcp_connection, xfer ); 1093 1094 /* Enqueue packet */ 1095 list_add_tail ( &iobuf->list, &tcp->queue ); 1096 1097 /* Transmit data, if possible */ 1098 tcp_xmit ( tcp, 0 ); 1099 1100 return 0; 1101} 1102 1103/** TCP data transfer interface operations */ 1104static struct xfer_interface_operations tcp_xfer_operations = { 1105 .close = tcp_xfer_close, 1106 .vredirect = ignore_xfer_vredirect, 1107 .window = tcp_xfer_window, 1108 .alloc_iob = default_xfer_alloc_iob, 1109 .deliver_iob = tcp_xfer_deliver_iob, 1110 .deliver_raw = xfer_deliver_as_iob, 1111}; 1112 1113/*************************************************************************** 1114 * 1115 * Openers 1116 * 1117 *************************************************************************** 1118 */ 1119 1120/** TCP socket opener */ 1121struct socket_opener tcp_socket_opener __socket_opener = { 1122 .semantics = TCP_SOCK_STREAM, 1123 .family = AF_INET, 1124 .open = tcp_open, 1125}; 1126 1127/** Linkage hack */ 1128int tcp_sock_stream = TCP_SOCK_STREAM; 1129 1130/** 1131 * Open TCP URI 1132 * 1133 * @v xfer Data transfer interface 1134 * @v uri URI 1135 * @ret rc Return status code 1136 */ 1137static int tcp_open_uri ( struct xfer_interface *xfer, struct uri *uri ) { 1138 struct sockaddr_tcpip peer; 1139 1140 /* Sanity check */ 1141 if ( ! uri->host ) 1142 return -EINVAL; 1143 1144 memset ( &peer, 0, sizeof ( peer ) ); 1145 peer.st_port = htons ( uri_port ( uri, 0 ) ); 1146 return xfer_open_named_socket ( xfer, SOCK_STREAM, 1147 ( struct sockaddr * ) &peer, 1148 uri->host, NULL ); 1149} 1150 1151/** TCP URI opener */ 1152struct uri_opener tcp_uri_opener __uri_opener = { 1153 .scheme = "tcp", 1154 .open = tcp_open_uri, 1155}; 1156 1157