pseudotcp.cc revision dc0f95d653279beabeb9817299e2902918ba123e
1/*
2 * libjingle
3 * Copyright 2004--2005, Google Inc.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 *  1. Redistributions of source code must retain the above copyright notice,
9 *     this list of conditions and the following disclaimer.
10 *  2. Redistributions in binary form must reproduce the above copyright notice,
11 *     this list of conditions and the following disclaimer in the documentation
12 *     and/or other materials provided with the distribution.
13 *  3. The name of the author may not be used to endorse or promote products
14 *     derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "talk/p2p/base/pseudotcp.h"
29
30#include <cstdio>
31#include <cstdlib>
32
33#include "talk/base/basictypes.h"
34#include "talk/base/byteorder.h"
35#include "talk/base/common.h"
36#include "talk/base/logging.h"
37#include "talk/base/socket.h"
38#include "talk/base/stringutils.h"
39#include "talk/base/time.h"
40
41// The following logging is for detailed (packet-level) analysis only.
42#define _DBG_NONE     0
43#define _DBG_NORMAL   1
44#define _DBG_VERBOSE  2
45#define _DEBUGMSG _DBG_NONE
46
47namespace cricket {
48
49//////////////////////////////////////////////////////////////////////
50// Network Constants
51//////////////////////////////////////////////////////////////////////
52
53// Standard MTUs
54const uint16 PACKET_MAXIMUMS[] = {
55  65535,    // Theoretical maximum, Hyperchannel
56  32000,    // Nothing
57  17914,    // 16Mb IBM Token Ring
58  8166,   // IEEE 802.4
59  //4464,   // IEEE 802.5 (4Mb max)
60  4352,   // FDDI
61  //2048,   // Wideband Network
62  2002,   // IEEE 802.5 (4Mb recommended)
63  //1536,   // Expermental Ethernet Networks
64  //1500,   // Ethernet, Point-to-Point (default)
65  1492,   // IEEE 802.3
66  1006,   // SLIP, ARPANET
67  //576,    // X.25 Networks
68  //544,    // DEC IP Portal
69  //512,    // NETBIOS
70  508,    // IEEE 802/Source-Rt Bridge, ARCNET
71  296,    // Point-to-Point (low delay)
72  //68,     // Official minimum
73  0,      // End of list marker
74};
75
76const uint32 MAX_PACKET = 65535;
77// Note: we removed lowest level because packet overhead was larger!
78const uint32 MIN_PACKET = 296;
79
80const uint32 IP_HEADER_SIZE = 20; // (+ up to 40 bytes of options?)
81const uint32 ICMP_HEADER_SIZE = 8;
82const uint32 UDP_HEADER_SIZE = 8;
83// TODO: Make JINGLE_HEADER_SIZE transparent to this code?
84const uint32 JINGLE_HEADER_SIZE = 64; // when relay framing is in use
85
86//////////////////////////////////////////////////////////////////////
87// Global Constants and Functions
88//////////////////////////////////////////////////////////////////////
89//
90//    0                   1                   2                   3
91//    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
92//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
93//  0 |                      Conversation Number                      |
94//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
95//  4 |                        Sequence Number                        |
96//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
97//  8 |                     Acknowledgment Number                     |
98//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
99//    |               |   |U|A|P|R|S|F|                               |
100// 12 |    Control    |   |R|C|S|S|Y|I|            Window             |
101//    |               |   |G|K|H|T|N|N|                               |
102//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
103// 16 |                       Timestamp sending                       |
104//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
105// 20 |                      Timestamp receiving                      |
106//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
107// 24 |                             data                              |
108//    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
109//
110//////////////////////////////////////////////////////////////////////
111
112#define PSEUDO_KEEPALIVE 0
113
114const uint32 MAX_SEQ = 0xFFFFFFFF;
115const uint32 HEADER_SIZE = 24;
116const uint32 PACKET_OVERHEAD = HEADER_SIZE + UDP_HEADER_SIZE + IP_HEADER_SIZE + JINGLE_HEADER_SIZE;
117
118const uint32 MIN_RTO   =   250; // 250 ms (RFC1122, Sec 4.2.3.1 "fractions of a second")
119const uint32 DEF_RTO   =  3000; // 3 seconds (RFC1122, Sec 4.2.3.1)
120const uint32 MAX_RTO   = 60000; // 60 seconds
121const uint32 DEF_ACK_DELAY = 100; // 100 milliseconds
122
123const uint8 FLAG_CTL = 0x02;
124const uint8 FLAG_RST = 0x04;
125
126const uint8 CTL_CONNECT = 0;
127//const uint8 CTL_REDIRECT = 1;
128const uint8 CTL_EXTRA = 255;
129
130/*
131const uint8 FLAG_FIN = 0x01;
132const uint8 FLAG_SYN = 0x02;
133const uint8 FLAG_ACK = 0x10;
134*/
135
136const uint32 CTRL_BOUND = 0x80000000;
137
138const long DEFAULT_TIMEOUT = 4000; // If there are no pending clocks, wake up every 4 seconds
139const long CLOSED_TIMEOUT = 60 * 1000; // If the connection is closed, once per minute
140
141#if PSEUDO_KEEPALIVE
142// !?! Rethink these times
143const uint32 IDLE_PING = 20 * 1000; // 20 seconds (note: WinXP SP2 firewall udp timeout is 90 seconds)
144const uint32 IDLE_TIMEOUT = 90 * 1000; // 90 seconds;
145#endif // PSEUDO_KEEPALIVE
146
147//////////////////////////////////////////////////////////////////////
148// Helper Functions
149//////////////////////////////////////////////////////////////////////
150
151inline void long_to_bytes(uint32 val, void* buf) {
152  *static_cast<uint32*>(buf) = talk_base::HostToNetwork32(val);
153}
154
155inline void short_to_bytes(uint16 val, void* buf) {
156  *static_cast<uint16*>(buf) = talk_base::HostToNetwork16(val);
157}
158
159inline uint32 bytes_to_long(const void* buf) {
160  return talk_base::NetworkToHost32(*static_cast<const uint32*>(buf));
161}
162
163inline uint16 bytes_to_short(const void* buf) {
164  return talk_base::NetworkToHost16(*static_cast<const uint16*>(buf));
165}
166
167uint32 bound(uint32 lower, uint32 middle, uint32 upper) {
168  return talk_base::_min(talk_base::_max(lower, middle), upper);
169}
170
171//////////////////////////////////////////////////////////////////////
172// Debugging Statistics
173//////////////////////////////////////////////////////////////////////
174
#if 0  // Not used yet

// Counters for packet-level debugging statistics.
enum Stat {
  S_SENT_PACKET,   // All packet sends
  S_RESENT_PACKET, // All packet sends that are retransmits
  S_RECV_PACKET,   // All packet receives
  S_RECV_NEW,      // All packet receives that are too new
  S_RECV_OLD,      // All packet receives that are too old
  S_NUM_STATS
};

// Short labels, one per Stat value, in enum order.
const char* const STAT_NAMES[S_NUM_STATS] = {
  "snt",
  "snt-r",
  "rcv",
  "rcv-n",
  "rcv-o"
};

int g_stats[S_NUM_STATS];

// Bumps the counter for |s|.
inline void Incr(Stat s) { ++g_stats[s]; }

// Logs all counters as a comma-separated "name:value" list and resets them.
void ReportStats() {
  char buffer[256];
  size_t len = 0;
  for (int i = 0; i < S_NUM_STATS; ++i) {
    // Append after what has been written so far.
    // NOTE(review): assumes sprintfn returns the number of characters written,
    // excluding the terminator -- confirm against talk/base/stringutils.h.
    len += talk_base::sprintfn(buffer + len, ARRAY_SIZE(buffer) - len,
                               "%s%s:%d",
                               (i == 0) ? "" : ",", STAT_NAMES[i], g_stats[i]);
    g_stats[i] = 0;
  }
  LOG(LS_INFO) << "Stats[" << buffer << "]";
}

#endif
208
209//////////////////////////////////////////////////////////////////////
210// PseudoTcp
211//////////////////////////////////////////////////////////////////////
212
213uint32 PseudoTcp::Now() {
214#if 0  // Use this to synchronize timers with logging timestamps (easier debug)
215  return talk_base::TimeSince(StartTime());
216#else
217  return talk_base::Time();
218#endif
219}
220
221PseudoTcp::PseudoTcp(IPseudoTcpNotify* notify, uint32 conv)
222    : m_notify(notify), m_shutdown(SD_NONE), m_error(0) {
223
224  // Sanity check on buffer sizes (needed for OnTcpWriteable notification logic)
225  ASSERT(sizeof(m_rbuf) + MIN_PACKET < sizeof(m_sbuf));
226
227  uint32 now = Now();
228
229  m_state = TCP_LISTEN;
230  m_conv = conv;
231  m_rcv_wnd = sizeof(m_rbuf);
232  m_snd_nxt = m_slen = 0;
233  m_snd_wnd = 1;
234  m_snd_una = m_rcv_nxt = m_rlen = 0;
235  m_bReadEnable = true;
236  m_bWriteEnable = false;
237  m_t_ack = 0;
238
239  m_msslevel = 0;
240  m_largest = 0;
241  ASSERT(MIN_PACKET > PACKET_OVERHEAD);
242  m_mss = MIN_PACKET - PACKET_OVERHEAD;
243  m_mtu_advise = MAX_PACKET;
244
245  m_rto_base = 0;
246
247  m_cwnd = 2 * m_mss;
248  m_ssthresh = sizeof(m_rbuf);
249  m_lastrecv = m_lastsend = m_lasttraffic = now;
250  m_bOutgoing = false;
251
252  m_dup_acks = 0;
253  m_recover = 0;
254
255  m_ts_recent = m_ts_lastack = 0;
256
257  m_rx_rto = DEF_RTO;
258  m_rx_srtt = m_rx_rttvar = 0;
259
260  m_use_nagling = true;
261  m_ack_delay = DEF_ACK_DELAY;
262}
263
264PseudoTcp::~PseudoTcp() {
265}
266
267int PseudoTcp::Connect() {
268  if (m_state != TCP_LISTEN) {
269    m_error = EINVAL;
270    return -1;
271  }
272
273  m_state = TCP_SYN_SENT;
274  LOG(LS_INFO) << "State: TCP_SYN_SENT";
275
276  char buffer[1];
277  buffer[0] = CTL_CONNECT;
278  queue(buffer, 1, true);
279  attemptSend();
280
281  return 0;
282}
283
284void PseudoTcp::NotifyMTU(uint16 mtu) {
285  m_mtu_advise = mtu;
286  if (m_state == TCP_ESTABLISHED) {
287    adjustMTU();
288  }
289}
290
291void PseudoTcp::NotifyClock(uint32 now) {
292  if (m_state == TCP_CLOSED)
293    return;
294
295    // Check if it's time to retransmit a segment
296  if (m_rto_base && (talk_base::TimeDiff(m_rto_base + m_rx_rto, now) <= 0)) {
297    if (m_slist.empty()) {
298      ASSERT(false);
299    } else {
300      // Note: (m_slist.front().xmit == 0)) {
301      // retransmit segments
302#if _DEBUGMSG >= _DBG_NORMAL
303      LOG(LS_INFO) << "timeout retransmit (rto: " << m_rx_rto
304                   << ") (rto_base: " << m_rto_base
305                   << ") (now: " << now
306                   << ") (dup_acks: " << static_cast<unsigned>(m_dup_acks)
307                   << ")";
308#endif // _DEBUGMSG
309      if (!transmit(m_slist.begin(), now)) {
310        closedown(ECONNABORTED);
311        return;
312      }
313
314      uint32 nInFlight = m_snd_nxt - m_snd_una;
315      m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss);
316      //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << "  nInFlight: " << nInFlight << "  m_mss: " << m_mss;
317      m_cwnd = m_mss;
318
319      // Back off retransmit timer.  Note: the limit is lower when connecting.
320      uint32 rto_limit = (m_state < TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
321      m_rx_rto = talk_base::_min(rto_limit, m_rx_rto * 2);
322      m_rto_base = now;
323    }
324  }
325
326  // Check if it's time to probe closed windows
327  if ((m_snd_wnd == 0)
328        && (talk_base::TimeDiff(m_lastsend + m_rx_rto, now) <= 0)) {
329    if (talk_base::TimeDiff(now, m_lastrecv) >= 15000) {
330      closedown(ECONNABORTED);
331      return;
332    }
333
334    // probe the window
335    packet(m_snd_nxt - 1, 0, 0, 0);
336    m_lastsend = now;
337
338    // back off retransmit timer
339    m_rx_rto = talk_base::_min(MAX_RTO, m_rx_rto * 2);
340  }
341
342  // Check if it's time to send delayed acks
343  if (m_t_ack && (talk_base::TimeDiff(m_t_ack + m_ack_delay, now) <= 0)) {
344    packet(m_snd_nxt, 0, 0, 0);
345  }
346
347#if PSEUDO_KEEPALIVE
348  // Check for idle timeout
349  if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lastrecv + IDLE_TIMEOUT, now) <= 0)) {
350    closedown(ECONNABORTED);
351    return;
352  }
353
354  // Check for ping timeout (to keep udp mapping open)
355  if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now) <= 0)) {
356    packet(m_snd_nxt, 0, 0, 0);
357  }
358#endif // PSEUDO_KEEPALIVE
359}
360
361bool PseudoTcp::NotifyPacket(const char* buffer, size_t len) {
362  if (len > MAX_PACKET) {
363    LOG_F(WARNING) << "packet too large";
364    return false;
365  }
366  return parse(reinterpret_cast<const uint8 *>(buffer), uint32(len));
367}
368
369bool PseudoTcp::GetNextClock(uint32 now, long& timeout) {
370  return clock_check(now, timeout);
371}
372
373void PseudoTcp::GetOption(Option opt, int* value) {
374  if (opt == OPT_NODELAY) {
375    *value = m_use_nagling ? 0 : 1;
376  } else if (opt == OPT_ACKDELAY) {
377    *value = m_ack_delay;
378  } else {
379    ASSERT(false);
380  }
381}
382
383void PseudoTcp::SetOption(Option opt, int value) {
384  if (opt == OPT_NODELAY) {
385    m_use_nagling = value == 0;
386  } else if (opt == OPT_ACKDELAY) {
387    m_ack_delay = value;
388  } else {
389    ASSERT(false);
390  }
391}
392
393//
394// IPStream Implementation
395//
396
397int PseudoTcp::Recv(char* buffer, size_t len) {
398  if (m_state != TCP_ESTABLISHED) {
399    m_error = ENOTCONN;
400    return SOCKET_ERROR;
401  }
402
403  if (m_rlen == 0) {
404    m_bReadEnable = true;
405    m_error = EWOULDBLOCK;
406    return SOCKET_ERROR;
407  }
408
409  uint32 read = talk_base::_min(uint32(len), m_rlen);
410  memcpy(buffer, m_rbuf, read);
411  m_rlen -= read;
412
413  // !?! until we create a circular buffer, we need to move all of the rest of the buffer up!
414  memmove(m_rbuf, m_rbuf + read, sizeof(m_rbuf) - read/*m_rlen*/);
415
416  if ((sizeof(m_rbuf) - m_rlen - m_rcv_wnd)
417      >= talk_base::_min<uint32>(sizeof(m_rbuf) / 2, m_mss)) {
418    bool bWasClosed = (m_rcv_wnd == 0); // !?! Not sure about this was closed business
419
420    m_rcv_wnd = sizeof(m_rbuf) - m_rlen;
421
422    if (bWasClosed) {
423      attemptSend(sfImmediateAck);
424    }
425  }
426
427  return read;
428}
429
430int PseudoTcp::Send(const char* buffer, size_t len) {
431  if (m_state != TCP_ESTABLISHED) {
432    m_error = ENOTCONN;
433    return SOCKET_ERROR;
434  }
435
436  if (m_slen == sizeof(m_sbuf)) {
437    m_bWriteEnable = true;
438    m_error = EWOULDBLOCK;
439    return SOCKET_ERROR;
440  }
441
442  int written = queue(buffer, uint32(len), false);
443  attemptSend();
444  return written;
445}
446
447void PseudoTcp::Close(bool force) {
448  LOG_F(LS_VERBOSE) << "(" << (force ? "true" : "false") << ")";
449  m_shutdown = force ? SD_FORCEFUL : SD_GRACEFUL;
450}
451
452int PseudoTcp::GetError() {
453  return m_error;
454}
455
456//
457// Internal Implementation
458//
459
460uint32 PseudoTcp::queue(const char* data, uint32 len, bool bCtrl) {
461  if (len > sizeof(m_sbuf) - m_slen) {
462    ASSERT(!bCtrl);
463    len = sizeof(m_sbuf) - m_slen;
464  }
465
466  // We can concatenate data if the last segment is the same type
467  // (control v. regular data), and has not been transmitted yet
468  if (!m_slist.empty() && (m_slist.back().bCtrl == bCtrl) && (m_slist.back().xmit == 0)) {
469    m_slist.back().len += len;
470  } else {
471    SSegment sseg(m_snd_una + m_slen, len, bCtrl);
472    m_slist.push_back(sseg);
473  }
474
475  memcpy(m_sbuf + m_slen, data, len);
476  m_slen += len;
477  //LOG(LS_INFO) << "PseudoTcp::queue - m_slen = " << m_slen;
478  return len;
479}
480
481IPseudoTcpNotify::WriteResult PseudoTcp::packet(uint32 seq, uint8 flags,
482                                                const char* data, uint32 len) {
483  ASSERT(HEADER_SIZE + len <= MAX_PACKET);
484
485  uint32 now = Now();
486
487  uint8 buffer[MAX_PACKET];
488  long_to_bytes(m_conv, buffer);
489  long_to_bytes(seq, buffer + 4);
490  long_to_bytes(m_rcv_nxt, buffer + 8);
491  buffer[12] = 0;
492  buffer[13] = flags;
493  short_to_bytes(uint16(m_rcv_wnd), buffer + 14);
494
495  // Timestamp computations
496  long_to_bytes(now, buffer + 16);
497  long_to_bytes(m_ts_recent, buffer + 20);
498  m_ts_lastack = m_rcv_nxt;
499
500  memcpy(buffer + HEADER_SIZE, data, len);
501
502#if _DEBUGMSG >= _DBG_VERBOSE
503  LOG(LS_INFO) << "<-- <CONV=" << m_conv
504               << "><FLG=" << static_cast<unsigned>(flags)
505               << "><SEQ=" << seq << ":" << seq + len
506               << "><ACK=" << m_rcv_nxt
507               << "><WND=" << m_rcv_wnd
508               << "><TS="  << (now % 10000)
509               << "><TSR=" << (m_ts_recent % 10000)
510               << "><LEN=" << len << ">";
511#endif // _DEBUGMSG
512
513  IPseudoTcpNotify::WriteResult wres = m_notify->TcpWritePacket(this, reinterpret_cast<char *>(buffer), len + HEADER_SIZE);
514  // Note: When data is NULL, this is an ACK packet.  We don't read the return value for those,
515  // and thus we won't retry.  So go ahead and treat the packet as a success (basically simulate
516  // as if it were dropped), which will prevent our timers from being messed up.
517  if ((wres != IPseudoTcpNotify::WR_SUCCESS) && (NULL != data))
518    return wres;
519
520  m_t_ack = 0;
521  if (len > 0) {
522    m_lastsend = now;
523  }
524  m_lasttraffic = now;
525  m_bOutgoing = true;
526
527  return IPseudoTcpNotify::WR_SUCCESS;
528}
529
530bool PseudoTcp::parse(const uint8* buffer, uint32 size) {
531  if (size < 12)
532    return false;
533
534  Segment seg;
535  seg.conv = bytes_to_long(buffer);
536  seg.seq = bytes_to_long(buffer + 4);
537  seg.ack = bytes_to_long(buffer + 8);
538  seg.flags = buffer[13];
539  seg.wnd = bytes_to_short(buffer + 14);
540
541  seg.tsval = bytes_to_long(buffer + 16);
542  seg.tsecr = bytes_to_long(buffer + 20);
543
544  seg.data = reinterpret_cast<const char *>(buffer) + HEADER_SIZE;
545  seg.len = size - HEADER_SIZE;
546
547#if _DEBUGMSG >= _DBG_VERBOSE
548  LOG(LS_INFO) << "--> <CONV=" << seg.conv
549               << "><FLG=" << static_cast<unsigned>(seg.flags)
550               << "><SEQ=" << seg.seq << ":" << seg.seq + seg.len
551               << "><ACK=" << seg.ack
552               << "><WND=" << seg.wnd
553               << "><TS="  << (seg.tsval % 10000)
554               << "><TSR=" << (seg.tsecr % 10000)
555               << "><LEN=" << seg.len << ">";
556#endif // _DEBUGMSG
557
558  return process(seg);
559}
560
561bool PseudoTcp::clock_check(uint32 now, long& nTimeout) {
562  if (m_shutdown == SD_FORCEFUL)
563    return false;
564
565  if ((m_shutdown == SD_GRACEFUL)
566      && ((m_state != TCP_ESTABLISHED)
567          || ((m_slen == 0) && (m_t_ack == 0)))) {
568    return false;
569  }
570
571  if (m_state == TCP_CLOSED) {
572    nTimeout = CLOSED_TIMEOUT;
573    return true;
574  }
575
576  nTimeout = DEFAULT_TIMEOUT;
577
578  if (m_t_ack) {
579    nTimeout = talk_base::_min<int32>(nTimeout,
580      talk_base::TimeDiff(m_t_ack + m_ack_delay, now));
581  }
582  if (m_rto_base) {
583    nTimeout = talk_base::_min<int32>(nTimeout,
584      talk_base::TimeDiff(m_rto_base + m_rx_rto, now));
585  }
586  if (m_snd_wnd == 0) {
587    nTimeout = talk_base::_min<int32>(nTimeout, talk_base::TimeDiff(m_lastsend + m_rx_rto, now));
588  }
589#if PSEUDO_KEEPALIVE
590  if (m_state == TCP_ESTABLISHED) {
591    nTimeout = talk_base::_min<int32>(nTimeout,
592      talk_base::TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now));
593  }
594#endif // PSEUDO_KEEPALIVE
595  return true;
596}
597
598bool PseudoTcp::process(Segment& seg) {
599  // If this is the wrong conversation, send a reset!?! (with the correct conversation?)
600  if (seg.conv != m_conv) {
601    //if ((seg.flags & FLAG_RST) == 0) {
602    //  packet(tcb, seg.ack, 0, FLAG_RST, 0, 0);
603    //}
604    LOG_F(LS_ERROR) << "wrong conversation";
605    return false;
606  }
607
608  uint32 now = Now();
609  m_lasttraffic = m_lastrecv = now;
610  m_bOutgoing = false;
611
612  if (m_state == TCP_CLOSED) {
613    // !?! send reset?
614    LOG_F(LS_ERROR) << "closed";
615    return false;
616  }
617
618  // Check if this is a reset segment
619  if (seg.flags & FLAG_RST) {
620    closedown(ECONNRESET);
621    return false;
622  }
623
624  // Check for control data
625  bool bConnect = false;
626  if (seg.flags & FLAG_CTL) {
627    if (seg.len == 0) {
628      LOG_F(LS_ERROR) << "Missing control code";
629      return false;
630    } else if (seg.data[0] == CTL_CONNECT) {
631      bConnect = true;
632      if (m_state == TCP_LISTEN) {
633        m_state = TCP_SYN_RECEIVED;
634        LOG(LS_INFO) << "State: TCP_SYN_RECEIVED";
635        //m_notify->associate(addr);
636        char buffer[1];
637        buffer[0] = CTL_CONNECT;
638        queue(buffer, 1, true);
639      } else if (m_state == TCP_SYN_SENT) {
640        m_state = TCP_ESTABLISHED;
641        LOG(LS_INFO) << "State: TCP_ESTABLISHED";
642        adjustMTU();
643        if (m_notify) {
644          m_notify->OnTcpOpen(this);
645        }
646        //notify(evOpen);
647      }
648    } else {
649      LOG_F(LS_WARNING) << "Unknown control code: " << seg.data[0];
650      return false;
651    }
652  }
653
654  // Update timestamp
655  if ((seg.seq <= m_ts_lastack) && (m_ts_lastack < seg.seq + seg.len)) {
656    m_ts_recent = seg.tsval;
657  }
658
659  // Check if this is a valuable ack
660  if ((seg.ack > m_snd_una) && (seg.ack <= m_snd_nxt)) {
661    // Calculate round-trip time
662    if (seg.tsecr) {
663      long rtt = talk_base::TimeDiff(now, seg.tsecr);
664      if (rtt >= 0) {
665        if (m_rx_srtt == 0) {
666          m_rx_srtt = rtt;
667          m_rx_rttvar = rtt / 2;
668        } else {
669          m_rx_rttvar = (3 * m_rx_rttvar + abs(long(rtt - m_rx_srtt))) / 4;
670          m_rx_srtt = (7 * m_rx_srtt + rtt) / 8;
671        }
672        m_rx_rto = bound(MIN_RTO, m_rx_srtt +
673            talk_base::_max<uint32>(1, 4 * m_rx_rttvar), MAX_RTO);
674#if _DEBUGMSG >= _DBG_VERBOSE
675        LOG(LS_INFO) << "rtt: " << rtt
676                     << "  srtt: " << m_rx_srtt
677                     << "  rto: " << m_rx_rto;
678#endif // _DEBUGMSG
679      } else {
680        ASSERT(false);
681      }
682    }
683
684    m_snd_wnd = seg.wnd;
685
686    uint32 nAcked = seg.ack - m_snd_una;
687    m_snd_una = seg.ack;
688
689    m_rto_base = (m_snd_una == m_snd_nxt) ? 0 : now;
690
691    m_slen -= nAcked;
692    memmove(m_sbuf, m_sbuf + nAcked, m_slen);
693    //LOG(LS_INFO) << "PseudoTcp::process - m_slen = " << m_slen;
694
695    for (uint32 nFree = nAcked; nFree > 0; ) {
696      ASSERT(!m_slist.empty());
697      if (nFree < m_slist.front().len) {
698        m_slist.front().len -= nFree;
699        nFree = 0;
700      } else {
701        if (m_slist.front().len > m_largest) {
702          m_largest = m_slist.front().len;
703        }
704        nFree -= m_slist.front().len;
705        m_slist.pop_front();
706      }
707    }
708
709    if (m_dup_acks >= 3) {
710      if (m_snd_una >= m_recover) { // NewReno
711        uint32 nInFlight = m_snd_nxt - m_snd_una;
712        m_cwnd = talk_base::_min(m_ssthresh, nInFlight + m_mss); // (Fast Retransmit)
713#if _DEBUGMSG >= _DBG_NORMAL
714        LOG(LS_INFO) << "exit recovery";
715#endif // _DEBUGMSG
716        m_dup_acks = 0;
717      } else {
718#if _DEBUGMSG >= _DBG_NORMAL
719        LOG(LS_INFO) << "recovery retransmit";
720#endif // _DEBUGMSG
721        if (!transmit(m_slist.begin(), now)) {
722          closedown(ECONNABORTED);
723          return false;
724        }
725        m_cwnd += m_mss - talk_base::_min(nAcked, m_cwnd);
726      }
727    } else {
728      m_dup_acks = 0;
729      // Slow start, congestion avoidance
730      if (m_cwnd < m_ssthresh) {
731        m_cwnd += m_mss;
732      } else {
733        m_cwnd += talk_base::_max<uint32>(1, m_mss * m_mss / m_cwnd);
734      }
735    }
736
737    // !?! A bit hacky
738    if ((m_state == TCP_SYN_RECEIVED) && !bConnect) {
739      m_state = TCP_ESTABLISHED;
740      LOG(LS_INFO) << "State: TCP_ESTABLISHED";
741      adjustMTU();
742      if (m_notify) {
743        m_notify->OnTcpOpen(this);
744      }
745      //notify(evOpen);
746    }
747
748    // If we make room in the send queue, notify the user
749    // The goal it to make sure we always have at least enough data to fill the
750    // window.  We'd like to notify the app when we are halfway to that point.
751    const uint32 kIdealRefillSize = (sizeof(m_sbuf) + sizeof(m_rbuf)) / 2;
752    if (m_bWriteEnable && (m_slen < kIdealRefillSize)) {
753      m_bWriteEnable = false;
754      if (m_notify) {
755        m_notify->OnTcpWriteable(this);
756      }
757      //notify(evWrite);
758    }
759  } else if (seg.ack == m_snd_una) {
760    // !?! Note, tcp says don't do this... but otherwise how does a closed window become open?
761    m_snd_wnd = seg.wnd;
762
763    // Check duplicate acks
764    if (seg.len > 0) {
765      // it's a dup ack, but with a data payload, so don't modify m_dup_acks
766    } else if (m_snd_una != m_snd_nxt) {
767      m_dup_acks += 1;
768      if (m_dup_acks == 3) { // (Fast Retransmit)
769#if _DEBUGMSG >= _DBG_NORMAL
770        LOG(LS_INFO) << "enter recovery";
771        LOG(LS_INFO) << "recovery retransmit";
772#endif // _DEBUGMSG
773        if (!transmit(m_slist.begin(), now)) {
774          closedown(ECONNABORTED);
775          return false;
776        }
777        m_recover = m_snd_nxt;
778        uint32 nInFlight = m_snd_nxt - m_snd_una;
779        m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss);
780        //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << "  nInFlight: " << nInFlight << "  m_mss: " << m_mss;
781        m_cwnd = m_ssthresh + 3 * m_mss;
782      } else if (m_dup_acks > 3) {
783        m_cwnd += m_mss;
784      }
785    } else {
786      m_dup_acks = 0;
787    }
788  }
789
790  // Conditions were acks must be sent:
791  // 1) Segment is too old (they missed an ACK) (immediately)
792  // 2) Segment is too new (we missed a segment) (immediately)
793  // 3) Segment has data (so we need to ACK!) (delayed)
794  // ... so the only time we don't need to ACK, is an empty segment that points to rcv_nxt!
795
796  SendFlags sflags = sfNone;
797  if (seg.seq != m_rcv_nxt) {
798    sflags = sfImmediateAck; // (Fast Recovery)
799  } else if (seg.len != 0) {
800    if (m_ack_delay == 0) {
801      sflags = sfImmediateAck;
802    } else {
803      sflags = sfDelayedAck;
804    }
805  }
806#if _DEBUGMSG >= _DBG_NORMAL
807  if (sflags == sfImmediateAck) {
808    if (seg.seq > m_rcv_nxt) {
809      LOG_F(LS_INFO) << "too new";
810    } else if (seg.seq + seg.len <= m_rcv_nxt) {
811      LOG_F(LS_INFO) << "too old";
812    }
813  }
814#endif // _DEBUGMSG
815
816  // Adjust the incoming segment to fit our receive buffer
817  if (seg.seq < m_rcv_nxt) {
818    uint32 nAdjust = m_rcv_nxt - seg.seq;
819    if (nAdjust < seg.len) {
820      seg.seq += nAdjust;
821      seg.data += nAdjust;
822      seg.len -= nAdjust;
823    } else {
824      seg.len = 0;
825    }
826  }
827  if ((seg.seq + seg.len - m_rcv_nxt) > (sizeof(m_rbuf) - m_rlen)) {
828    uint32 nAdjust = seg.seq + seg.len - m_rcv_nxt - (sizeof(m_rbuf) - m_rlen);
829    if (nAdjust < seg.len) {
830      seg.len -= nAdjust;
831    } else {
832      seg.len = 0;
833    }
834  }
835
836  bool bIgnoreData = (seg.flags & FLAG_CTL) || (m_shutdown != SD_NONE);
837  bool bNewData = false;
838
839  if (seg.len > 0) {
840    if (bIgnoreData) {
841      if (seg.seq == m_rcv_nxt) {
842        m_rcv_nxt += seg.len;
843      }
844    } else {
845      uint32 nOffset = seg.seq - m_rcv_nxt;
846      memcpy(m_rbuf + m_rlen + nOffset, seg.data, seg.len);
847      if (seg.seq == m_rcv_nxt) {
848        m_rlen += seg.len;
849        m_rcv_nxt += seg.len;
850        m_rcv_wnd -= seg.len;
851        bNewData = true;
852
853        RList::iterator it = m_rlist.begin();
854        while ((it != m_rlist.end()) && (it->seq <= m_rcv_nxt)) {
855          if (it->seq + it->len > m_rcv_nxt) {
856            sflags = sfImmediateAck; // (Fast Recovery)
857            uint32 nAdjust = (it->seq + it->len) - m_rcv_nxt;
858#if _DEBUGMSG >= _DBG_NORMAL
859            LOG(LS_INFO) << "Recovered " << nAdjust << " bytes (" << m_rcv_nxt << " -> " << m_rcv_nxt + nAdjust << ")";
860#endif // _DEBUGMSG
861            m_rlen += nAdjust;
862            m_rcv_nxt += nAdjust;
863            m_rcv_wnd -= nAdjust;
864          }
865          it = m_rlist.erase(it);
866        }
867      } else {
868#if _DEBUGMSG >= _DBG_NORMAL
869        LOG(LS_INFO) << "Saving " << seg.len << " bytes (" << seg.seq << " -> " << seg.seq + seg.len << ")";
870#endif // _DEBUGMSG
871        RSegment rseg;
872        rseg.seq = seg.seq;
873        rseg.len = seg.len;
874        RList::iterator it = m_rlist.begin();
875        while ((it != m_rlist.end()) && (it->seq < rseg.seq)) {
876          ++it;
877        }
878        m_rlist.insert(it, rseg);
879      }
880    }
881  }
882
883  attemptSend(sflags);
884
885  // If we have new data, notify the user
886  if (bNewData && m_bReadEnable) {
887    m_bReadEnable = false;
888    if (m_notify) {
889      m_notify->OnTcpReadable(this);
890    }
891    //notify(evRead);
892  }
893
894  return true;
895}
896
897bool PseudoTcp::transmit(const SList::iterator& seg, uint32 now) {
898  if (seg->xmit >= ((m_state == TCP_ESTABLISHED) ? 15 : 30)) {
899    LOG_F(LS_VERBOSE) << "too many retransmits";
900    return false;
901  }
902
903  uint32 nTransmit = talk_base::_min(seg->len, m_mss);
904
905  while (true) {
906    uint32 seq = seg->seq;
907    uint8 flags = (seg->bCtrl ? FLAG_CTL : 0);
908    const char* buffer = m_sbuf + (seg->seq - m_snd_una);
909    IPseudoTcpNotify::WriteResult wres = this->packet(seq, flags, buffer, nTransmit);
910
911    if (wres == IPseudoTcpNotify::WR_SUCCESS)
912      break;
913
914    if (wres == IPseudoTcpNotify::WR_FAIL) {
915      LOG_F(LS_VERBOSE) << "packet failed";
916      return false;
917    }
918
919    ASSERT(wres == IPseudoTcpNotify::WR_TOO_LARGE);
920
921    while (true) {
922      if (PACKET_MAXIMUMS[m_msslevel + 1] == 0) {
923        LOG_F(LS_VERBOSE) << "MTU too small";
924        return false;
925      }
926      // !?! We need to break up all outstanding and pending packets and then retransmit!?!
927
928      m_mss = PACKET_MAXIMUMS[++m_msslevel] - PACKET_OVERHEAD;
929      m_cwnd = 2 * m_mss; // I added this... haven't researched actual formula
930      if (m_mss < nTransmit) {
931        nTransmit = m_mss;
932        break;
933      }
934    }
935#if _DEBUGMSG >= _DBG_NORMAL
936    LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes";
937#endif // _DEBUGMSG
938  }
939
940  if (nTransmit < seg->len) {
941    LOG_F(LS_VERBOSE) << "mss reduced to " << m_mss;
942
943    SSegment subseg(seg->seq + nTransmit, seg->len - nTransmit, seg->bCtrl);
944    //subseg.tstamp = seg->tstamp;
945    subseg.xmit = seg->xmit;
946    seg->len = nTransmit;
947
948    SList::iterator next = seg;
949    m_slist.insert(++next, subseg);
950  }
951
952  if (seg->xmit == 0) {
953    m_snd_nxt += seg->len;
954  }
955  seg->xmit += 1;
956  //seg->tstamp = now;
957  if (m_rto_base == 0) {
958    m_rto_base = now;
959  }
960
961  return true;
962}
963
964void PseudoTcp::attemptSend(SendFlags sflags) {
965  uint32 now = Now();
966
967  if (talk_base::TimeDiff(now, m_lastsend) > static_cast<long>(m_rx_rto)) {
968    m_cwnd = m_mss;
969  }
970
971#if _DEBUGMSG
972  bool bFirst = true;
973  UNUSED(bFirst);
974#endif // _DEBUGMSG
975
976  while (true) {
977    uint32 cwnd = m_cwnd;
978    if ((m_dup_acks == 1) || (m_dup_acks == 2)) { // Limited Transmit
979      cwnd += m_dup_acks * m_mss;
980    }
981    uint32 nWindow = talk_base::_min(m_snd_wnd, cwnd);
982    uint32 nInFlight = m_snd_nxt - m_snd_una;
983    uint32 nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
984
985    uint32 nAvailable = talk_base::_min(m_slen - nInFlight, m_mss);
986
987    if (nAvailable > nUseable) {
988      if (nUseable * 4 < nWindow) {
989        // RFC 813 - avoid SWS
990        nAvailable = 0;
991      } else {
992        nAvailable = nUseable;
993      }
994    }
995
996#if _DEBUGMSG >= _DBG_VERBOSE
997    if (bFirst) {
998      bFirst = false;
999      LOG(LS_INFO) << "[cwnd: " << m_cwnd
1000                   << "  nWindow: " << nWindow
1001                   << "  nInFlight: " << nInFlight
1002                   << "  nAvailable: " << nAvailable
1003                   << "  nQueued: " << m_slen - nInFlight
1004                   << "  nEmpty: " << sizeof(m_sbuf) - m_slen
1005                   << "  ssthresh: " << m_ssthresh << "]";
1006    }
1007#endif // _DEBUGMSG
1008
1009    if (nAvailable == 0) {
1010      if (sflags == sfNone)
1011        return;
1012
1013      // If this is an immediate ack, or the second delayed ack
1014      if ((sflags == sfImmediateAck) || m_t_ack) {
1015        packet(m_snd_nxt, 0, 0, 0);
1016      } else {
1017        m_t_ack = Now();
1018      }
1019      return;
1020    }
1021
1022    // Nagle's algorithm.
1023    // If there is data already in-flight, and we haven't a full segment of
1024    // data ready to send then hold off until we get more to send, or the
1025    // in-flight data is acknowledged.
1026    if (m_use_nagling && (m_snd_nxt > m_snd_una) && (nAvailable < m_mss))  {
1027      return;
1028    }
1029
1030    // Find the next segment to transmit
1031    SList::iterator it = m_slist.begin();
1032    while (it->xmit > 0) {
1033      ++it;
1034      ASSERT(it != m_slist.end());
1035    }
1036    SList::iterator seg = it;
1037
1038    // If the segment is too large, break it into two
1039    if (seg->len > nAvailable) {
1040      SSegment subseg(seg->seq + nAvailable, seg->len - nAvailable, seg->bCtrl);
1041      seg->len = nAvailable;
1042      m_slist.insert(++it, subseg);
1043    }
1044
1045    if (!transmit(seg, now)) {
1046      LOG_F(LS_VERBOSE) << "transmit failed";
1047      // TODO: consider closing socket
1048      return;
1049    }
1050
1051    sflags = sfNone;
1052  }
1053}
1054
1055void
1056PseudoTcp::closedown(uint32 err) {
1057  m_slen = 0;
1058
1059  LOG(LS_INFO) << "State: TCP_CLOSED";
1060  m_state = TCP_CLOSED;
1061  if (m_notify) {
1062    m_notify->OnTcpClosed(this, err);
1063  }
1064  //notify(evClose, err);
1065}
1066
1067void
1068PseudoTcp::adjustMTU() {
1069  // Determine our current mss level, so that we can adjust appropriately later
1070  for (m_msslevel = 0; PACKET_MAXIMUMS[m_msslevel + 1] > 0; ++m_msslevel) {
1071    if (static_cast<uint16>(PACKET_MAXIMUMS[m_msslevel]) <= m_mtu_advise) {
1072      break;
1073    }
1074  }
1075  m_mss = m_mtu_advise - PACKET_OVERHEAD;
1076  // !?! Should we reset m_largest here?
1077#if _DEBUGMSG >= _DBG_NORMAL
1078  LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes";
1079#endif // _DEBUGMSG
1080  // Enforce minimums on ssthresh and cwnd
1081  m_ssthresh = talk_base::_max(m_ssthresh, 2 * m_mss);
1082  m_cwnd = talk_base::_max(m_cwnd, m_mss);
1083}
1084
1085}  // namespace cricket
1086