1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/types.h>
12#include <linux/timer.h>
13#include <linux/module.h>
14#include <linux/in.h>
15#include <linux/tcp.h>
16#include <linux/spinlock.h>
17#include <linux/skbuff.h>
18#include <linux/ipv6.h>
19#include <net/ip6_checksum.h>
20#include <asm/unaligned.h>
21
22#include <net/tcp.h>
23
24#include <linux/netfilter.h>
25#include <linux/netfilter_ipv4.h>
26#include <linux/netfilter_ipv6.h>
27#include <net/netfilter/nf_conntrack.h>
28#include <net/netfilter/nf_conntrack_l4proto.h>
29#include <net/netfilter/nf_conntrack_ecache.h>
30#include <net/netfilter/nf_conntrack_seqadj.h>
31#include <net/netfilter/nf_conntrack_synproxy.h>
32#include <net/netfilter/nf_log.h>
33#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35
36/* "Be conservative in what you do,
37    be liberal in what you accept from others."
38    If it's non-zero, we mark only out of window RST segments as INVALID. */
39static int nf_ct_tcp_be_liberal __read_mostly = 0;
40
41/* If it is set to zero, we disable picking up already established
42   connections. */
43static int nf_ct_tcp_loose __read_mostly = 1;
44
45/* Max number of the retransmitted packets without receiving an (acceptable)
46   ACK from the destination. If this number is reached, a shorter timer
47   will be started. */
48static int nf_ct_tcp_max_retrans __read_mostly = 3;
49
50  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
51     closely.  They're more complex. --RR */
52
53static const char *const tcp_conntrack_names[] = {
54	"NONE",
55	"SYN_SENT",
56	"SYN_RECV",
57	"ESTABLISHED",
58	"FIN_WAIT",
59	"CLOSE_WAIT",
60	"LAST_ACK",
61	"TIME_WAIT",
62	"CLOSE",
63	"SYN_SENT2",
64};
65
66#define SECS * HZ
67#define MINS * 60 SECS
68#define HOURS * 60 MINS
69#define DAYS * 24 HOURS
70
71static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
72	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
73	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
74	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
75	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
76	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
77	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
78	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
79	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
80	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
81/* RFC1122 says the R2 limit should be at least 100 seconds.
82   Linux uses 15 packets as limit, which corresponds
83   to ~13-30min depending on RTO. */
84	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
85	[TCP_CONNTRACK_UNACK]		= 5 MINS,
86};
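/* Illustrative sketch, not part of the original file: the SECS/MINS/HOURS/
 * DAYS macros above are plain arithmetic suffixes, so a table entry such as
 * "2 MINS" expands to "2 * 60 * HZ" jiffies.  The helper name below is made
 * up purely for illustration.
 */
static inline unsigned int example_minutes_to_jiffies(unsigned int minutes)
{
	/* e.g. example_minutes_to_jiffies(2) == 2 * 60 * HZ == 2 MINS */
	return minutes MINS;
}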
87
88#define sNO TCP_CONNTRACK_NONE
89#define sSS TCP_CONNTRACK_SYN_SENT
90#define sSR TCP_CONNTRACK_SYN_RECV
91#define sES TCP_CONNTRACK_ESTABLISHED
92#define sFW TCP_CONNTRACK_FIN_WAIT
93#define sCW TCP_CONNTRACK_CLOSE_WAIT
94#define sLA TCP_CONNTRACK_LAST_ACK
95#define sTW TCP_CONNTRACK_TIME_WAIT
96#define sCL TCP_CONNTRACK_CLOSE
97#define sS2 TCP_CONNTRACK_SYN_SENT2
98#define sIV TCP_CONNTRACK_MAX
99#define sIG TCP_CONNTRACK_IGNORE
100
101/* What TCP flags are set from RST/SYN/FIN/ACK. */
102enum tcp_bit_set {
103	TCP_SYN_SET,
104	TCP_SYNACK_SET,
105	TCP_FIN_SET,
106	TCP_ACK_SET,
107	TCP_RST_SET,
108	TCP_NONE_SET,
109};
110
111/*
112 * The TCP state transition table needs a few words...
113 *
114 * We are the man in the middle. All the packets go through us
115 * but might get lost in transit to the destination.
116 * It is assumed that the destinations can't receive segments
117 * we haven't seen.
118 *
119 * The checked segment is in window, but our windows are *not*
120 * equivalent with the ones of the sender/receiver. We always
121 * try to guess the state of the current sender.
122 *
123 * The meaning of the states are:
124 *
125 * NONE:	initial state
126 * SYN_SENT:	SYN-only packet seen
127 * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
128 * SYN_RECV:	SYN-ACK packet seen
129 * ESTABLISHED:	ACK packet seen
130 * FIN_WAIT:	FIN packet seen
131 * CLOSE_WAIT:	ACK seen (after FIN)
132 * LAST_ACK:	FIN seen (after FIN)
133 * TIME_WAIT:	last ACK seen
134 * CLOSE:	closed connection (RST)
135 *
136 * Packets marked as IGNORED (sIG):
137 *	if they may be either invalid or valid
138 *	and the receiver may send back a connection
139 *	closing RST or a SYN/ACK.
140 *
141 * Packets marked as INVALID (sIV):
142 *	if we regard them as truly invalid packets
143 */
144static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
145	{
146/* ORIGINAL */
147/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
148/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
149/*
150 *	sNO -> sSS	Initialize a new connection
151 *	sSS -> sSS	Retransmitted SYN
152 *	sS2 -> sS2	Late retransmitted SYN
153 *	sSR -> sIG
154 *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
155 *			are errors. Receiver will reply with RST
156 *			and close the connection.
157 *			Or we are not in sync and hold a dead connection.
158 *	sFW -> sIG
159 *	sCW -> sIG
160 *	sLA -> sIG
161 *	sTW -> sSS	Reopened connection (RFC 1122).
162 *	sCL -> sSS
163 */
164/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
165/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
166/*
167 *	sNO -> sIV	Too late and no reason to do anything
168 *	sSS -> sIV	Client can't send SYN and then SYN/ACK
169 *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
170 *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
171 *	sES -> sIV	Invalid SYN/ACK packets sent by the client
172 *	sFW -> sIV
173 *	sCW -> sIV
174 *	sLA -> sIV
175 *	sTW -> sIV
176 *	sCL -> sIV
177 */
178/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
179/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
180/*
181 *	sNO -> sIV	Too late and no reason to do anything...
182 *	sSS -> sIV	Client might not send FIN in this state:
183 *			we enforce waiting for a SYN/ACK reply first.
184 *	sS2 -> sIV
185 *	sSR -> sFW	Close started.
186 *	sES -> sFW
187 *	sFW -> sLA	FIN seen in both directions, waiting for
188 *			the last ACK.
189 *			Might be a retransmitted FIN as well...
190 *	sCW -> sLA
191 *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
192 *	sTW -> sTW
193 *	sCL -> sCL
194 */
195/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
196/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
197/*
198 *	sNO -> sES	Assumed.
199 *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
200 *	sS2 -> sIV
201 *	sSR -> sES	Established state is reached.
202 *	sES -> sES	:-)
203 *	sFW -> sCW	Normal close request answered by ACK.
204 *	sCW -> sCW
205 *	sLA -> sTW	Last ACK detected.
206 *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
207 *	sCL -> sCL
208 */
209/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
210/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
211/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
212	},
213	{
214/* REPLY */
215/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
216/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
217/*
218 *	sNO -> sIV	Never reached.
219 *	sSS -> sS2	Simultaneous open
220 *	sS2 -> sS2	Retransmitted simultaneous SYN
221 *	sSR -> sIV	Invalid SYN packets sent by the server
222 *	sES -> sIV
223 *	sFW -> sIV
224 *	sCW -> sIV
225 *	sLA -> sIV
226 *	sTW -> sSS	Reopened connection, but server may have switched role
227 *	sCL -> sIV
228 */
229/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
230/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
231/*
232 *	sSS -> sSR	Standard open.
233 *	sS2 -> sSR	Simultaneous open
234 *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
235 *	sES -> sIG	Late retransmitted SYN/ACK?
236 *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
237 *	sCW -> sIG
238 *	sLA -> sIG
239 *	sTW -> sIG
240 *	sCL -> sIG
241 */
242/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
243/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
244/*
245 *	sSS -> sIV	Server might not send FIN in this state.
246 *	sS2 -> sIV
247 *	sSR -> sFW	Close started.
248 *	sES -> sFW
249 *	sFW -> sLA	FIN seen in both directions.
250 *	sCW -> sLA
251 *	sLA -> sLA	Retransmitted FIN.
252 *	sTW -> sTW
253 *	sCL -> sCL
254 */
255/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
256/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
257/*
258 *	sSS -> sIG	Might be a half-open connection.
259 *	sS2 -> sIG
260 *	sSR -> sSR	Might answer late resent SYN.
261 *	sES -> sES	:-)
262 *	sFW -> sCW	Normal close request answered by ACK.
263 *	sCW -> sCW
264 *	sLA -> sTW	Last ACK detected.
265 *	sTW -> sTW	Retransmitted last ACK.
266 *	sCL -> sCL
267 */
268/* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
269/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
270/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
271	}
272};
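/* Illustrative sketch, not part of the original file: the table above is
 * indexed as tcp_conntracks[dir][index][old_state].  The made-up helper
 * below shows that a SYN/ACK seen in the REPLY direction while the
 * connection is in SYN_SENT moves it to SYN_RECV (the standard three-way
 * handshake), assuming IP_CT_DIR_REPLY from the conntrack headers already
 * included above.
 */
static inline u8 example_synack_reply_transition(void)
{
	/* row: reply direction, SYN/ACK; column: current state SYN_SENT */
	return tcp_conntracks[IP_CT_DIR_REPLY][TCP_SYNACK_SET][TCP_CONNTRACK_SYN_SENT];
	/* == TCP_CONNTRACK_SYN_RECV (sSR) */
}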
273
274static inline struct nf_tcp_net *tcp_pernet(struct net *net)
275{
276	return &net->ct.nf_ct_proto.tcp;
277}
278
279static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
280			     struct nf_conntrack_tuple *tuple)
281{
282	const struct tcphdr *hp;
283	struct tcphdr _hdr;
284
285	/* Actually only need first 8 bytes. */
286	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
287	if (hp == NULL)
288		return false;
289
290	tuple->src.u.tcp.port = hp->source;
291	tuple->dst.u.tcp.port = hp->dest;
292
293	return true;
294}
295
296static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
297			     const struct nf_conntrack_tuple *orig)
298{
299	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
300	tuple->dst.u.tcp.port = orig->src.u.tcp.port;
301	return true;
302}
303
304/* Print out the per-protocol part of the tuple. */
305static int tcp_print_tuple(struct seq_file *s,
306			   const struct nf_conntrack_tuple *tuple)
307{
308	return seq_printf(s, "sport=%hu dport=%hu ",
309			  ntohs(tuple->src.u.tcp.port),
310			  ntohs(tuple->dst.u.tcp.port));
311}
312
313/* Print out the private part of the conntrack. */
314static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
315{
316	enum tcp_conntrack state;
317
318	spin_lock_bh(&ct->lock);
319	state = ct->proto.tcp.state;
320	spin_unlock_bh(&ct->lock);
321
322	return seq_printf(s, "%s ", tcp_conntrack_names[state]);
323}
324
325static unsigned int get_conntrack_index(const struct tcphdr *tcph)
326{
327	if (tcph->rst) return TCP_RST_SET;
328	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
329	else if (tcph->fin) return TCP_FIN_SET;
330	else if (tcph->ack) return TCP_ACK_SET;
331	else return TCP_NONE_SET;
332}
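/* Illustrative sketch, not part of the original file: get_conntrack_index()
 * looks only at the RST/SYN/FIN/ACK bits, and a segment with both SYN and
 * ACK set is classified as TCP_SYNACK_SET.  The helper below builds a
 * header locally purely for illustration.
 */
static inline unsigned int example_index_of_synack(void)
{
	struct tcphdr th = {};

	th.syn = 1;
	th.ack = 1;
	return get_conntrack_index(&th);	/* == TCP_SYNACK_SET */
}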
333
334/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
335   in IP Filter' by Guido van Rooij.
336
337   http://www.sane.nl/events/sane2000/papers.html
338   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
339
340   The boundaries and the conditions are changed according to RFC793:
341   the packet must intersect the window (i.e. segments may be
342   after the right or before the left edge) and thus receivers may ACK
343   segments after the right edge of the window.
344
345	td_maxend = max(sack + max(win,1)) seen in reply packets
346	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
347	td_maxwin += seq + len - sender.td_maxend
348			if seq + len > sender.td_maxend
349	td_end    = max(seq + len) seen in sent packets
350
351   I.   Upper bound for valid data:	seq <= sender.td_maxend
352   II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
353   III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
354   IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
355
356   where sack is the highest right edge of the SACK blocks found in the
357   packet, or ack if the packet carries no SACK option.
358
359   The upper bound limit for a valid (s)ack is not ignored -
360   we don't have to deal with fragments.
361*/
362
363static inline __u32 segment_seq_plus_len(__u32 seq,
364					 size_t len,
365					 unsigned int dataoff,
366					 const struct tcphdr *tcph)
367{
368	/* XXX Should I use payload length field in IP/IPv6 header ?
369	 * - YK */
370	return (seq + len - dataoff - tcph->doff*4
371		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
372}
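/* Illustrative sketch, not part of the original file: for a bare SYN with
 * no payload the segment occupies exactly one sequence number, so the
 * computed end is seq + 1.  The helper below builds a minimal header
 * locally purely for illustration.
 */
static inline __u32 example_end_of_bare_syn(__u32 seq)
{
	struct tcphdr th = {};

	th.syn	= 1;
	th.doff	= sizeof(struct tcphdr) / 4;
	/* skb->len == dataoff + header length, i.e. no TCP payload */
	return segment_seq_plus_len(seq, sizeof(struct tcphdr), 0, &th); /* seq + 1 */
}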
373
374/* Fixme: what about big packets? */
375#define MAXACKWINCONST			66000
376#define MAXACKWINDOW(sender)						\
377	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
378					      : MAXACKWINCONST)
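/* Illustrative sketch, not part of the original file: MAXACKWINDOW() never
 * shrinks below 66000, so bound IV. above tolerates at least that much slack
 * even for senders advertising tiny windows.  The helper name is made up for
 * illustration.
 */
static inline bool example_ack_within_lower_bound(__u32 sack,
						  const struct ip_ct_tcp_state *sender,
						  const struct ip_ct_tcp_state *receiver)
{
	/* IV.  sack >= receiver->td_end - MAXACKWINDOW(sender) */
	return after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1);
}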
379
380/*
381 * Simplified tcp_parse_options routine from tcp_input.c
382 */
383static void tcp_options(const struct sk_buff *skb,
384			unsigned int dataoff,
385			const struct tcphdr *tcph,
386			struct ip_ct_tcp_state *state)
387{
388	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
389	const unsigned char *ptr;
390	int length = (tcph->doff*4) - sizeof(struct tcphdr);
391
392	if (!length)
393		return;
394
395	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
396				 length, buff);
397	BUG_ON(ptr == NULL);
398
399	state->td_scale =
400	state->flags = 0;
401
402	while (length > 0) {
403		int opcode = *ptr++;
404		int opsize;
405
406		switch (opcode) {
407		case TCPOPT_EOL:
408			return;
409		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
410			length--;
411			continue;
412		default:
413			opsize = *ptr++;
414			if (opsize < 2) /* "silly options" */
415				return;
416			if (opsize > length)
417				return;	/* don't parse partial options */
418
419			if (opcode == TCPOPT_SACK_PERM
420			    && opsize == TCPOLEN_SACK_PERM)
421				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
422			else if (opcode == TCPOPT_WINDOW
423				 && opsize == TCPOLEN_WINDOW) {
424				state->td_scale = *(u_int8_t *)ptr;
425
426				if (state->td_scale > 14) {
427					/* See RFC1323 */
428					state->td_scale = 14;
429				}
430				state->flags |=
431					IP_CT_TCP_FLAG_WINDOW_SCALE;
432			}
433			ptr += opsize - 2;
434			length -= opsize;
435		}
436	}
437}
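/* Illustrative sketch, not part of the original file: the parser above caps
 * an advertised window-scale shift at 14, the maximum allowed by RFC 1323.
 * A minimal restatement of that clamp, with a made-up helper name:
 */
static inline u8 example_clamped_window_scale(u8 advertised_shift)
{
	return advertised_shift > 14 ? 14 : advertised_shift;
}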
438
439static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
440                     const struct tcphdr *tcph, __u32 *sack)
441{
442	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
443	const unsigned char *ptr;
444	int length = (tcph->doff*4) - sizeof(struct tcphdr);
445	__u32 tmp;
446
447	if (!length)
448		return;
449
450	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
451				 length, buff);
452	BUG_ON(ptr == NULL);
453
454	/* Fast path for timestamp-only option */
455	if (length == TCPOLEN_TSTAMP_ALIGNED
456	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
457				       | (TCPOPT_NOP << 16)
458				       | (TCPOPT_TIMESTAMP << 8)
459				       | TCPOLEN_TIMESTAMP))
460		return;
461
462	while (length > 0) {
463		int opcode = *ptr++;
464		int opsize, i;
465
466		switch (opcode) {
467		case TCPOPT_EOL:
468			return;
469		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
470			length--;
471			continue;
472		default:
473			opsize = *ptr++;
474			if (opsize < 2) /* "silly options" */
475				return;
476			if (opsize > length)
477				return;	/* don't parse partial options */
478
479			if (opcode == TCPOPT_SACK
480			    && opsize >= (TCPOLEN_SACK_BASE
481					  + TCPOLEN_SACK_PERBLOCK)
482			    && !((opsize - TCPOLEN_SACK_BASE)
483				 % TCPOLEN_SACK_PERBLOCK)) {
484				for (i = 0;
485				     i < (opsize - TCPOLEN_SACK_BASE);
486				     i += TCPOLEN_SACK_PERBLOCK) {
487					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
488
489					if (after(tmp, *sack))
490						*sack = tmp;
491				}
492				return;
493			}
494			ptr += opsize - 2;
495			length -= opsize;
496		}
497	}
498}
499
500static bool tcp_in_window(const struct nf_conn *ct,
501			  struct ip_ct_tcp *state,
502			  enum ip_conntrack_dir dir,
503			  unsigned int index,
504			  const struct sk_buff *skb,
505			  unsigned int dataoff,
506			  const struct tcphdr *tcph,
507			  u_int8_t pf)
508{
509	struct net *net = nf_ct_net(ct);
510	struct nf_tcp_net *tn = tcp_pernet(net);
511	struct ip_ct_tcp_state *sender = &state->seen[dir];
512	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
513	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
514	__u32 seq, ack, sack, end, win, swin;
515	s32 receiver_offset;
516	bool res, in_recv_win;
517
518	/*
519	 * Get the required data from the packet.
520	 */
521	seq = ntohl(tcph->seq);
522	ack = sack = ntohl(tcph->ack_seq);
523	win = ntohs(tcph->window);
524	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
525
526	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
527		tcp_sack(skb, dataoff, tcph, &sack);
528
529	/* Take into account NAT sequence number mangling */
530	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
531	ack -= receiver_offset;
532	sack -= receiver_offset;
533
534	pr_debug("tcp_in_window: START\n");
535	pr_debug("tcp_in_window: ");
536	nf_ct_dump_tuple(tuple);
537	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
538		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
539	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
540		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
541		 sender->td_end, sender->td_maxend, sender->td_maxwin,
542		 sender->td_scale,
543		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
544		 receiver->td_scale);
545
546	if (sender->td_maxwin == 0) {
547		/*
548		 * Initialize sender data.
549		 */
550		if (tcph->syn) {
551			/*
552			 * SYN-ACK in reply to a SYN
553			 * or SYN from reply direction in simultaneous open.
554			 */
555			sender->td_end =
556			sender->td_maxend = end;
557			sender->td_maxwin = (win == 0 ? 1 : win);
558
559			tcp_options(skb, dataoff, tcph, sender);
560			/*
561			 * RFC 1323:
562			 * Both sides must send the Window Scale option
563			 * to enable window scaling in either direction.
564			 */
565			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
566			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
567				sender->td_scale =
568				receiver->td_scale = 0;
569			if (!tcph->ack)
570				/* Simultaneous open */
571				return true;
572		} else {
573			/*
574			 * We are in the middle of a connection,
575			 * its history is lost for us.
576			 * Let's try to use the data from the packet.
577			 */
578			sender->td_end = end;
579			swin = win << sender->td_scale;
580			sender->td_maxwin = (swin == 0 ? 1 : swin);
581			sender->td_maxend = end + sender->td_maxwin;
582			/*
583			 * We haven't seen traffic in the other direction yet
584			 * but we have to tweak window tracking to pass III
585			 * and IV until that happens.
586			 */
587			if (receiver->td_maxwin == 0)
588				receiver->td_end = receiver->td_maxend = sack;
589		}
590	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
591		     && dir == IP_CT_DIR_ORIGINAL)
592		   || (state->state == TCP_CONNTRACK_SYN_RECV
593		     && dir == IP_CT_DIR_REPLY))
594		   && after(end, sender->td_end)) {
595		/*
596		 * RFC 793: "if a TCP is reinitialized ... then it need
597		 * not wait at all; it must only be sure to use sequence
598		 * numbers larger than those recently used."
599		 */
600		sender->td_end =
601		sender->td_maxend = end;
602		sender->td_maxwin = (win == 0 ? 1 : win);
603
604		tcp_options(skb, dataoff, tcph, sender);
605	}
606
607	if (!(tcph->ack)) {
608		/*
609		 * If there is no ACK, just pretend it was set and OK.
610		 */
611		ack = sack = receiver->td_end;
612	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
613		    (TCP_FLAG_ACK|TCP_FLAG_RST))
614		   && (ack == 0)) {
615		/*
616		 * Broken TCP stacks that set the ACK flag in RST packets
617		 * but carry a zero ack value.
618		 */
619		ack = sack = receiver->td_end;
620	}
621
622	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
623		/*
624		 * RST sent answering SYN.
625		 */
626		seq = end = sender->td_end;
627
628	pr_debug("tcp_in_window: ");
629	nf_ct_dump_tuple(tuple);
630	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
631		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
632	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
633		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
634		 sender->td_end, sender->td_maxend, sender->td_maxwin,
635		 sender->td_scale,
636		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
637		 receiver->td_scale);
638
639	/* Is the ending sequence in the receive window (if available)? */
640	in_recv_win = !receiver->td_maxwin ||
641		      after(end, sender->td_end - receiver->td_maxwin - 1);
642
643	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
644		 before(seq, sender->td_maxend + 1),
645		 (in_recv_win ? 1 : 0),
646		 before(sack, receiver->td_end + 1),
647		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
648
649	if (before(seq, sender->td_maxend + 1) &&
650	    in_recv_win &&
651	    before(sack, receiver->td_end + 1) &&
652	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
653		/*
654		 * Take into account window scaling (RFC 1323).
655		 */
656		if (!tcph->syn)
657			win <<= sender->td_scale;
658
659		/*
660		 * Update sender data.
661		 */
662		swin = win + (sack - ack);
663		if (sender->td_maxwin < swin)
664			sender->td_maxwin = swin;
665		if (after(end, sender->td_end)) {
666			sender->td_end = end;
667			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
668		}
669		if (tcph->ack) {
670			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
671				sender->td_maxack = ack;
672				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
673			} else if (after(ack, sender->td_maxack))
674				sender->td_maxack = ack;
675		}
676
677		/*
678		 * Update receiver data.
679		 */
680		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
681			receiver->td_maxwin += end - sender->td_maxend;
682		if (after(sack + win, receiver->td_maxend - 1)) {
683			receiver->td_maxend = sack + win;
684			if (win == 0)
685				receiver->td_maxend++;
686		}
687		if (ack == receiver->td_end)
688			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
689
690		/*
691		 * Check retransmissions.
692		 */
693		if (index == TCP_ACK_SET) {
694			if (state->last_dir == dir
695			    && state->last_seq == seq
696			    && state->last_ack == ack
697			    && state->last_end == end
698			    && state->last_win == win)
699				state->retrans++;
700			else {
701				state->last_dir = dir;
702				state->last_seq = seq;
703				state->last_ack = ack;
704				state->last_end = end;
705				state->last_win = win;
706				state->retrans = 0;
707			}
708		}
709		res = true;
710	} else {
711		res = false;
712		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
713		    tn->tcp_be_liberal)
714			res = true;
715		if (!res && LOG_INVALID(net, IPPROTO_TCP))
716			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
717			"nf_ct_tcp: %s ",
718			before(seq, sender->td_maxend + 1) ?
719			in_recv_win ?
720			before(sack, receiver->td_end + 1) ?
721			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
722			: "ACK is under the lower bound (possible overly delayed ACK)"
723			: "ACK is over the upper bound (ACKed data not seen yet)"
724			: "SEQ is under the lower bound (already ACKed data retransmitted)"
725			: "SEQ is over the upper bound (over the window of the receiver)");
726	}
727
728	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
729		 "receiver end=%u maxend=%u maxwin=%u\n",
730		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
731		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
732
733	return res;
734}
735
736/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
737static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
738				 TCPHDR_URG) + 1] =
739{
740	[TCPHDR_SYN]				= 1,
741	[TCPHDR_SYN|TCPHDR_URG]			= 1,
742	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
743	[TCPHDR_RST]				= 1,
744	[TCPHDR_RST|TCPHDR_ACK]			= 1,
745	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
746	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
747	[TCPHDR_ACK]				= 1,
748	[TCPHDR_ACK|TCPHDR_URG]			= 1,
749};
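/* Illustrative sketch, not part of the original file: the table above is
 * indexed by the FIN/SYN/RST/ACK/URG bits of the header (PUSH, ECE and CWR
 * are masked out before the lookup in tcp_error() below), so e.g. a bare
 * FIN without ACK is rejected while FIN|ACK is accepted.
 */
static inline bool example_fin_requires_ack(void)
{
	return !tcp_valid_flags[TCPHDR_FIN] &&
	       tcp_valid_flags[TCPHDR_FIN | TCPHDR_ACK];
}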
750
751/* Protect conntrack against broken packets. Code taken from ipt_unclean.c.  */
752static int tcp_error(struct net *net, struct nf_conn *tmpl,
753		     struct sk_buff *skb,
754		     unsigned int dataoff,
755		     enum ip_conntrack_info *ctinfo,
756		     u_int8_t pf,
757		     unsigned int hooknum)
758{
759	const struct tcphdr *th;
760	struct tcphdr _tcph;
761	unsigned int tcplen = skb->len - dataoff;
762	u_int8_t tcpflags;
763
764	/* Smaller than the minimal TCP header? */
765	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
766	if (th == NULL) {
767		if (LOG_INVALID(net, IPPROTO_TCP))
768			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
769				"nf_ct_tcp: short packet ");
770		return -NF_ACCEPT;
771	}
772
773	/* Not whole TCP header or malformed packet */
774	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
775		if (LOG_INVALID(net, IPPROTO_TCP))
776			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
777				"nf_ct_tcp: truncated/malformed packet ");
778		return -NF_ACCEPT;
779	}
780
781	/* Checksum invalid? Ignore.
782	 * We skip checking packets on the outgoing path
783	 * because the checksum is assumed to be correct.
784	 */
785	/* FIXME: Source route IP option packets --RR */
786	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
787	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
788		if (LOG_INVALID(net, IPPROTO_TCP))
789			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
790				  "nf_ct_tcp: bad TCP checksum ");
791		return -NF_ACCEPT;
792	}
793
794	/* Check TCP flags. */
795	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
796	if (!tcp_valid_flags[tcpflags]) {
797		if (LOG_INVALID(net, IPPROTO_TCP))
798			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
799				  "nf_ct_tcp: invalid TCP flag combination ");
800		return -NF_ACCEPT;
801	}
802
803	return NF_ACCEPT;
804}
805
806static unsigned int *tcp_get_timeouts(struct net *net)
807{
808	return tcp_pernet(net)->timeouts;
809}
810
811/* Returns verdict for packet, or -1 for invalid. */
812static int tcp_packet(struct nf_conn *ct,
813		      const struct sk_buff *skb,
814		      unsigned int dataoff,
815		      enum ip_conntrack_info ctinfo,
816		      u_int8_t pf,
817		      unsigned int hooknum,
818		      unsigned int *timeouts)
819{
820	struct net *net = nf_ct_net(ct);
821	struct nf_tcp_net *tn = tcp_pernet(net);
822	struct nf_conntrack_tuple *tuple;
823	enum tcp_conntrack new_state, old_state;
824	enum ip_conntrack_dir dir;
825	const struct tcphdr *th;
826	struct tcphdr _tcph;
827	unsigned long timeout;
828	unsigned int index;
829
830	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
831	BUG_ON(th == NULL);
832
833	spin_lock_bh(&ct->lock);
834	old_state = ct->proto.tcp.state;
835	dir = CTINFO2DIR(ctinfo);
836	index = get_conntrack_index(th);
837	new_state = tcp_conntracks[dir][index][old_state];
838	tuple = &ct->tuplehash[dir].tuple;
839
840	switch (new_state) {
841	case TCP_CONNTRACK_SYN_SENT:
842		if (old_state < TCP_CONNTRACK_TIME_WAIT)
843			break;
844		/* RFC 1122: "When a connection is closed actively,
845		 * it MUST linger in TIME-WAIT state for a time 2xMSL
846		 * (Maximum Segment Lifetime). However, it MAY accept
847		 * a new SYN from the remote TCP to reopen the connection
848		 * directly from TIME-WAIT state, if..."
849		 * We ignore the conditions because we are in the
850		 * TIME-WAIT state anyway.
851		 *
852		 * Handle aborted connections: we and the server
853		 * think there is an existing connection but the client
854		 * aborts it and starts a new one.
855		 */
856		if (((ct->proto.tcp.seen[dir].flags
857		      | ct->proto.tcp.seen[!dir].flags)
858		     & IP_CT_TCP_FLAG_CLOSE_INIT)
859		    || (ct->proto.tcp.last_dir == dir
860		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
861			/* Attempt to reopen a closed/aborted connection.
862			 * Delete this connection and look up again. */
863			spin_unlock_bh(&ct->lock);
864
865			/* Only repeat if we can actually remove the timer.
866			 * Destruction may already be in progress in process
867			 * context and we must give it a chance to terminate.
868			 */
869			if (nf_ct_kill(ct))
870				return -NF_REPEAT;
871			return NF_DROP;
872		}
873		/* Fall through */
874	case TCP_CONNTRACK_IGNORE:
875		/* Ignored packets:
876		 *
877		 * Our connection entry may be out of sync, so ignore
878		 * packets which may signal the real connection between
879		 * the client and the server.
880		 *
881		 * a) SYN in ORIGINAL
882		 * b) SYN/ACK in REPLY
883		 * c) ACK in reply direction after initial SYN in original.
884		 *
885		 * If the ignored packet is invalid, the receiver will send
886		 * a RST we'll catch below.
887		 */
888		if (index == TCP_SYNACK_SET
889		    && ct->proto.tcp.last_index == TCP_SYN_SET
890		    && ct->proto.tcp.last_dir != dir
891		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
892			/* b) This SYN/ACK acknowledges a SYN that we earlier
893			 * ignored as invalid. This means that the client and
894			 * the server are both in sync, while the firewall is
895			 * not. We get in sync from the previously annotated
896			 * values.
897			 */
898			old_state = TCP_CONNTRACK_SYN_SENT;
899			new_state = TCP_CONNTRACK_SYN_RECV;
900			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
901				ct->proto.tcp.last_end;
902			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
903				ct->proto.tcp.last_end;
904			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
905				ct->proto.tcp.last_win == 0 ?
906					1 : ct->proto.tcp.last_win;
907			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
908				ct->proto.tcp.last_wscale;
909			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
910				ct->proto.tcp.last_flags;
911			memset(&ct->proto.tcp.seen[dir], 0,
912			       sizeof(struct ip_ct_tcp_state));
913			break;
914		}
915		ct->proto.tcp.last_index = index;
916		ct->proto.tcp.last_dir = dir;
917		ct->proto.tcp.last_seq = ntohl(th->seq);
918		ct->proto.tcp.last_end =
919		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
920		ct->proto.tcp.last_win = ntohs(th->window);
921
922		/* a) This is a SYN in ORIGINAL. The client and the server
923		 * may be in sync but we are not. In that case, we annotate
924		 * the TCP options and let the packet go through. If it is a
925		 * valid SYN packet, the server will reply with a SYN/ACK, and
926		 * then we'll get in sync. Otherwise, the server ignores it. */
927		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
928			struct ip_ct_tcp_state seen = {};
929
930			ct->proto.tcp.last_flags =
931			ct->proto.tcp.last_wscale = 0;
932			tcp_options(skb, dataoff, th, &seen);
933			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
934				ct->proto.tcp.last_flags |=
935					IP_CT_TCP_FLAG_WINDOW_SCALE;
936				ct->proto.tcp.last_wscale = seen.td_scale;
937			}
938			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
939				ct->proto.tcp.last_flags |=
940					IP_CT_TCP_FLAG_SACK_PERM;
941			}
942		}
943		spin_unlock_bh(&ct->lock);
944		if (LOG_INVALID(net, IPPROTO_TCP))
945			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
946				  "nf_ct_tcp: invalid packet ignored in "
947				  "state %s ", tcp_conntrack_names[old_state]);
948		return NF_ACCEPT;
949	case TCP_CONNTRACK_MAX:
950		/* Special case for SYN proxy: when the SYN to the server or
951		 * the SYN/ACK from the server is lost, the client may transmit
952		 * a keep-alive packet while in SYN_SENT state. This needs to
953		 * be associated with the original conntrack entry in order to
954		 * generate a new SYN with the correct sequence number.
955		 */
956		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
957		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
958		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
959		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
960			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
961			spin_unlock_bh(&ct->lock);
962			return NF_ACCEPT;
963		}
964
965		/* Invalid packet */
966		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
967			 dir, get_conntrack_index(th), old_state);
968		spin_unlock_bh(&ct->lock);
969		if (LOG_INVALID(net, IPPROTO_TCP))
970			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
971				  "nf_ct_tcp: invalid state ");
972		return -NF_ACCEPT;
973	case TCP_CONNTRACK_CLOSE:
974		if (index == TCP_RST_SET
975		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
976		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
977			/* Invalid RST  */
978			spin_unlock_bh(&ct->lock);
979			if (LOG_INVALID(net, IPPROTO_TCP))
980				nf_log_packet(net, pf, 0, skb, NULL, NULL,
981					      NULL, "nf_ct_tcp: invalid RST ");
982			return -NF_ACCEPT;
983		}
984		if (index == TCP_RST_SET
985		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
986			 && ct->proto.tcp.last_index == TCP_SYN_SET)
987			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
988			    && ct->proto.tcp.last_index == TCP_ACK_SET))
989		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
990			/* RST sent to invalid SYN or ACK we had let through
991			 * at a) and c) above:
992			 *
993			 * a) SYN was in window then
994			 * c) we hold a half-open connection.
995			 *
996			 * Delete our connection entry.
997			 * We skip window checking, because packet might ACK
998			 * segments we ignored. */
999			goto in_window;
1000		}
1001		/* Just fall through */
1002	default:
1003		/* Keep compilers happy. */
1004		break;
1005	}
1006
1007	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
1008			   skb, dataoff, th, pf)) {
1009		spin_unlock_bh(&ct->lock);
1010		return -NF_ACCEPT;
1011	}
1012     in_window:
1013	/* From now on we have got in-window packets */
1014	ct->proto.tcp.last_index = index;
1015	ct->proto.tcp.last_dir = dir;
1016
1017	pr_debug("tcp_conntracks: ");
1018	nf_ct_dump_tuple(tuple);
1019	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1020		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1021		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1022		 old_state, new_state);
1023
1024	ct->proto.tcp.state = new_state;
1025	if (old_state != new_state
1026	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1027		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1028
1029	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1030	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1031		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1032	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1033		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1034		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1035		timeout = timeouts[TCP_CONNTRACK_UNACK];
1036	else
1037		timeout = timeouts[new_state];
1038	spin_unlock_bh(&ct->lock);
1039
1040	if (new_state != old_state)
1041		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1042
1043	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1044		/* If the only reply is a RST, we can consider ourselves not to
1045		   have an established connection: this is a fairly common
1046		   problem case, so we can delete the conntrack
1047		   immediately.  --RR */
1048		if (th->rst) {
1049			nf_ct_kill_acct(ct, ctinfo, skb);
1050			return NF_ACCEPT;
1051		}
1052		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1053		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1054		 */
1055		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1056		    timeout > timeouts[TCP_CONNTRACK_UNACK])
1057			timeout = timeouts[TCP_CONNTRACK_UNACK];
1058	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1059		   && (old_state == TCP_CONNTRACK_SYN_RECV
1060		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1061		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1062		/* Set ASSURED if we see a valid ack in ESTABLISHED
1063		   after SYN_RECV or a valid answer for a picked up
1064		   connection. */
1065		set_bit(IPS_ASSURED_BIT, &ct->status);
1066		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1067	}
1068	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1069
1070	return NF_ACCEPT;
1071}
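/* Illustrative sketch, not part of the original file: tcp_packet() above
 * prefers the shorter RETRANS/UNACK timeouts over the per-state one when
 * retransmissions pile up or data remains unacknowledged.  A minimal
 * restatement of that selection, with made-up parameter names:
 */
static inline unsigned long example_effective_timeout(const unsigned int *timeouts,
						      enum tcp_conntrack state,
						      bool many_retrans,
						      bool unacked)
{
	if (many_retrans && timeouts[state] > timeouts[TCP_CONNTRACK_RETRANS])
		return timeouts[TCP_CONNTRACK_RETRANS];
	if (unacked && timeouts[state] > timeouts[TCP_CONNTRACK_UNACK])
		return timeouts[TCP_CONNTRACK_UNACK];
	return timeouts[state];
}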
1072
1073/* Called when a new connection for this protocol found. */
1074static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1075		    unsigned int dataoff, unsigned int *timeouts)
1076{
1077	enum tcp_conntrack new_state;
1078	const struct tcphdr *th;
1079	struct tcphdr _tcph;
1080	struct net *net = nf_ct_net(ct);
1081	struct nf_tcp_net *tn = tcp_pernet(net);
1082	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1083	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1084
1085	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1086	BUG_ON(th == NULL);
1087
1088	/* Don't need lock here: this conntrack not in circulation yet */
1089	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1090
1091	/* Invalid: delete conntrack */
1092	if (new_state >= TCP_CONNTRACK_MAX) {
1093		pr_debug("nf_ct_tcp: invalid new deleting.\n");
1094		return false;
1095	}
1096
1097	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1098		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1099		/* SYN packet */
1100		ct->proto.tcp.seen[0].td_end =
1101			segment_seq_plus_len(ntohl(th->seq), skb->len,
1102					     dataoff, th);
1103		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1104		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1105			ct->proto.tcp.seen[0].td_maxwin = 1;
1106		ct->proto.tcp.seen[0].td_maxend =
1107			ct->proto.tcp.seen[0].td_end;
1108
1109		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1110	} else if (tn->tcp_loose == 0) {
1111		/* Don't try to pick up connections. */
1112		return false;
1113	} else {
1114		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1115		/*
1116		 * We are in the middle of a connection,
1117		 * its history is lost for us.
1118		 * Let's try to use the data from the packet.
1119		 */
1120		ct->proto.tcp.seen[0].td_end =
1121			segment_seq_plus_len(ntohl(th->seq), skb->len,
1122					     dataoff, th);
1123		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1124		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1125			ct->proto.tcp.seen[0].td_maxwin = 1;
1126		ct->proto.tcp.seen[0].td_maxend =
1127			ct->proto.tcp.seen[0].td_end +
1128			ct->proto.tcp.seen[0].td_maxwin;
1129
1130		/* We assume SACK and liberal window checking to handle
1131		 * window scaling */
1132		ct->proto.tcp.seen[0].flags =
1133		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1134					      IP_CT_TCP_FLAG_BE_LIBERAL;
1135	}
1136
1137	/* tcp_packet will set them */
1138	ct->proto.tcp.last_index = TCP_NONE_SET;
1139
1140	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1141		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1142		 sender->td_end, sender->td_maxend, sender->td_maxwin,
1143		 sender->td_scale,
1144		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1145		 receiver->td_scale);
1146	return true;
1147}
1148
1149#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1150
1151#include <linux/netfilter/nfnetlink.h>
1152#include <linux/netfilter/nfnetlink_conntrack.h>
1153
1154static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1155			 struct nf_conn *ct)
1156{
1157	struct nlattr *nest_parms;
1158	struct nf_ct_tcp_flags tmp = {};
1159
1160	spin_lock_bh(&ct->lock);
1161	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1162	if (!nest_parms)
1163		goto nla_put_failure;
1164
1165	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1166	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1167		       ct->proto.tcp.seen[0].td_scale) ||
1168	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1169		       ct->proto.tcp.seen[1].td_scale))
1170		goto nla_put_failure;
1171
1172	tmp.flags = ct->proto.tcp.seen[0].flags;
1173	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1174		    sizeof(struct nf_ct_tcp_flags), &tmp))
1175		goto nla_put_failure;
1176
1177	tmp.flags = ct->proto.tcp.seen[1].flags;
1178	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1179		    sizeof(struct nf_ct_tcp_flags), &tmp))
1180		goto nla_put_failure;
1181	spin_unlock_bh(&ct->lock);
1182
1183	nla_nest_end(skb, nest_parms);
1184
1185	return 0;
1186
1187nla_put_failure:
1188	spin_unlock_bh(&ct->lock);
1189	return -1;
1190}
1191
1192static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1193	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
1194	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1195	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1196	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1197	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
1198};
1199
1200static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1201{
1202	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1203	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1204	int err;
1205
1206	/* The update may not contain anything about the private
1207	 * protocol info; in that case skip the parsing. */
1208	if (!pattr)
1209		return 0;
1210
1211	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr, tcp_nla_policy);
1212	if (err < 0)
1213		return err;
1214
1215	if (tb[CTA_PROTOINFO_TCP_STATE] &&
1216	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1217		return -EINVAL;
1218
1219	spin_lock_bh(&ct->lock);
1220	if (tb[CTA_PROTOINFO_TCP_STATE])
1221		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1222
1223	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1224		struct nf_ct_tcp_flags *attr =
1225			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1226		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1227		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1228	}
1229
1230	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1231		struct nf_ct_tcp_flags *attr =
1232			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1233		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1234		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1235	}
1236
1237	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1238	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1239	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1240	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1241		ct->proto.tcp.seen[0].td_scale =
1242			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1243		ct->proto.tcp.seen[1].td_scale =
1244			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1245	}
1246	spin_unlock_bh(&ct->lock);
1247
1248	return 0;
1249}
1250
1251static int tcp_nlattr_size(void)
1252{
1253	return nla_total_size(0)	   /* CTA_PROTOINFO_TCP */
1254		+ nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1255}
1256
1257static int tcp_nlattr_tuple_size(void)
1258{
1259	return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1260}
1261#endif
1262
1263#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1264
1265#include <linux/netfilter/nfnetlink.h>
1266#include <linux/netfilter/nfnetlink_cttimeout.h>
1267
1268static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1269				     struct net *net, void *data)
1270{
1271	unsigned int *timeouts = data;
1272	struct nf_tcp_net *tn = tcp_pernet(net);
1273	int i;
1274
1275	/* set default TCP timeouts. */
1276	for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1277		timeouts[i] = tn->timeouts[i];
1278
1279	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1280		timeouts[TCP_CONNTRACK_SYN_SENT] =
1281			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1282	}
1283	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1284		timeouts[TCP_CONNTRACK_SYN_RECV] =
1285			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1286	}
1287	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1288		timeouts[TCP_CONNTRACK_ESTABLISHED] =
1289			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1290	}
1291	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1292		timeouts[TCP_CONNTRACK_FIN_WAIT] =
1293			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1294	}
1295	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1296		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1297			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1298	}
1299	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1300		timeouts[TCP_CONNTRACK_LAST_ACK] =
1301			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1302	}
1303	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1304		timeouts[TCP_CONNTRACK_TIME_WAIT] =
1305			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1306	}
1307	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1308		timeouts[TCP_CONNTRACK_CLOSE] =
1309			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1310	}
1311	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1312		timeouts[TCP_CONNTRACK_SYN_SENT2] =
1313			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1314	}
1315	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1316		timeouts[TCP_CONNTRACK_RETRANS] =
1317			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1318	}
1319	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1320		timeouts[TCP_CONNTRACK_UNACK] =
1321			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1322	}
1323	return 0;
1324}
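/* Illustrative sketch, not part of the original file: each CTA_TIMEOUT_TCP_*
 * attribute above carries a timeout in seconds as a big-endian 32-bit value,
 * converted to jiffies by multiplying with HZ.  The helper below restates
 * that conversion on a locally supplied value instead of a parsed attribute.
 */
static inline unsigned int example_wire_timeout_to_jiffies(__be32 wire_seconds)
{
	return ntohl(wire_seconds) * HZ;
}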
1325
1326static int
1327tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1328{
1329	const unsigned int *timeouts = data;
1330
1331	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1332			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1333	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1334			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1335	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1336			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1337	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1338			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1339	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1340			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1341	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1342			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1343	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1344			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1345	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1346			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1347	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1348			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1349	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1350			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1351	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1352			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1353		goto nla_put_failure;
1354	return 0;
1355
1356nla_put_failure:
1357	return -ENOSPC;
1358}
1359
1360static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1361	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
1362	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
1363	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
1364	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
1365	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
1366	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
1367	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
1368	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
1369	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
1370	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
1371	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
1372};
1373#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1374
1375#ifdef CONFIG_SYSCTL
1376static struct ctl_table tcp_sysctl_table[] = {
1377	{
1378		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
1379		.maxlen		= sizeof(unsigned int),
1380		.mode		= 0644,
1381		.proc_handler	= proc_dointvec_jiffies,
1382	},
1383	{
1384		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
1385		.maxlen		= sizeof(unsigned int),
1386		.mode		= 0644,
1387		.proc_handler	= proc_dointvec_jiffies,
1388	},
1389	{
1390		.procname	= "nf_conntrack_tcp_timeout_established",
1391		.maxlen		= sizeof(unsigned int),
1392		.mode		= 0644,
1393		.proc_handler	= proc_dointvec_jiffies,
1394	},
1395	{
1396		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
1397		.maxlen		= sizeof(unsigned int),
1398		.mode		= 0644,
1399		.proc_handler	= proc_dointvec_jiffies,
1400	},
1401	{
1402		.procname	= "nf_conntrack_tcp_timeout_close_wait",
1403		.maxlen		= sizeof(unsigned int),
1404		.mode		= 0644,
1405		.proc_handler	= proc_dointvec_jiffies,
1406	},
1407	{
1408		.procname	= "nf_conntrack_tcp_timeout_last_ack",
1409		.maxlen		= sizeof(unsigned int),
1410		.mode		= 0644,
1411		.proc_handler	= proc_dointvec_jiffies,
1412	},
1413	{
1414		.procname	= "nf_conntrack_tcp_timeout_time_wait",
1415		.maxlen		= sizeof(unsigned int),
1416		.mode		= 0644,
1417		.proc_handler	= proc_dointvec_jiffies,
1418	},
1419	{
1420		.procname	= "nf_conntrack_tcp_timeout_close",
1421		.maxlen		= sizeof(unsigned int),
1422		.mode		= 0644,
1423		.proc_handler	= proc_dointvec_jiffies,
1424	},
1425	{
1426		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
1427		.maxlen		= sizeof(unsigned int),
1428		.mode		= 0644,
1429		.proc_handler	= proc_dointvec_jiffies,
1430	},
1431	{
1432		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
1433		.maxlen		= sizeof(unsigned int),
1434		.mode		= 0644,
1435		.proc_handler	= proc_dointvec_jiffies,
1436	},
1437	{
1438		.procname	= "nf_conntrack_tcp_loose",
1439		.maxlen		= sizeof(unsigned int),
1440		.mode		= 0644,
1441		.proc_handler	= proc_dointvec,
1442	},
1443	{
1444		.procname       = "nf_conntrack_tcp_be_liberal",
1445		.maxlen         = sizeof(unsigned int),
1446		.mode           = 0644,
1447		.proc_handler   = proc_dointvec,
1448	},
1449	{
1450		.procname	= "nf_conntrack_tcp_max_retrans",
1451		.maxlen		= sizeof(unsigned int),
1452		.mode		= 0644,
1453		.proc_handler	= proc_dointvec,
1454	},
1455	{ }
1456};
1457
1458#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1459static struct ctl_table tcp_compat_sysctl_table[] = {
1460	{
1461		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
1462		.maxlen		= sizeof(unsigned int),
1463		.mode		= 0644,
1464		.proc_handler	= proc_dointvec_jiffies,
1465	},
1466	{
1467		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
1468		.maxlen		= sizeof(unsigned int),
1469		.mode		= 0644,
1470		.proc_handler	= proc_dointvec_jiffies,
1471	},
1472	{
1473		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
1474		.maxlen		= sizeof(unsigned int),
1475		.mode		= 0644,
1476		.proc_handler	= proc_dointvec_jiffies,
1477	},
1478	{
1479		.procname	= "ip_conntrack_tcp_timeout_established",
1480		.maxlen		= sizeof(unsigned int),
1481		.mode		= 0644,
1482		.proc_handler	= proc_dointvec_jiffies,
1483	},
1484	{
1485		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
1486		.maxlen		= sizeof(unsigned int),
1487		.mode		= 0644,
1488		.proc_handler	= proc_dointvec_jiffies,
1489	},
1490	{
1491		.procname	= "ip_conntrack_tcp_timeout_close_wait",
1492		.maxlen		= sizeof(unsigned int),
1493		.mode		= 0644,
1494		.proc_handler	= proc_dointvec_jiffies,
1495	},
1496	{
1497		.procname	= "ip_conntrack_tcp_timeout_last_ack",
1498		.maxlen		= sizeof(unsigned int),
1499		.mode		= 0644,
1500		.proc_handler	= proc_dointvec_jiffies,
1501	},
1502	{
1503		.procname	= "ip_conntrack_tcp_timeout_time_wait",
1504		.maxlen		= sizeof(unsigned int),
1505		.mode		= 0644,
1506		.proc_handler	= proc_dointvec_jiffies,
1507	},
1508	{
1509		.procname	= "ip_conntrack_tcp_timeout_close",
1510		.maxlen		= sizeof(unsigned int),
1511		.mode		= 0644,
1512		.proc_handler	= proc_dointvec_jiffies,
1513	},
1514	{
1515		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
1516		.maxlen		= sizeof(unsigned int),
1517		.mode		= 0644,
1518		.proc_handler	= proc_dointvec_jiffies,
1519	},
1520	{
1521		.procname	= "ip_conntrack_tcp_loose",
1522		.maxlen		= sizeof(unsigned int),
1523		.mode		= 0644,
1524		.proc_handler	= proc_dointvec,
1525	},
1526	{
1527		.procname	= "ip_conntrack_tcp_be_liberal",
1528		.maxlen		= sizeof(unsigned int),
1529		.mode		= 0644,
1530		.proc_handler	= proc_dointvec,
1531	},
1532	{
1533		.procname	= "ip_conntrack_tcp_max_retrans",
1534		.maxlen		= sizeof(unsigned int),
1535		.mode		= 0644,
1536		.proc_handler	= proc_dointvec,
1537	},
1538	{ }
1539};
1540#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
1541#endif /* CONFIG_SYSCTL */
1542
1543static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
1544				    struct nf_tcp_net *tn)
1545{
1546#ifdef CONFIG_SYSCTL
1547	if (pn->ctl_table)
1548		return 0;
1549
1550	pn->ctl_table = kmemdup(tcp_sysctl_table,
1551				sizeof(tcp_sysctl_table),
1552				GFP_KERNEL);
1553	if (!pn->ctl_table)
1554		return -ENOMEM;
1555
1556	pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1557	pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1558	pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1559	pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1560	pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1561	pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1562	pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1563	pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1564	pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1565	pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
1566	pn->ctl_table[10].data = &tn->tcp_loose;
1567	pn->ctl_table[11].data = &tn->tcp_be_liberal;
1568	pn->ctl_table[12].data = &tn->tcp_max_retrans;
1569#endif
1570	return 0;
1571}
1572
1573static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
1574					   struct nf_tcp_net *tn)
1575{
1576#ifdef CONFIG_SYSCTL
1577#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
1578	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
1579				       sizeof(tcp_compat_sysctl_table),
1580				       GFP_KERNEL);
1581	if (!pn->ctl_compat_table)
1582		return -ENOMEM;
1583
1584	pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1585	pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
1586	pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1587	pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1588	pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1589	pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1590	pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1591	pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1592	pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1593	pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1594	pn->ctl_compat_table[10].data = &tn->tcp_loose;
1595	pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
1596	pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
1597#endif
1598#endif
1599	return 0;
1600}
1601
1602static int tcp_init_net(struct net *net, u_int16_t proto)
1603{
1604	int ret;
1605	struct nf_tcp_net *tn = tcp_pernet(net);
1606	struct nf_proto_net *pn = &tn->pn;
1607
1608	if (!pn->users) {
1609		int i;
1610
1611		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1612			tn->timeouts[i] = tcp_timeouts[i];
1613
1614		tn->tcp_loose = nf_ct_tcp_loose;
1615		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1616		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1617	}
1618
1619	if (proto == AF_INET) {
1620		ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
1621		if (ret < 0)
1622			return ret;
1623
1624		ret = tcp_kmemdup_sysctl_table(pn, tn);
1625		if (ret < 0)
1626			nf_ct_kfree_compat_sysctl_table(pn);
1627	} else
1628		ret = tcp_kmemdup_sysctl_table(pn, tn);
1629
1630	return ret;
1631}
1632
1633static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1634{
1635	return &net->ct.nf_ct_proto.tcp.pn;
1636}
1637
1638struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1639{
1640	.l3proto		= PF_INET,
1641	.l4proto 		= IPPROTO_TCP,
1642	.name 			= "tcp",
1643	.pkt_to_tuple 		= tcp_pkt_to_tuple,
1644	.invert_tuple 		= tcp_invert_tuple,
1645	.print_tuple 		= tcp_print_tuple,
1646	.print_conntrack 	= tcp_print_conntrack,
1647	.packet 		= tcp_packet,
1648	.get_timeouts		= tcp_get_timeouts,
1649	.new 			= tcp_new,
1650	.error			= tcp_error,
1651#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1652	.to_nlattr		= tcp_to_nlattr,
1653	.nlattr_size		= tcp_nlattr_size,
1654	.from_nlattr		= nlattr_to_tcp,
1655	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1656	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1657	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1658	.nla_policy		= nf_ct_port_nla_policy,
1659#endif
1660#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1661	.ctnl_timeout		= {
1662		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
1663		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
1664		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
1665		.obj_size	= sizeof(unsigned int) *
1666					TCP_CONNTRACK_TIMEOUT_MAX,
1667		.nla_policy	= tcp_timeout_nla_policy,
1668	},
1669#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1670	.init_net		= tcp_init_net,
1671	.get_net_proto		= tcp_get_net_proto,
1672};
1673EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1674
1675struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1676{
1677	.l3proto		= PF_INET6,
1678	.l4proto 		= IPPROTO_TCP,
1679	.name 			= "tcp",
1680	.pkt_to_tuple 		= tcp_pkt_to_tuple,
1681	.invert_tuple 		= tcp_invert_tuple,
1682	.print_tuple 		= tcp_print_tuple,
1683	.print_conntrack 	= tcp_print_conntrack,
1684	.packet 		= tcp_packet,
1685	.get_timeouts		= tcp_get_timeouts,
1686	.new 			= tcp_new,
1687	.error			= tcp_error,
1688#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1689	.to_nlattr		= tcp_to_nlattr,
1690	.nlattr_size		= tcp_nlattr_size,
1691	.from_nlattr		= nlattr_to_tcp,
1692	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1693	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1694	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1695	.nla_policy		= nf_ct_port_nla_policy,
1696#endif
1697#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
1698	.ctnl_timeout		= {
1699		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
1700		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
1701		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
1702		.obj_size	= sizeof(unsigned int) *
1703					TCP_CONNTRACK_TIMEOUT_MAX,
1704		.nla_policy	= tcp_timeout_nla_policy,
1705	},
1706#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
1707	.init_net		= tcp_init_net,
1708	.get_net_proto		= tcp_get_net_proto,
1709};
1710EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1711