nf_conntrack_reasm.c revision 7eb95156d9dce2f59794264db336ce007d71638b
/*
 * IPv6 fragment reassembly for connection tracking
 *
 * Copyright (C)2004 USAGI/WIDE Project
 *
 * Author:
 *	Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *
 * Based on: net/ipv6/reassembly.c
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/jiffies.h>
#include <linux/net.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>

#include <net/sock.h>
#include <net/snmp.h>
#include <net/inet_frag.h>

#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <linux/sysctl.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/kernel.h>
#include <linux/module.h>

#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
#define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT

unsigned int nf_ct_frag6_high_thresh __read_mostly = NF_CT_FRAG6_HIGH_THRESH;
unsigned int nf_ct_frag6_low_thresh __read_mostly = NF_CT_FRAG6_LOW_THRESH;
unsigned long nf_ct_frag6_timeout __read_mostly = NF_CT_FRAG6_TIMEOUT;

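/* Per-fragment state kept in skb->cb while the fragment sits in a
 * reassembly queue: the fragment's byte offset within the datagram and
 * a pointer back to the original skb that this queued clone was taken
 * from (see nf_ct_frag6_gather()).
 */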
struct nf_ct_frag6_skb_cb
{
	struct inet6_skb_parm	h;
	int			offset;
	struct sk_buff		*orig;
};

#define NFCT_FRAG6_CB(skb)	((struct nf_ct_frag6_skb_cb*)((skb)->cb))

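/* One reassembly queue per in-flight datagram, keyed by the fragment
 * ID and the source/destination addresses; embeds the generic
 * inet_frag_queue state (timer, lock, fragment list, meat/len counts).
 */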
struct nf_ct_frag6_queue
{
	struct inet_frag_queue	q;

	__be32			id;		/* fragment id		*/
	struct in6_addr		saddr;
	struct in6_addr		daddr;

	unsigned int		csum;
	__u16			nhoffset;
};

static struct inet_frags nf_frags;

static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
{
	hlist_del(&fq->q.list);
	list_del(&fq->q.lru_list);
	nf_frags.nqueues--;
}

static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
{
	write_lock(&nf_frags.lock);
	__fq_unlink(fq);
	write_unlock(&nf_frags.lock);
}

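/* Open-coded jhash over the source address, destination address and
 * fragment ID, salted with the per-table random value nf_frags.rnd.
 * The result is masked to INETFRAGS_HASHSZ - 1, which assumes the
 * table size is a power of two.
 */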
static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr,
			       struct in6_addr *daddr)
{
	u32 a, b, c;

	a = (__force u32)saddr->s6_addr32[0];
	b = (__force u32)saddr->s6_addr32[1];
	c = (__force u32)saddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += nf_frags.rnd;
	__jhash_mix(a, b, c);

	a += (__force u32)saddr->s6_addr32[3];
	b += (__force u32)daddr->s6_addr32[0];
	c += (__force u32)daddr->s6_addr32[1];
	__jhash_mix(a, b, c);

	a += (__force u32)daddr->s6_addr32[2];
	b += (__force u32)daddr->s6_addr32[3];
	c += (__force u32)id;
	__jhash_mix(a, b, c);

	return c & (INETFRAGS_HASHSZ - 1);
}

int nf_ct_frag6_secret_interval = 10 * 60 * HZ;

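/* Timer callback: periodically re-key the hash so that bucket
 * placement stays unpredictable.  Pick a new random salt, then rehash
 * every queue and move it to its new chain.
 */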
static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
{
	unsigned long now = jiffies;
	int i;

	write_lock(&nf_frags.lock);
	get_random_bytes(&nf_frags.rnd, sizeof(u32));
	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
		struct nf_ct_frag6_queue *q;
		struct hlist_node *p, *n;

		hlist_for_each_entry_safe(q, p, n, &nf_frags.hash[i], q.list) {
			unsigned int hval = ip6qhashfn(q->id,
						       &q->saddr,
						       &q->daddr);
			if (hval != i) {
				hlist_del(&q->q.list);
				/* Relink to new hash chain. */
				hlist_add_head(&q->q.list,
					       &nf_frags.hash[hval]);
			}
		}
	}
	write_unlock(&nf_frags.lock);

	mod_timer(&nf_frags.secret_timer, now + nf_ct_frag6_secret_interval);
}

/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
{
	if (work)
		*work -= skb->truesize;
	atomic_sub(skb->truesize, &nf_frags.mem);
	if (NFCT_FRAG6_CB(skb)->orig)
		kfree_skb(NFCT_FRAG6_CB(skb)->orig);

	kfree_skb(skb);
}

static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
				   unsigned int *work)
{
	if (work)
		*work -= sizeof(struct nf_ct_frag6_queue);
	atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
	kfree(fq);
}

static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
{
	struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);

	if (!fq)
		return NULL;
	atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem);
	return fq;
}

/* Destruction primitives. */

/* Complete destruction of fq. */
static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
				unsigned int *work)
{
	struct sk_buff *fp;

	BUG_TRAP(fq->q.last_in & COMPLETE);
	BUG_TRAP(del_timer(&fq->q.timer) == 0);

	/* Release all fragment data. */
	fp = fq->q.fragments;
	while (fp) {
		struct sk_buff *xp = fp->next;

		frag_kfree_skb(fp, work);
		fp = xp;
	}

	frag_free_queue(fq, work);
}

static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
{
	if (atomic_dec_and_test(&fq->q.refcnt))
		nf_ct_frag6_destroy(fq, work);
}

/* Kill fq entry. It is not destroyed immediately,
 * because the caller (and possibly others) still holds a reference.
 */
static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
{
	if (del_timer(&fq->q.timer))
		atomic_dec(&fq->q.refcnt);

	if (!(fq->q.last_in & COMPLETE)) {
		fq_unlink(fq);
		atomic_dec(&fq->q.refcnt);
		fq->q.last_in |= COMPLETE;
	}
}

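/* Walk the LRU list from the oldest queue and kill queues until the
 * memory accounted in nf_frags.mem drops by at least the amount we are
 * over the low threshold; fq_put() decrements 'work' as skbs and
 * queues are actually freed.
 */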
static void nf_ct_frag6_evictor(void)
{
	struct nf_ct_frag6_queue *fq;
	struct list_head *tmp;
	unsigned int work;

	work = atomic_read(&nf_frags.mem);
	if (work <= nf_ct_frag6_low_thresh)
		return;

	work -= nf_ct_frag6_low_thresh;
	while (work > 0) {
		read_lock(&nf_frags.lock);
		if (list_empty(&nf_frags.lru_list)) {
			read_unlock(&nf_frags.lock);
			return;
		}
		tmp = nf_frags.lru_list.next;
		BUG_ON(tmp == NULL);
		fq = list_entry(tmp, struct nf_ct_frag6_queue, q.lru_list);
		atomic_inc(&fq->q.refcnt);
		read_unlock(&nf_frags.lock);

		spin_lock(&fq->q.lock);
		if (!(fq->q.last_in & COMPLETE))
			fq_kill(fq);
		spin_unlock(&fq->q.lock);

		fq_put(fq, &work);
	}
}

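/* Timer callback: the queue timed out before all fragments arrived.
 * Kill it; the timer's reference is dropped with fq_put().
 */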
static void nf_ct_frag6_expire(unsigned long data)
{
	struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;

	spin_lock(&fq->q.lock);

	if (fq->q.last_in & COMPLETE)
		goto out;

	fq_kill(fq);

out:
	spin_unlock(&fq->q.lock);
	fq_put(fq, NULL);
}

/* Creation primitives. */

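/* Insert a freshly created queue into the hash table.  On SMP another
 * CPU may have raced us and already inserted a queue for the same
 * (id, saddr, daddr) tuple, in which case we drop ours and return the
 * existing one.
 */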
static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
					  struct nf_ct_frag6_queue *fq_in)
{
	struct nf_ct_frag6_queue *fq;
#ifdef CONFIG_SMP
	struct hlist_node *n;
#endif

	write_lock(&nf_frags.lock);
#ifdef CONFIG_SMP
	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
		if (fq->id == fq_in->id &&
		    ipv6_addr_equal(&fq_in->saddr, &fq->saddr) &&
		    ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) {
			atomic_inc(&fq->q.refcnt);
			write_unlock(&nf_frags.lock);
			fq_in->q.last_in |= COMPLETE;
			fq_put(fq_in, NULL);
			return fq;
		}
	}
#endif
	fq = fq_in;

	if (!mod_timer(&fq->q.timer, jiffies + nf_ct_frag6_timeout))
		atomic_inc(&fq->q.refcnt);

	atomic_inc(&fq->q.refcnt);
	hlist_add_head(&fq->q.list, &nf_frags.hash[hash]);
	INIT_LIST_HEAD(&fq->q.lru_list);
	list_add_tail(&fq->q.lru_list, &nf_frags.lru_list);
	nf_frags.nqueues++;
	write_unlock(&nf_frags.lock);
	return fq;
}

static struct nf_ct_frag6_queue *
nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src,
		   struct in6_addr *dst)
{
	struct nf_ct_frag6_queue *fq;

	if ((fq = frag_alloc_queue()) == NULL) {
		pr_debug("Can't alloc new queue\n");
		goto oom;
	}

	memset(fq, 0, sizeof(struct nf_ct_frag6_queue));

	fq->id = id;
	ipv6_addr_copy(&fq->saddr, src);
	ipv6_addr_copy(&fq->daddr, dst);

	setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq);
	spin_lock_init(&fq->q.lock);
	atomic_set(&fq->q.refcnt, 1);

	return nf_ct_frag6_intern(hash, fq);

oom:
	return NULL;
}

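/* Look up the queue for (id, src, dst), taking a reference on it; if
 * none exists yet, allocate and intern a new one.
 */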
static __inline__ struct nf_ct_frag6_queue *
fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst)
{
	struct nf_ct_frag6_queue *fq;
	struct hlist_node *n;
	unsigned int hash = ip6qhashfn(id, src, dst);

	read_lock(&nf_frags.lock);
	hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) {
		if (fq->id == id &&
		    ipv6_addr_equal(src, &fq->saddr) &&
		    ipv6_addr_equal(dst, &fq->daddr)) {
			atomic_inc(&fq->q.refcnt);
			read_unlock(&nf_frags.lock);
			return fq;
		}
	}
	read_unlock(&nf_frags.lock);

	return nf_ct_frag6_create(hash, id, src, dst);
}

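/* Insert one fragment into the queue: validate its offset and length
 * against RFC 2460, trim any overlap with already-queued neighbours,
 * and link the skb into the offset-sorted fragment list.  Returns 0 on
 * success and -1 if the fragment must be dropped.
 */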
static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
			     struct frag_hdr *fhdr, int nhoff)
{
	struct sk_buff *prev, *next;
	int offset, end;

	if (fq->q.last_in & COMPLETE) {
		pr_debug("Already completed\n");
		goto err;
	}

	offset = ntohs(fhdr->frag_off) & ~0x7;
	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));

	if ((unsigned int)end > IPV6_MAXPLEN) {
		pr_debug("offset is too large.\n");
		return -1;
	}

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		const unsigned char *nh = skb_network_header(skb);
		skb->csum = csum_sub(skb->csum,
				     csum_partial(nh, (u8 *)(fhdr + 1) - nh,
						  0));
	}

	/* Is this the final fragment? */
	if (!(fhdr->frag_off & htons(IP6_MF))) {
		/* If we already have some bits beyond end
		 * or have different end, the segment is corrupted.
		 */
		if (end < fq->q.len ||
		    ((fq->q.last_in & LAST_IN) && end != fq->q.len)) {
			pr_debug("already received last fragment\n");
			goto err;
		}
		fq->q.last_in |= LAST_IN;
		fq->q.len = end;
	} else {
		/* Check if the fragment is rounded to 8 bytes.
		 * Required by the RFC.
		 */
		if (end & 0x7) {
			/* RFC2460 says always send parameter problem in
			 * this case. -DaveM
			 */
			pr_debug("end of fragment not rounded to 8 bytes.\n");
			return -1;
		}
		if (end > fq->q.len) {
			/* Some bits beyond end -> corruption. */
			if (fq->q.last_in & LAST_IN) {
				pr_debug("last packet already reached.\n");
				goto err;
			}
			fq->q.len = end;
		}
	}

	if (end == offset)
		goto err;

	/* Point into the IP datagram 'data' part. */
	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
		pr_debug("queue: message is too short.\n");
		goto err;
	}
	if (pskb_trim_rcsum(skb, end - offset)) {
		pr_debug("Can't trim\n");
		goto err;
	}

	/* Find out which fragments are in front and at the back of us
	 * in the chain of fragments so far.  We must know where to put
	 * this fragment, right?
	 */
	prev = NULL;
	for (next = fq->q.fragments; next != NULL; next = next->next) {
		if (NFCT_FRAG6_CB(next)->offset >= offset)
			break;	/* bingo! */
		prev = next;
	}

	/* We found where to put this one.  Check for overlap with
	 * preceding fragment, and, if needed, align things so that
	 * any overlaps are eliminated.
	 */
	if (prev) {
		int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;

		if (i > 0) {
			offset += i;
			if (end <= offset) {
				pr_debug("overlap\n");
				goto err;
			}
			if (!pskb_pull(skb, i)) {
				pr_debug("Can't pull\n");
				goto err;
			}
			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
				skb->ip_summed = CHECKSUM_NONE;
		}
	}

	/* Look for overlap with succeeding segments.
	 * If we can merge fragments, do it.
	 */
	while (next && NFCT_FRAG6_CB(next)->offset < end) {
		/* overlap is 'i' bytes */
		int i = end - NFCT_FRAG6_CB(next)->offset;

		if (i < next->len) {
			/* Eat head of the next overlapped fragment
			 * and leave the loop. The next ones cannot overlap.
			 */
			pr_debug("Eat head of the overlapped parts: %d\n", i);
			if (!pskb_pull(next, i))
				goto err;

			/* next fragment */
			NFCT_FRAG6_CB(next)->offset += i;
			fq->q.meat -= i;
			if (next->ip_summed != CHECKSUM_UNNECESSARY)
				next->ip_summed = CHECKSUM_NONE;
			break;
		} else {
			struct sk_buff *free_it = next;

			/* Old fragment is completely overridden by
			 * the new one; drop it.
			 */
			next = next->next;

			if (prev)
				prev->next = next;
			else
				fq->q.fragments = next;

			fq->q.meat -= free_it->len;
			frag_kfree_skb(free_it, NULL);
		}
	}

	NFCT_FRAG6_CB(skb)->offset = offset;

	/* Insert this fragment in the chain of fragments. */
	skb->next = next;
	if (prev)
		prev->next = skb;
	else
		fq->q.fragments = skb;

	skb->dev = NULL;
	fq->q.stamp = skb->tstamp;
	fq->q.meat += skb->len;
	atomic_add(skb->truesize, &nf_frags.mem);

	/* The first fragment.
	 * nhoffset is obtained from the first fragment, of course.
	 */
	if (offset == 0) {
		fq->nhoffset = nhoff;
		fq->q.last_in |= FIRST_IN;
	}
	write_lock(&nf_frags.lock);
	list_move_tail(&fq->q.lru_list, &nf_frags.lru_list);
	write_unlock(&nf_frags.lock);
	return 0;

err:
	return -1;
}

/*
 *	Check if this packet is complete.
 *	Returns NULL on failure for any reason, or the reassembled
 *	skb on success.
 *
 *	It is called with the fq locked, and the caller must check that
 *	the queue is eligible for reassembly, i.e. it is not COMPLETE,
 *	the last and the first fragments have arrived and all the bits
 *	are here.
 */
static struct sk_buff *
nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
{
	struct sk_buff *fp, *op, *head = fq->q.fragments;
	int    payload_len;

	fq_kill(fq);

	BUG_TRAP(head != NULL);
	BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);

	/* Unfragmented part is taken from the first segment. */
	payload_len = ((head->data - skb_network_header(head)) -
		       sizeof(struct ipv6hdr) + fq->q.len -
		       sizeof(struct frag_hdr));
	if (payload_len > IPV6_MAXPLEN) {
		pr_debug("payload len is too large.\n");
		goto out_oversize;
	}

	/* Head of list must not be cloned. */
	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
		pr_debug("skb is cloned but can't expand head\n");
		goto out_oom;
	}

	/* If the first fragment is fragmented itself, we split
	 * it into two chunks: the first with data and paged part
	 * and the second, holding only fragments. */
	if (skb_shinfo(head)->frag_list) {
		struct sk_buff *clone;
		int i, plen = 0;

		if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
			pr_debug("Can't alloc skb\n");
			goto out_oom;
		}
		clone->next = head->next;
		head->next = clone;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_shinfo(head)->frag_list = NULL;
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_shinfo(head)->frags[i].size;
		clone->len = clone->data_len = head->data_len - plen;
		head->data_len -= clone->len;
		head->len -= clone->len;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;

		NFCT_FRAG6_CB(clone)->orig = NULL;
		atomic_add(clone->truesize, &nf_frags.mem);
	}

	/* We have to remove the fragment header from the datagram and
	 * relocate the header in order to calculate the ICV correctly. */
	skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
	memmove(head->head + sizeof(struct frag_hdr), head->head,
		(head->data - head->head) - sizeof(struct frag_hdr));
	head->mac_header += sizeof(struct frag_hdr);
	head->network_header += sizeof(struct frag_hdr);

	skb_shinfo(head)->frag_list = head->next;
	skb_reset_transport_header(head);
	skb_push(head, head->data - skb_network_header(head));
	atomic_sub(head->truesize, &nf_frags.mem);

	for (fp = head->next; fp; fp = fp->next) {
		head->data_len += fp->len;
		head->len += fp->len;
		if (head->ip_summed != fp->ip_summed)
			head->ip_summed = CHECKSUM_NONE;
		else if (head->ip_summed == CHECKSUM_COMPLETE)
			head->csum = csum_add(head->csum, fp->csum);
		head->truesize += fp->truesize;
		atomic_sub(fp->truesize, &nf_frags.mem);
	}

	head->next = NULL;
	head->dev = dev;
	head->tstamp = fq->q.stamp;
	ipv6_hdr(head)->payload_len = htons(payload_len);

	/* Yes, and fold redundant checksum back. 8) */
	if (head->ip_summed == CHECKSUM_COMPLETE)
		head->csum = csum_partial(skb_network_header(head),
					  skb_network_header_len(head),
					  head->csum);

	fq->q.fragments = NULL;

	/* All original skbs are linked into the
	 * NFCT_FRAG6_CB(head)->orig chain. */
	fp = skb_shinfo(head)->frag_list;
	if (NFCT_FRAG6_CB(fp)->orig == NULL)
		/* The head skb was split into two skbs above. */
		fp = fp->next;

	op = NFCT_FRAG6_CB(head)->orig;
	for (; fp; fp = fp->next) {
		struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;

		op->next = orig;
		op = orig;
		NFCT_FRAG6_CB(fp)->orig = NULL;
	}

	return head;

out_oversize:
	if (net_ratelimit())
		printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len);
	goto out_fail;
out_oom:
	if (net_ratelimit())
		printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
out_fail:
	return NULL;
}


/*
 * Find the header just before the Fragment Header.
 *
 * On success, returns 0 and sets ...
 * (*prevhdrp): the value of the "Next Header" field in the header
 *		just before the Fragment Header.
 * (*prevhoff): the offset of the "Next Header" field in the header
 *		just before the Fragment Header.
 * (*fhoff)   : the offset of the Fragment Header.
 *
 * Based on ipv6_skip_exthdr() in net/ipv6/exthdrs.c
 *
 */
static int
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
{
	u8 nexthdr = ipv6_hdr(skb)->nexthdr;
	const int netoff = skb_network_offset(skb);
	u8 prev_nhoff = netoff + offsetof(struct ipv6hdr, nexthdr);
	int start = netoff + sizeof(struct ipv6hdr);
	int len = skb->len - start;
	u8 prevhdr = NEXTHDR_IPV6;

	while (nexthdr != NEXTHDR_FRAGMENT) {
		struct ipv6_opt_hdr hdr;
		int hdrlen;

		if (!ipv6_ext_hdr(nexthdr))
			return -1;
		if (len < (int)sizeof(struct ipv6_opt_hdr)) {
			pr_debug("too short\n");
			return -1;
		}
		if (nexthdr == NEXTHDR_NONE) {
			pr_debug("next header is none\n");
			return -1;
		}
		if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
			BUG();
		if (nexthdr == NEXTHDR_AUTH)
			hdrlen = (hdr.hdrlen + 2) << 2;
		else
			hdrlen = ipv6_optlen(&hdr);

		prevhdr = nexthdr;
		prev_nhoff = start;

		nexthdr = hdr.nexthdr;
		len -= hdrlen;
		start += hdrlen;
	}

	if (len < 0)
		return -1;

	*prevhdrp = prevhdr;
	*prevhoff = prev_nhoff;
	*fhoff = start;

	return 0;
}

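/* Entry point for the defrag hook.  The incoming skb is never queued
 * directly: a clone is queued instead, with NFCT_FRAG6_CB(clone)->orig
 * pointing back at the original skb.  Returns the original skb if it
 * is not a fragment (or on error), NULL while more fragments are still
 * awaited (or if reassembly fails), and the reassembled skb once the
 * datagram is complete.
 *
 * Roughly how a hook function is expected to drive this (a sketch; the
 * actual caller lives outside this file, in the IPv6 conntrack glue):
 *
 *	reasm = nf_ct_frag6_gather(skb);
 *	if (reasm == NULL)
 *		return NF_STOLEN;	(queued, wait for more fragments)
 *	if (reasm == skb)
 *		return NF_ACCEPT;	(not a fragment, pass through)
 *	(datagram complete: process reasm, then re-inject the original
 *	 fragments via nf_ct_frag6_output())
 */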
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
{
	struct sk_buff *clone;
	struct net_device *dev = skb->dev;
	struct frag_hdr *fhdr;
	struct nf_ct_frag6_queue *fq;
	struct ipv6hdr *hdr;
	int fhoff, nhoff;
	u8 prevhdr;
	struct sk_buff *ret_skb = NULL;

	/* Jumbo payload inhibits frag. header */
	if (ipv6_hdr(skb)->payload_len == 0) {
		pr_debug("payload len = 0\n");
		return skb;
	}

	if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
		return skb;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (clone == NULL) {
		pr_debug("Can't clone skb\n");
		return skb;
	}

	NFCT_FRAG6_CB(clone)->orig = skb;

	if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
		pr_debug("message is too short.\n");
		goto ret_orig;
	}

	skb_set_transport_header(clone, fhoff);
	hdr = ipv6_hdr(clone);
	fhdr = (struct frag_hdr *)skb_transport_header(clone);

	if (!(fhdr->frag_off & htons(0xFFF9))) {
		pr_debug("Invalid fragment offset\n");
		/* It is not a fragmented frame */
		goto ret_orig;
	}

	if (atomic_read(&nf_frags.mem) > nf_ct_frag6_high_thresh)
		nf_ct_frag6_evictor();

	fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
	if (fq == NULL) {
		pr_debug("Can't find and can't create new queue\n");
		goto ret_orig;
	}

	spin_lock(&fq->q.lock);

	if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
		spin_unlock(&fq->q.lock);
		pr_debug("Can't insert skb to queue\n");
		fq_put(fq, NULL);
		goto ret_orig;
	}

	if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) {
		ret_skb = nf_ct_frag6_reasm(fq, dev);
		if (ret_skb == NULL)
			pr_debug("Can't reassemble fragmented packets\n");
	}
	spin_unlock(&fq->q.lock);

	fq_put(fq, NULL);
	return ret_skb;

ret_orig:
	kfree_skb(clone);
	return skb;
}

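/* Hand the original fragments back to the stack after reassembly:
 * each original skb gets a reference to the reassembled skb in
 * nfct_reasm and re-enters the hook chain just past the conntrack
 * defrag priority, so it is not defragmented a second time.
 */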
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
			struct net_device *in, struct net_device *out,
			int (*okfn)(struct sk_buff *))
{
	struct sk_buff *s, *s2;

	for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
		nf_conntrack_put_reasm(s->nfct_reasm);
		nf_conntrack_get_reasm(skb);
		s->nfct_reasm = skb;

		s2 = s->next;
		s->next = NULL;

		NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
			       NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
		s = s2;
	}
	nf_conntrack_put_reasm(skb);
}

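/* Free a reassembled skb together with all of the original fragment
 * skbs still chained via NFCT_FRAG6_CB(skb)->orig.
 */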
int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
{
	struct sk_buff *s, *s2;

	for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
		s2 = s->next;
		kfree_skb(s);
	}

	kfree_skb(skb);

	return 0;
}

int nf_ct_frag6_init(void)
{
	setup_timer(&nf_frags.secret_timer, nf_ct_frag6_secret_rebuild, 0);
	nf_frags.secret_timer.expires = jiffies + nf_ct_frag6_secret_interval;
	add_timer(&nf_frags.secret_timer);

	inet_frags_init(&nf_frags);

	return 0;
}

void nf_ct_frag6_cleanup(void)
{
	inet_frags_fini(&nf_frags);

	del_timer(&nf_frags.secret_timer);
	nf_ct_frag6_low_thresh = 0;
	nf_ct_frag6_evictor();
}