1/*
2 * xt_HMARK - Netfilter module to set mark by means of hashing
3 *
4 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
5 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 */
11
12#include <linux/module.h>
13#include <linux/skbuff.h>
14#include <linux/icmp.h>
15
16#include <linux/netfilter/x_tables.h>
17#include <linux/netfilter/xt_HMARK.h>
18
19#include <net/ip.h>
20#if IS_ENABLED(CONFIG_NF_CONNTRACK)
21#include <net/netfilter/nf_conntrack.h>
22#endif
23#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
24#include <net/ipv6.h>
25#include <linux/netfilter_ipv6/ip6_tables.h>
26#endif
27
28MODULE_LICENSE("GPL");
29MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
30MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
31MODULE_ALIAS("ipt_HMARK");
32MODULE_ALIAS("ip6t_HMARK");
33
34struct hmark_tuple {
35	__be32			src;
36	__be32			dst;
37	union hmark_ports	uports;
38	u8			proto;
39};
40
41static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
42{
43	return (addr32[0] & mask[0]) ^
44	       (addr32[1] & mask[1]) ^
45	       (addr32[2] & mask[2]) ^
46	       (addr32[3] & mask[3]);
47}
48
49static inline __be32
50hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
51{
52	switch (l3num) {
53	case AF_INET:
54		return *addr32 & *mask;
55	case AF_INET6:
56		return hmark_addr6_mask(addr32, mask);
57	}
58	return 0;
59}
60
61static inline void hmark_swap_ports(union hmark_ports *uports,
62				    const struct xt_hmark_info *info)
63{
64	union hmark_ports hp;
65	u16 src, dst;
66
67	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
68	src = ntohs(hp.b16.src);
69	dst = ntohs(hp.b16.dst);
70
71	if (dst > src)
72		uports->v32 = (dst << 16) | src;
73	else
74		uports->v32 = (src << 16) | dst;
75}
76
77static int
78hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
79		    const struct xt_hmark_info *info)
80{
81#if IS_ENABLED(CONFIG_NF_CONNTRACK)
82	enum ip_conntrack_info ctinfo;
83	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
84	struct nf_conntrack_tuple *otuple;
85	struct nf_conntrack_tuple *rtuple;
86
87	if (ct == NULL || nf_ct_is_untracked(ct))
88		return -1;
89
90	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
91	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
92
93	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
94				 info->src_mask.ip6);
95	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
96				 info->dst_mask.ip6);
97
98	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
99		return 0;
100
101	t->proto = nf_ct_protonum(ct);
102	if (t->proto != IPPROTO_ICMP) {
103		t->uports.b16.src = otuple->src.u.all;
104		t->uports.b16.dst = rtuple->src.u.all;
105		hmark_swap_ports(&t->uports, info);
106	}
107
108	return 0;
109#else
110	return -1;
111#endif
112}
113
114/* This hash function is endian independent, to ensure consistent hashing if
115 * the cluster is composed of big and little endian systems. */
116static inline u32
117hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
118{
119	u32 hash;
120	u32 src = ntohl(t->src);
121	u32 dst = ntohl(t->dst);
122
123	if (dst < src)
124		swap(src, dst);
125
126	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
127	hash = hash ^ (t->proto & info->proto_mask);
128
129	return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
130}
131
132static void
133hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
134		      struct hmark_tuple *t, const struct xt_hmark_info *info)
135{
136	int protoff;
137
138	protoff = proto_ports_offset(t->proto);
139	if (protoff < 0)
140		return;
141
142	nhoff += protoff;
143	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
144		return;
145
146	hmark_swap_ports(&t->uports, info);
147}
148
149#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
150static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
151{
152	struct icmp6hdr *icmp6h, _ih6;
153
154	icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
155	if (icmp6h == NULL)
156		return 0;
157
158	if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
159		*offset += sizeof(struct icmp6hdr);
160		return 1;
161	}
162	return 0;
163}
164
165static int
166hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
167			  const struct xt_hmark_info *info)
168{
169	struct ipv6hdr *ip6, _ip6;
170	int flag = IP6_FH_F_AUTH;
171	unsigned int nhoff = 0;
172	u16 fragoff = 0;
173	int nexthdr;
174
175	ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
176	nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
177	if (nexthdr < 0)
178		return 0;
179	/* No need to check for icmp errors on fragments */
180	if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
181		goto noicmp;
182	/* Use inner header in case of ICMP errors */
183	if (get_inner6_hdr(skb, &nhoff)) {
184		ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
185		if (ip6 == NULL)
186			return -1;
187		/* If AH present, use SPI like in ESP. */
188		flag = IP6_FH_F_AUTH;
189		nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
190		if (nexthdr < 0)
191			return -1;
192	}
193noicmp:
194	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
195	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
196
197	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
198		return 0;
199
200	t->proto = nexthdr;
201	if (t->proto == IPPROTO_ICMPV6)
202		return 0;
203
204	if (flag & IP6_FH_F_FRAG)
205		return 0;
206
207	hmark_set_tuple_ports(skb, nhoff, t, info);
208	return 0;
209}
210
211static unsigned int
212hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
213{
214	const struct xt_hmark_info *info = par->targinfo;
215	struct hmark_tuple t;
216
217	memset(&t, 0, sizeof(struct hmark_tuple));
218
219	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
220		if (hmark_ct_set_htuple(skb, &t, info) < 0)
221			return XT_CONTINUE;
222	} else {
223		if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
224			return XT_CONTINUE;
225	}
226
227	skb->mark = hmark_hash(&t, info);
228	return XT_CONTINUE;
229}
230#endif
231
232static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
233{
234	const struct icmphdr *icmph;
235	struct icmphdr _ih;
236
237	/* Not enough header? */
238	icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
239	if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
240		return 0;
241
242	/* Error message? */
243	if (icmph->type != ICMP_DEST_UNREACH &&
244	    icmph->type != ICMP_SOURCE_QUENCH &&
245	    icmph->type != ICMP_TIME_EXCEEDED &&
246	    icmph->type != ICMP_PARAMETERPROB &&
247	    icmph->type != ICMP_REDIRECT)
248		return 0;
249
250	*nhoff += iphsz + sizeof(_ih);
251	return 1;
252}
253
254static int
255hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
256			  const struct xt_hmark_info *info)
257{
258	struct iphdr *ip, _ip;
259	int nhoff = skb_network_offset(skb);
260
261	ip = (struct iphdr *) (skb->data + nhoff);
262	if (ip->protocol == IPPROTO_ICMP) {
263		/* Use inner header in case of ICMP errors */
264		if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
265			ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
266			if (ip == NULL)
267				return -1;
268		}
269	}
270
271	t->src = ip->saddr & info->src_mask.ip;
272	t->dst = ip->daddr & info->dst_mask.ip;
273
274	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
275		return 0;
276
277	t->proto = ip->protocol;
278
279	/* ICMP has no ports, skip */
280	if (t->proto == IPPROTO_ICMP)
281		return 0;
282
283	/* follow-up fragments don't contain ports, skip all fragments */
284	if (ip->frag_off & htons(IP_MF | IP_OFFSET))
285		return 0;
286
287	hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
288
289	return 0;
290}
291
292static unsigned int
293hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
294{
295	const struct xt_hmark_info *info = par->targinfo;
296	struct hmark_tuple t;
297
298	memset(&t, 0, sizeof(struct hmark_tuple));
299
300	if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
301		if (hmark_ct_set_htuple(skb, &t, info) < 0)
302			return XT_CONTINUE;
303	} else {
304		if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
305			return XT_CONTINUE;
306	}
307
308	skb->mark = hmark_hash(&t, info);
309	return XT_CONTINUE;
310}
311
312static int hmark_tg_check(const struct xt_tgchk_param *par)
313{
314	const struct xt_hmark_info *info = par->targinfo;
315
316	if (!info->hmodulus) {
317		pr_info("xt_HMARK: hash modulus can't be zero\n");
318		return -EINVAL;
319	}
320	if (info->proto_mask &&
321	    (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) {
322		pr_info("xt_HMARK: proto mask must be zero with L3 mode\n");
323		return -EINVAL;
324	}
325	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
326	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
327			     XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) {
328		pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n");
329		return -EINVAL;
330	}
331	if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
332	    (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
333			     XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
334		pr_info("xt_HMARK: spi-set and port-set can't be combined\n");
335		return -EINVAL;
336	}
337	return 0;
338}
339
340static struct xt_target hmark_tg_reg[] __read_mostly = {
341	{
342		.name		= "HMARK",
343		.family		= NFPROTO_IPV4,
344		.target		= hmark_tg_v4,
345		.targetsize	= sizeof(struct xt_hmark_info),
346		.checkentry	= hmark_tg_check,
347		.me		= THIS_MODULE,
348	},
349#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
350	{
351		.name		= "HMARK",
352		.family		= NFPROTO_IPV6,
353		.target		= hmark_tg_v6,
354		.targetsize	= sizeof(struct xt_hmark_info),
355		.checkentry	= hmark_tg_check,
356		.me		= THIS_MODULE,
357	},
358#endif
359};
360
361static int __init hmark_tg_init(void)
362{
363	return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
364}
365
366static void __exit hmark_tg_exit(void)
367{
368	xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
369}
370
371module_init(hmark_tg_init);
372module_exit(hmark_tg_exit);
373