1/*
2 * Kernel iptables module to track stats for packets based on user tags.
3 *
4 * (C) 2011 Google, Inc
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10#ifndef __XT_QTAGUID_INTERNAL_H__
11#define __XT_QTAGUID_INTERNAL_H__
12
13#include <linux/types.h>
14#include <linux/rbtree.h>
15#include <linux/spinlock_types.h>
16#include <linux/workqueue.h>
17
/*
 * Debug categories. Each mask is a single bit so that categories can be
 * OR-ed together into one debug mask value.
 */

/* Interface (iface) handling. */
#define IDEBUG_MASK (1 << 0)
/* iptables matching. Per packet. */
#define MDEBUG_MASK (1 << 1)
/* Red-black tree handling. Per packet. */
#define RDEBUG_MASK (1 << 2)
/* procfs ctrl/stats handling. */
#define CDEBUG_MASK (1 << 3)
/* Device (dev) and resource tracking. */
#define DDEBUG_MASK (1 << 4)

/*
 * Default mask value; 0 has none of the category bits set.
 * E.g. (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) would select three of them.
 */
#define DEFAULT_DEBUG_MASK 0
31
/*
 * Compile-time switches: a *DEBUG symbol that is defined compiles the
 * pr_debug calls of its category in; undefine it to compile them out.
 * All undef: text size ~ 0x3030; all def: ~ 0x4404.
 */
#define IDEBUG
#define MDEBUG
#define RDEBUG
#define CDEBUG
#define DDEBUG

/*
 * Emit a pr_debug() message only when qtaguid_debug_mask has at least one
 * of the bits in @mask set. Wrapped in do/while (0) so that it behaves as a
 * single statement.
 */
#define MSK_DEBUG(mask, ...)					\
	do {							\
		if (unlikely(qtaguid_debug_mask & (mask)))	\
			pr_debug(__VA_ARGS__);			\
	} while (0)

/*
 * Per-category wrappers. When the category's *DEBUG symbol is not defined
 * the wrapper expands to no_printk() instead of MSK_DEBUG().
 */

/* Iface handling */
#if defined(IDEBUG)
#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__)
#else
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#endif

/* Iptable matching */
#if defined(MDEBUG)
#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__)
#else
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#endif

/* Red-black tree handling */
#if defined(RDEBUG)
#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__)
#else
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#endif

/* procfs ctrl/stats handling */
#if defined(CDEBUG)
#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__)
#else
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#endif

/* dev and resource tracking */
#if defined(DDEBUG)
#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__)
#else
#define DR_DEBUG(...) no_printk(__VA_ARGS__)
#endif
71
72extern uint qtaguid_debug_mask;
73
74/*---------------------------------------------------------------------------*/
75/*
76 * Tags:
77 *
78 * They represent what the data usage counters will be tracked against.
79 * By default a tag is just based on the UID.
 * The UID is used as the base for policing, and cannot be ignored.
81 * So a tag will always at least represent a UID (uid_tag).
82 *
83 * A tag can be augmented with an "accounting tag" which is associated
84 * with a UID.
85 * User space can set the acct_tag portion of the tag which is then used
86 * with sockets: all data belonging to that socket will be counted against the
87 * tag. The policing is then based on the tag's uid_tag portion,
88 * and stats are collected for the acct_tag portion separately.
89 *
90 * There could be
91 * a:  {acct_tag=1, uid_tag=10003}
92 * b:  {acct_tag=2, uid_tag=10003}
93 * c:  {acct_tag=3, uid_tag=10003}
94 * d:  {acct_tag=0, uid_tag=10003}
95 * a, b, and c represent tags associated with specific sockets.
96 * d is for the totals for that uid, including all untagged traffic.
97 * Typically d is used with policing/quota rules.
98 *
99 * We want tag_t big enough to distinguish uid_t and acct_tag.
100 * It might become a struct if needed.
101 * Nothing should be using it as an int.
102 */
/*
 * A tag packs two fields into 64 bits: the uid_tag in the low 32 bits and
 * the acct_tag in the high 32 bits. Only used via the accessors below.
 */
typedef uint64_t tag_t;

#define TAG_UID_MASK 0xFFFFFFFFULL
#define TAG_ACCT_MASK (~0xFFFFFFFFULL)

/*
 * Three-way comparison of two tags as plain 64-bit values.
 * Returns -1 if t1 < t2, 0 if they are equal, 1 if t1 > t2.
 */
static inline int tag_compare(tag_t t1, tag_t t2)
{
	if (t1 < t2)
		return -1;
	if (t1 > t2)
		return 1;
	return 0;
}

/*
 * OR the uid into acct_tag. acct_tag is used as given: its low 32 bits are
 * not cleared here (see valid_atag()).
 */
static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
{
	tag_t uid_bits = uid;

	return acct_tag | uid_bits;
}

/* Build a tag that carries only a uid (acct_tag portion is 0). */
static inline tag_t make_tag_from_uid(uid_t uid)
{
	return (tag_t)uid;
}

/* Extract the uid stored in the low 32 bits of the tag. */
static inline uid_t get_uid_from_tag(tag_t tag)
{
	return (uid_t)(tag & TAG_UID_MASK);
}

/* Return the tag with its acct_tag portion cleared. */
static inline tag_t get_utag_from_tag(tag_t tag)
{
	tag_t uid_only = tag & TAG_UID_MASK;

	return uid_only;
}

/* Return the tag with its uid portion cleared. */
static inline tag_t get_atag_from_tag(tag_t tag)
{
	tag_t acct_only = tag & TAG_ACCT_MASK;

	return acct_only;
}

/* True when the tag has no bits set in the uid portion. */
static inline bool valid_atag(tag_t tag)
{
	return (tag & TAG_UID_MASK) == 0;
}

/* Place a 32-bit value into the acct_tag (high 32 bits) portion of a tag. */
static inline tag_t make_atag_from_value(uint32_t value)
{
	tag_t atag = value;

	return atag << 32;
}
142/*---------------------------------------------------------------------------*/
143
/*
 * Default cap on the number of socket tags a single UID may have active at
 * once. The limit is per UID, not per process: every process running under
 * that UID counts towards it, and so do special UIDs that can impersonate
 * it (e.g. download manager, ...).
 */
#define DEFAULT_MAX_SOCK_TAGS 1024
151
/*
 * For now we only track 2 sets of counters.
 * The default set is 0.
 * Userspace can activate another set for a given uid being tracked.
 */
#define IFS_MAX_COUNTER_SETS 2

/* Traffic direction; IFS_MAX_DIRECTIONS is the number of directions. */
enum ifs_tx_rx {
	IFS_TX,
	IFS_RX,
	IFS_MAX_DIRECTIONS
};

/*
 * For now, TCP, UDP, the rest.
 * IFS_MAX_PROTOS is the number of protocol buckets.
 */
enum ifs_proto {
	IFS_TCP,
	IFS_UDP,
	IFS_PROTO_OTHER,
	IFS_MAX_PROTOS
};

/* One bytes/packets counter pair. */
struct byte_packet_counters {
	uint64_t bytes;
	uint64_t packets;
};

/* Counter pairs indexed by [counter set][direction][protocol]. */
struct data_counters {
	struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
};

/*
 * Sum the byte counters of every protocol bucket for one counter set and
 * direction.
 *
 * @counters:  counters to read; not modified.
 * @set:       counter set index; used unchecked, so the caller must pass a
 *             value below IFS_MAX_COUNTER_SETS.
 * @direction: IFS_TX or IFS_RX; used unchecked as an array index.
 *
 * Iterates up to IFS_MAX_PROTOS rather than naming each protocol, so a
 * bucket added to enum ifs_proto is included automatically.
 */
static inline uint64_t dc_sum_bytes(const struct data_counters *counters,
				    int set,
				    enum ifs_tx_rx direction)
{
	uint64_t total = 0;
	int proto;

	for (proto = 0; proto < IFS_MAX_PROTOS; proto++)
		total += counters->bpc[set][direction][proto].bytes;

	return total;
}

/*
 * Sum the packet counters of every protocol bucket for one counter set and
 * direction.
 *
 * @counters:  counters to read; not modified.
 * @set:       counter set index; used unchecked, so the caller must pass a
 *             value below IFS_MAX_COUNTER_SETS.
 * @direction: IFS_TX or IFS_RX; used unchecked as an array index.
 *
 * Iterates up to IFS_MAX_PROTOS rather than naming each protocol, so a
 * bucket added to enum ifs_proto is included automatically.
 */
static inline uint64_t dc_sum_packets(const struct data_counters *counters,
				      int set,
				      enum ifs_tx_rx direction)
{
	uint64_t total = 0;
	int proto;

	for (proto = 0; proto < IFS_MAX_PROTOS; proto++)
		total += counters->bpc[set][direction][proto].packets;

	return total;
}
199
200
201/* Generic X based nodes used as a base for rb_tree ops */
202struct tag_node {
203	struct rb_node node;
204	tag_t tag;
205};
206
207struct tag_stat {
208	struct tag_node tn;
209	struct data_counters counters;
210	/*
211	 * If this tag is acct_tag based, we need to count against the
212	 * matching parent uid_tag.
213	 */
214	struct data_counters *parent_counters;
215};
216
217struct iface_stat {
218	struct list_head list;  /* in iface_stat_list */
219	char *ifname;
220	bool active;
221	/* net_dev is only valid for active iface_stat */
222	struct net_device *net_dev;
223
224	struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
225	struct data_counters totals_via_skb;
226	/*
227	 * We keep the last_known, because some devices reset their counters
228	 * just before NETDEV_UP, while some will reset just before
229	 * NETDEV_REGISTER (which is more normal).
230	 * So now, if the device didn't do a NETDEV_UNREGISTER and we see
231	 * its current dev stats smaller that what was previously known, we
232	 * assume an UNREGISTER and just use the last_known.
233	 */
234	struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS];
235	/* last_known is usable when last_known_valid is true */
236	bool last_known_valid;
237
238	struct proc_dir_entry *proc_ptr;
239
240	struct rb_root tag_stat_tree;
241	spinlock_t tag_stat_list_lock;
242};
243
244/* This is needed to create proc_dir_entries from atomic context. */
245struct iface_stat_work {
246	struct work_struct iface_work;
247	struct iface_stat *iface_entry;
248};
249
250/*
251 * Track tag that this socket is transferring data for, and not necessarily
252 * the uid that owns the socket.
253 * This is the tag against which tag_stat.counters will be billed.
254 * These structs need to be looked up by sock and pid.
255 */
256struct sock_tag {
257	struct rb_node sock_node;
258	struct sock *sk;  /* Only used as a number, never dereferenced */
259	/* The socket is needed for sockfd_put() */
260	struct socket *socket;
261	/* Used to associate with a given pid */
262	struct list_head list;   /* in proc_qtu_data.sock_tag_list */
263	pid_t pid;
264
265	tag_t tag;
266};
267
268struct qtaguid_event_counts {
269	/* Various successful events */
270	atomic64_t sockets_tagged;
271	atomic64_t sockets_untagged;
272	atomic64_t counter_set_changes;
273	atomic64_t delete_cmds;
274	atomic64_t iface_events;  /* Number of NETDEV_* events handled */
275
276	atomic64_t match_calls;   /* Number of times iptables called mt */
277	/* Number of times iptables called mt from pre or post routing hooks */
278	atomic64_t match_calls_prepost;
279	/*
280	 * match_found_sk_*: numbers related to the netfilter matching
281	 * function finding a sock for the sk_buff.
282	 * Total skbs processed is sum(match_found*).
283	 */
284	atomic64_t match_found_sk;   /* An sk was already in the sk_buff. */
285	/* The connection tracker had or didn't have the sk. */
286	atomic64_t match_found_sk_in_ct;
287	atomic64_t match_found_no_sk_in_ct;
288	/*
289	 * No sk could be found. No apparent owner. Could happen with
290	 * unsolicited traffic.
291	 */
292	atomic64_t match_no_sk;
293	/*
294	 * The file ptr in the sk_socket wasn't there.
295	 * This might happen for traffic while the socket is being closed.
296	 */
297	atomic64_t match_no_sk_file;
298};
299
300/* Track the set active_set for the given tag. */
301struct tag_counter_set {
302	struct tag_node tn;
303	int active_set;
304};
305
306/*----------------------------------------------*/
307/*
308 * The qtu uid data is used to track resources that are created directly or
309 * indirectly by processes (uid tracked).
310 * It is shared by the processes with the same uid.
311 * Some of the resource will be counted to prevent further rogue allocations,
312 * some will need freeing once the owner process (uid) exits.
313 */
314struct uid_tag_data {
315	struct rb_node node;
316	uid_t uid;
317
318	/*
319	 * For the uid, how many accounting tags have been set.
320	 */
321	int num_active_tags;
322	/* Track the number of proc_qtu_data that reference it */
323	int num_pqd;
324	struct rb_root tag_ref_tree;
325	/* No tag_node_tree_lock; use uid_tag_data_tree_lock */
326};
327
328struct tag_ref {
329	struct tag_node tn;
330
331	/*
332	 * This tracks the number of active sockets that have a tag on them
333	 * which matches this tag_ref.tn.tag.
334	 * A tag ref can live on after the sockets are untagged.
335	 * A tag ref can only be removed during a tag delete command.
336	 */
337	int num_sock_tags;
338};
339
340struct proc_qtu_data {
341	struct rb_node node;
342	pid_t pid;
343
344	struct uid_tag_data *parent_tag_data;
345
346	/* Tracks the sock_tags that need freeing upon this proc's death */
347	struct list_head sock_tag_list;
348	/* No spinlock_t sock_tag_list_lock; use the global one. */
349};
350
351/*----------------------------------------------*/
352#endif  /* ifndef __XT_QTAGUID_INTERNAL_H__ */
353