1/* 2 * Kernel iptables module to track stats for packets based on user tags. 3 * 4 * (C) 2011 Google, Inc 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License version 2 as 8 * published by the Free Software Foundation. 9 */ 10#ifndef __XT_QTAGUID_INTERNAL_H__ 11#define __XT_QTAGUID_INTERNAL_H__ 12 13#include <linux/types.h> 14#include <linux/rbtree.h> 15#include <linux/spinlock_types.h> 16#include <linux/workqueue.h> 17 18/* Iface handling */ 19#define IDEBUG_MASK (1<<0) 20/* Iptable Matching. Per packet. */ 21#define MDEBUG_MASK (1<<1) 22/* Red-black tree handling. Per packet. */ 23#define RDEBUG_MASK (1<<2) 24/* procfs ctrl/stats handling */ 25#define CDEBUG_MASK (1<<3) 26/* dev and resource tracking */ 27#define DDEBUG_MASK (1<<4) 28 29/* E.g (IDEBUG_MASK | CDEBUG_MASK | DDEBUG_MASK) */ 30#define DEFAULT_DEBUG_MASK 0 31 32/* 33 * (Un)Define these *DEBUG to compile out/in the pr_debug calls. 34 * All undef: text size ~ 0x3030; all def: ~ 0x4404. 35 */ 36#define IDEBUG 37#define MDEBUG 38#define RDEBUG 39#define CDEBUG 40#define DDEBUG 41 42#define MSK_DEBUG(mask, ...) do { \ 43 if (unlikely(qtaguid_debug_mask & (mask))) \ 44 pr_debug(__VA_ARGS__); \ 45 } while (0) 46#ifdef IDEBUG 47#define IF_DEBUG(...) MSK_DEBUG(IDEBUG_MASK, __VA_ARGS__) 48#else 49#define IF_DEBUG(...) no_printk(__VA_ARGS__) 50#endif 51#ifdef MDEBUG 52#define MT_DEBUG(...) MSK_DEBUG(MDEBUG_MASK, __VA_ARGS__) 53#else 54#define MT_DEBUG(...) no_printk(__VA_ARGS__) 55#endif 56#ifdef RDEBUG 57#define RB_DEBUG(...) MSK_DEBUG(RDEBUG_MASK, __VA_ARGS__) 58#else 59#define RB_DEBUG(...) no_printk(__VA_ARGS__) 60#endif 61#ifdef CDEBUG 62#define CT_DEBUG(...) MSK_DEBUG(CDEBUG_MASK, __VA_ARGS__) 63#else 64#define CT_DEBUG(...) no_printk(__VA_ARGS__) 65#endif 66#ifdef DDEBUG 67#define DR_DEBUG(...) MSK_DEBUG(DDEBUG_MASK, __VA_ARGS__) 68#else 69#define DR_DEBUG(...) no_printk(__VA_ARGS__) 70#endif 71 72extern uint qtaguid_debug_mask; 73 74/*---------------------------------------------------------------------------*/ 75/* 76 * Tags: 77 * 78 * They represent what the data usage counters will be tracked against. 79 * By default a tag is just based on the UID. 80 * The UID is used as the base for policing, and can not be ignored. 81 * So a tag will always at least represent a UID (uid_tag). 82 * 83 * A tag can be augmented with an "accounting tag" which is associated 84 * with a UID. 85 * User space can set the acct_tag portion of the tag which is then used 86 * with sockets: all data belonging to that socket will be counted against the 87 * tag. The policing is then based on the tag's uid_tag portion, 88 * and stats are collected for the acct_tag portion separately. 89 * 90 * There could be 91 * a: {acct_tag=1, uid_tag=10003} 92 * b: {acct_tag=2, uid_tag=10003} 93 * c: {acct_tag=3, uid_tag=10003} 94 * d: {acct_tag=0, uid_tag=10003} 95 * a, b, and c represent tags associated with specific sockets. 96 * d is for the totals for that uid, including all untagged traffic. 97 * Typically d is used with policing/quota rules. 98 * 99 * We want tag_t big enough to distinguish uid_t and acct_tag. 100 * It might become a struct if needed. 101 * Nothing should be using it as an int. 102 */ 103typedef uint64_t tag_t; /* Only used via accessors */ 104 105#define TAG_UID_MASK 0xFFFFFFFFULL 106#define TAG_ACCT_MASK (~0xFFFFFFFFULL) 107 108static inline int tag_compare(tag_t t1, tag_t t2) 109{ 110 return t1 < t2 ? -1 : t1 == t2 ? 0 : 1; 111} 112 113static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid) 114{ 115 return acct_tag | uid; 116} 117static inline tag_t make_tag_from_uid(uid_t uid) 118{ 119 return uid; 120} 121static inline uid_t get_uid_from_tag(tag_t tag) 122{ 123 return tag & TAG_UID_MASK; 124} 125static inline tag_t get_utag_from_tag(tag_t tag) 126{ 127 return tag & TAG_UID_MASK; 128} 129static inline tag_t get_atag_from_tag(tag_t tag) 130{ 131 return tag & TAG_ACCT_MASK; 132} 133 134static inline bool valid_atag(tag_t tag) 135{ 136 return !(tag & TAG_UID_MASK); 137} 138static inline tag_t make_atag_from_value(uint32_t value) 139{ 140 return (uint64_t)value << 32; 141} 142/*---------------------------------------------------------------------------*/ 143 144/* 145 * Maximum number of socket tags that a UID is allowed to have active. 146 * Multiple processes belonging to the same UID contribute towards this limit. 147 * Special UIDs that can impersonate a UID also contribute (e.g. download 148 * manager, ...) 149 */ 150#define DEFAULT_MAX_SOCK_TAGS 1024 151 152/* 153 * For now we only track 2 sets of counters. 154 * The default set is 0. 155 * Userspace can activate another set for a given uid being tracked. 156 */ 157#define IFS_MAX_COUNTER_SETS 2 158 159enum ifs_tx_rx { 160 IFS_TX, 161 IFS_RX, 162 IFS_MAX_DIRECTIONS 163}; 164 165/* For now, TCP, UDP, the rest */ 166enum ifs_proto { 167 IFS_TCP, 168 IFS_UDP, 169 IFS_PROTO_OTHER, 170 IFS_MAX_PROTOS 171}; 172 173struct byte_packet_counters { 174 uint64_t bytes; 175 uint64_t packets; 176}; 177 178struct data_counters { 179 struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS]; 180}; 181 182static inline uint64_t dc_sum_bytes(struct data_counters *counters, 183 int set, 184 enum ifs_tx_rx direction) 185{ 186 return counters->bpc[set][direction][IFS_TCP].bytes 187 + counters->bpc[set][direction][IFS_UDP].bytes 188 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes; 189} 190 191static inline uint64_t dc_sum_packets(struct data_counters *counters, 192 int set, 193 enum ifs_tx_rx direction) 194{ 195 return counters->bpc[set][direction][IFS_TCP].packets 196 + counters->bpc[set][direction][IFS_UDP].packets 197 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets; 198} 199 200 201/* Generic X based nodes used as a base for rb_tree ops */ 202struct tag_node { 203 struct rb_node node; 204 tag_t tag; 205}; 206 207struct tag_stat { 208 struct tag_node tn; 209 struct data_counters counters; 210 /* 211 * If this tag is acct_tag based, we need to count against the 212 * matching parent uid_tag. 213 */ 214 struct data_counters *parent_counters; 215}; 216 217struct iface_stat { 218 struct list_head list; /* in iface_stat_list */ 219 char *ifname; 220 bool active; 221 /* net_dev is only valid for active iface_stat */ 222 struct net_device *net_dev; 223 224 struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS]; 225 struct data_counters totals_via_skb; 226 /* 227 * We keep the last_known, because some devices reset their counters 228 * just before NETDEV_UP, while some will reset just before 229 * NETDEV_REGISTER (which is more normal). 230 * So now, if the device didn't do a NETDEV_UNREGISTER and we see 231 * its current dev stats smaller that what was previously known, we 232 * assume an UNREGISTER and just use the last_known. 233 */ 234 struct byte_packet_counters last_known[IFS_MAX_DIRECTIONS]; 235 /* last_known is usable when last_known_valid is true */ 236 bool last_known_valid; 237 238 struct proc_dir_entry *proc_ptr; 239 240 struct rb_root tag_stat_tree; 241 spinlock_t tag_stat_list_lock; 242}; 243 244/* This is needed to create proc_dir_entries from atomic context. */ 245struct iface_stat_work { 246 struct work_struct iface_work; 247 struct iface_stat *iface_entry; 248}; 249 250/* 251 * Track tag that this socket is transferring data for, and not necessarily 252 * the uid that owns the socket. 253 * This is the tag against which tag_stat.counters will be billed. 254 * These structs need to be looked up by sock and pid. 255 */ 256struct sock_tag { 257 struct rb_node sock_node; 258 struct sock *sk; /* Only used as a number, never dereferenced */ 259 /* The socket is needed for sockfd_put() */ 260 struct socket *socket; 261 /* Used to associate with a given pid */ 262 struct list_head list; /* in proc_qtu_data.sock_tag_list */ 263 pid_t pid; 264 265 tag_t tag; 266}; 267 268struct qtaguid_event_counts { 269 /* Various successful events */ 270 atomic64_t sockets_tagged; 271 atomic64_t sockets_untagged; 272 atomic64_t counter_set_changes; 273 atomic64_t delete_cmds; 274 atomic64_t iface_events; /* Number of NETDEV_* events handled */ 275 276 atomic64_t match_calls; /* Number of times iptables called mt */ 277 /* Number of times iptables called mt from pre or post routing hooks */ 278 atomic64_t match_calls_prepost; 279 /* 280 * match_found_sk_*: numbers related to the netfilter matching 281 * function finding a sock for the sk_buff. 282 * Total skbs processed is sum(match_found*). 283 */ 284 atomic64_t match_found_sk; /* An sk was already in the sk_buff. */ 285 /* The connection tracker had or didn't have the sk. */ 286 atomic64_t match_found_sk_in_ct; 287 atomic64_t match_found_no_sk_in_ct; 288 /* 289 * No sk could be found. No apparent owner. Could happen with 290 * unsolicited traffic. 291 */ 292 atomic64_t match_no_sk; 293 /* 294 * The file ptr in the sk_socket wasn't there. 295 * This might happen for traffic while the socket is being closed. 296 */ 297 atomic64_t match_no_sk_file; 298}; 299 300/* Track the set active_set for the given tag. */ 301struct tag_counter_set { 302 struct tag_node tn; 303 int active_set; 304}; 305 306/*----------------------------------------------*/ 307/* 308 * The qtu uid data is used to track resources that are created directly or 309 * indirectly by processes (uid tracked). 310 * It is shared by the processes with the same uid. 311 * Some of the resource will be counted to prevent further rogue allocations, 312 * some will need freeing once the owner process (uid) exits. 313 */ 314struct uid_tag_data { 315 struct rb_node node; 316 uid_t uid; 317 318 /* 319 * For the uid, how many accounting tags have been set. 320 */ 321 int num_active_tags; 322 /* Track the number of proc_qtu_data that reference it */ 323 int num_pqd; 324 struct rb_root tag_ref_tree; 325 /* No tag_node_tree_lock; use uid_tag_data_tree_lock */ 326}; 327 328struct tag_ref { 329 struct tag_node tn; 330 331 /* 332 * This tracks the number of active sockets that have a tag on them 333 * which matches this tag_ref.tn.tag. 334 * A tag ref can live on after the sockets are untagged. 335 * A tag ref can only be removed during a tag delete command. 336 */ 337 int num_sock_tags; 338}; 339 340struct proc_qtu_data { 341 struct rb_node node; 342 pid_t pid; 343 344 struct uid_tag_data *parent_tag_data; 345 346 /* Tracks the sock_tags that need freeing upon this proc's death */ 347 struct list_head sock_tag_list; 348 /* No spinlock_t sock_tag_list_lock; use the global one. */ 349}; 350 351/*----------------------------------------------*/ 352#endif /* ifndef __XT_QTAGUID_INTERNAL_H__ */ 353