tc_bpf.c revision 11c39b5e98a163889fe5e1840e1b2a105bc33680
1/*
2 * tc_bpf.c	BPF common code
3 *
4 *		This program is free software; you can distribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Daniel Borkmann <dborkman@redhat.com>
10 *		Jiri Pirko <jiri@resnulli.us>
11 *		Alexei Starovoitov <ast@plumgrid.com>
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
19#include <errno.h>
20#include <fcntl.h>
21#include <stdarg.h>
22#include <sys/types.h>
23#include <sys/stat.h>
24#include <linux/filter.h>
25#include <linux/netlink.h>
26#include <linux/rtnetlink.h>
27
28#ifdef HAVE_ELF
29#include <libelf.h>
30#include <gelf.h>
31#endif
32
33#include "utils.h"
34#include "tc_util.h"
35#include "tc_bpf.h"
36
/*
 * bpf_parse_string - obtain a classic BPF program string and parse the
 * instruction count it starts with.
 *
 * @arg:          the program text itself, or a file name when @from_file
 * @from_file:    if true, the first line of file @arg is used as program
 * @bpf_len:      out: instruction count found in front of the program
 * @bpf_string:   out: start of the program text (count still included)
 * @need_release: out: true if *@bpf_string was allocated here and has to
 *                be free()d by the caller after a successful return
 * @separator:    character that must directly follow the count
 *
 * Returns 0 on success. On failure a negative errno value is returned
 * (-ENOMEM, -ENOENT, -EIO or -EINVAL), *@need_release is false and no
 * allocation is left behind for the caller.
 */
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
		     char **bpf_string, bool *need_release,
		     const char separator)
{
	unsigned long len;
	char *end;

	/* Well defined on every exit path, including the early errors. */
	*need_release = false;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string;
		FILE *fp;

		/* Room for the count prefix plus BPF_MAXINSNS encoded ops. */
		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		/* The whole program is expected on the first line. */
		if (!fgets(tmp_string, tmp_len, fp)) {
			free(tmp_string);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*bpf_string = arg;
	}

	/*
	 * strtoul() instead of sscanf("%hu"): the latter has undefined
	 * behaviour for values that do not fit and in practice wraps, so
	 * "65537," would be taken as a length of 1. A missing separator
	 * (end of string) is rejected like any other mismatch.
	 */
	len = strtoul(*bpf_string, &end, 10);
	if (end == *bpf_string || len > 0xffff ||
	    *end == '\0' || *end != separator) {
		if (*need_release) {
			free(*bpf_string);
			/* Leave nothing dangling for the caller to free. */
			*bpf_string = NULL;
			*need_release = false;
		}
		return -EINVAL;
	}

	*bpf_len = (__u16) len;
	return 0;
}
86
/*
 * bpf_parse_ops - decode a textual classic BPF program into @bpf_ops.
 *
 * @argc:      number of entries in @argv, at least one is required
 * @argv:      argv[0] is the program text or, with @from_file, a file name
 * @bpf_ops:   receives the decoded instructions; at most BPF_MAXINSNS
 *             entries are written
 * @from_file: passed through to bpf_parse_string()
 *
 * The expected format is "<len>,<code> <jt> <jf> <k>,...". Returns the
 * number of instructions on success and -EINVAL on any error.
 */
int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
		  bool from_file)
{
	char *bpf_string, *token, separator = ',';
	bool need_release = false;
	__u16 bpf_len = 0;
	int ret = 0, i = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Every instruction starts right behind a separator; a separator
	 * at the very end of the string terminates the walk.
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded "
				"length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		/* The literals are concatenated, keep the separating space. */
		fprintf(stderr, "Parsed program length is less than encoded "
			"length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;

out:
	if (need_release)
		free(bpf_string);

	return ret;
}
139
/*
 * bpf_print_ops - write a classic BPF program to @f in the textual
 * "bytecode '<len>,<code> <jt> <jf> <k>,...'" form.
 *
 * @f:       output stream
 * @bpf_ops: netlink attribute whose payload is an array of sock_filter
 * @len:     number of instructions to print; nothing is printed for 0
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	const struct sock_filter *insn = RTA_DATA(bpf_ops);
	const struct sock_filter *last;

	if (!len)
		return;

	last = insn + (len - 1);

	fprintf(f, "bytecode \'%u,", len);

	/* All instructions but the final one are followed by a comma. */
	while (insn < last) {
		fprintf(f, "%hu %hhu %hhu %u,", insn->code, insn->jt,
			insn->jf, insn->k);
		insn++;
	}

	/* The final instruction closes the quote and the line. */
	fprintf(f, "%hu %hhu %hhu %u\'\n", insn->code, insn->jt,
		insn->jf, insn->k);
}
157
158#ifdef HAVE_ELF
159struct bpf_elf_sec_data {
160	GElf_Shdr	sec_hdr;
161	char		*sec_name;
162	Elf_Data	*sec_data;
163};
164
/*
 * Log buffer handed to the kernel on BPF_PROG_LOAD; bpf_dump_error()
 * prints and then clears it.
 */
static char bpf_log_buf[8192];
166
167static const char *prog_type_section(enum bpf_prog_type type)
168{
169	switch (type) {
170	case BPF_PROG_TYPE_SCHED_CLS:
171		return ELF_SECTION_CLASSIFIER;
172	/* case BPF_PROG_TYPE_SCHED_ACT:   */
173	/*	return ELF_SECTION_ACTION; */
174	default:
175		return NULL;
176	}
177}
178
179static void bpf_dump_error(const char *format, ...)  __check_format_string(1, 2);
180static void bpf_dump_error(const char *format, ...)
181{
182	va_list vl;
183
184	va_start(vl, format);
185	vfprintf(stderr, format, vl);
186	va_end(vl);
187
188	fprintf(stderr, "%s", bpf_log_buf);
189	memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
190}
191
192static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
193			  unsigned int size_value, unsigned int max_elem)
194{
195	union bpf_attr attr = {
196		.map_type	= type,
197		.key_size	= size_key,
198		.value_size	= size_value,
199		.max_entries	= max_elem,
200	};
201
202	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
203}
204
205static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
206			 unsigned int len, const char *license)
207{
208	union bpf_attr attr = {
209		.prog_type	= type,
210		.insns		= bpf_ptr_to_u64(insns),
211		.insn_cnt	= len / sizeof(struct bpf_insn),
212		.license	= bpf_ptr_to_u64(license),
213		.log_buf	= bpf_ptr_to_u64(bpf_log_buf),
214		.log_size	= sizeof(bpf_log_buf),
215		.log_level	= 1,
216	};
217
218	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
219}
220
221static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns,
222			   unsigned int size, const char *license)
223{
224	int prog_fd = bpf_prog_load(type, insns, size, license);
225
226	if (prog_fd < 0)
227		bpf_dump_error("BPF program rejected: %s\n", strerror(errno));
228
229	return prog_fd;
230}
231
232static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
233			  unsigned int size_value, unsigned int max_elem)
234{
235	int map_fd = bpf_create_map(type, size_key, size_value, max_elem);
236
237	if (map_fd < 0)
238		bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
239
240	return map_fd;
241}
242
/* Mark the first @max_fds slots of @map_fds as unused (-1). */
static void bpf_maps_init(int *map_fds, unsigned int max_fds)
{
	int *slot = map_fds;
	unsigned int left = max_fds;

	while (left > 0) {
		*slot = -1;
		slot++;
		left--;
	}
}
250
/*
 * Close every descriptor in the first @max_fds slots of @map_fds that
 * is in use, i.e. not negative. The array itself is left untouched.
 */
static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds)
{
	const int *fd = map_fds;
	const int *end = map_fds + max_fds;

	for (; fd != end; fd++) {
		if (*fd < 0)
			continue;

		close(*fd);
	}
}
260
261static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps,
262			   int *map_fds, unsigned int max_fds)
263{
264	int i, ret;
265
266	for (i = 0; i < num_maps && num_maps <= max_fds; i++) {
267		struct bpf_elf_map *map = &maps[i];
268
269		ret = bpf_map_attach(map->type, map->size_key,
270				     map->size_value, map->max_elem);
271		if (ret < 0)
272			goto err_unwind;
273
274		map_fds[i] = ret;
275	}
276
277	return 0;
278
279err_unwind:
280	bpf_maps_destroy(map_fds, i);
281	return ret;
282}
283
284static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
285				 struct bpf_elf_sec_data *sec_data)
286{
287	GElf_Shdr sec_hdr;
288	Elf_Scn *sec_fd;
289	Elf_Data *sec_edata;
290	char *sec_name;
291
292	memset(sec_data, 0, sizeof(*sec_data));
293
294	sec_fd = elf_getscn(elf_fd, sec_index);
295	if (!sec_fd)
296		return -EINVAL;
297
298	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
299		return -EIO;
300
301	sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx,
302			      sec_hdr.sh_name);
303	if (!sec_name || !sec_hdr.sh_size)
304		return -ENOENT;
305
306	sec_edata = elf_getdata(sec_fd, NULL);
307	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
308		return -EIO;
309
310	memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
311	sec_data->sec_name = sec_name;
312	sec_data->sec_data = sec_edata;
313
314	return 0;
315}
316
317static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
318			       struct bpf_elf_sec_data *data_insn,
319			       Elf_Data *sym_tab, int *map_fds, int max_fds)
320{
321	Elf_Data *idata = data_insn->sec_data;
322	GElf_Shdr *rhdr = &data_relo->sec_hdr;
323	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
324	struct bpf_insn *insns = idata->d_buf;
325	unsigned int num_insns = idata->d_size / sizeof(*insns);
326
327	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
328		unsigned int ioff, fnum;
329		GElf_Rel relo;
330		GElf_Sym sym;
331
332		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
333			return -EIO;
334
335		ioff = relo.r_offset / sizeof(struct bpf_insn);
336		if (ioff >= num_insns)
337			return -EINVAL;
338		if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
339			return -EINVAL;
340
341		if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
342			return -EIO;
343
344		fnum = sym.st_value / sizeof(struct bpf_elf_map);
345		if (fnum >= max_fds)
346			return -EINVAL;
347
348		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
349		insns[ioff].imm = map_fds[fnum];
350	}
351
352	return 0;
353}
354
355static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
356			       int *map_fds, unsigned int max_fds,
357			       char *license, unsigned int lic_len,
358			       Elf_Data **sym_tab)
359{
360	int sec_index, ret = -1;
361
362	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
363		struct bpf_elf_sec_data data_anc;
364
365		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
366					    &data_anc);
367		if (ret < 0)
368			continue;
369
370		/* Extract and load eBPF map fds. */
371		if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) {
372			struct bpf_elf_map *maps = data_anc.sec_data->d_buf;
373			unsigned int maps_num = data_anc.sec_data->d_size /
374						sizeof(*maps);
375
376			sec_seen[sec_index] = true;
377			ret = bpf_maps_attach(maps, maps_num, map_fds,
378					      max_fds);
379			if (ret < 0)
380				return ret;
381		}
382		/* Extract eBPF license. */
383		else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
384			if (data_anc.sec_data->d_size > lic_len)
385				return -ENOMEM;
386
387			sec_seen[sec_index] = true;
388			memcpy(license, data_anc.sec_data->d_buf,
389			       data_anc.sec_data->d_size);
390		}
391		/* Extract symbol table for relocations (map fd fixups). */
392		else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
393			sec_seen[sec_index] = true;
394			*sym_tab = data_anc.sec_data;
395		}
396	}
397
398	return ret;
399}
400
401static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
402			       enum bpf_prog_type type, char *license,
403			       Elf_Data *sym_tab, int *map_fds, unsigned int max_fds)
404{
405	int sec_index, prog_fd = -1;
406
407	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
408		struct bpf_elf_sec_data data_relo, data_insn;
409		int ins_index, ret;
410
411		/* Attach eBPF programs with relocation data (maps). */
412		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
413					    &data_relo);
414		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
415			continue;
416
417		ins_index = data_relo.sec_hdr.sh_info;
418
419		ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
420					    &data_insn);
421		if (ret < 0)
422			continue;
423		if (strcmp(data_insn.sec_name, prog_type_section(type)))
424			continue;
425
426		sec_seen[sec_index] = true;
427		sec_seen[ins_index] = true;
428
429		ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab,
430					  map_fds, max_fds);
431		if (ret < 0)
432			continue;
433
434		prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
435					  data_insn.sec_data->d_size, license);
436		if (prog_fd < 0)
437			continue;
438
439		break;
440	}
441
442	return prog_fd;
443}
444
445static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
446			  enum bpf_prog_type type, char *license)
447{
448	int sec_index, prog_fd = -1;
449
450	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
451		struct bpf_elf_sec_data data_insn;
452		int ret;
453
454		/* Attach eBPF programs without relocation data. */
455		if (sec_seen[sec_index])
456			continue;
457
458		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
459					    &data_insn);
460		if (ret < 0)
461			continue;
462		if (strcmp(data_insn.sec_name, prog_type_section(type)))
463			continue;
464
465		prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
466					  data_insn.sec_data->d_size, license);
467		if (prog_fd < 0)
468			continue;
469
470		break;
471	}
472
473	return prog_fd;
474}
475
476int bpf_open_object(const char *path, enum bpf_prog_type type)
477{
478	int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds);
479	char license[ELF_MAX_LICENSE_LEN];
480	int file_fd, prog_fd = -1, ret;
481	Elf_Data *sym_tab = NULL;
482	GElf_Ehdr elf_hdr;
483	bool *sec_seen;
484	Elf *elf_fd;
485
486	if (elf_version(EV_CURRENT) == EV_NONE)
487		return -EINVAL;
488
489	file_fd = open(path, O_RDONLY, 0);
490	if (file_fd < 0)
491		return -errno;
492
493	elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
494	if (!elf_fd) {
495		ret = -EINVAL;
496		goto out;
497	}
498
499	if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
500		ret = -EIO;
501		goto out_elf;
502	}
503
504	sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen));
505	if (!sec_seen) {
506		ret = -ENOMEM;
507		goto out_elf;
508	}
509
510	memset(license, 0, sizeof(license));
511	bpf_maps_init(map_fds, max_fds);
512
513	ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds,
514				  license, sizeof(license), &sym_tab);
515	if (ret < 0)
516		goto out_maps;
517	if (sym_tab)
518		prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type,
519					      license, sym_tab, map_fds, max_fds);
520	if (prog_fd < 0)
521		prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type,
522					 license);
523	if (prog_fd < 0)
524		goto out_maps;
525out_sec:
526	free(sec_seen);
527out_elf:
528	elf_end(elf_fd);
529out:
530	close(file_fd);
531	return prog_fd;
532
533out_maps:
534	bpf_maps_destroy(map_fds, max_fds);
535	goto out_sec;
536}
537
538#endif /* HAVE_ELF */
539