tc_bpf.c revision 473d7840c39addf966cf0cc699c2a2b3cbfe4647
1/*
2 * tc_bpf.c	BPF common code
3 *
4 *		This program is free software; you can distribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Daniel Borkmann <dborkman@redhat.com>
10 *		Jiri Pirko <jiri@resnulli.us>
11 *		Alexei Starovoitov <ast@plumgrid.com>
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
19#include <stdint.h>
20#include <errno.h>
21#include <fcntl.h>
22#include <stdarg.h>
23#include <sys/types.h>
24#include <sys/stat.h>
25#include <sys/un.h>
26#include <linux/filter.h>
27#include <linux/netlink.h>
28#include <linux/rtnetlink.h>
29
30#ifdef HAVE_ELF
31#include <libelf.h>
32#include <gelf.h>
33#endif
34
35#include "utils.h"
36
37#include "bpf_elf.h"
38#include "bpf_scm.h"
39
40#include "tc_util.h"
41#include "tc_bpf.h"
42
/* Fetch a classic BPF program given in "len,code jt jf k,..." notation
 * and parse its leading length field.
 *
 * @arg:          the program text itself, or, when @from_file is set, the
 *                path of a file whose first line holds the program text
 * @from_file:    treat @arg as a file name instead of program text
 * @bpf_len:      receives the encoded instruction count
 * @bpf_string:   receives the program text
 * @need_release: set to true when *@bpf_string was heap allocated here and
 *                has to be free()d by the caller
 * @separator:    character expected directly after the length field
 *
 * Returns 0 on success or a negative errno value. On failure there is
 * nothing left for the caller to release: *@bpf_string is NULL and
 * *@need_release is false.
 */
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
		     char **bpf_string, bool *need_release,
		     const char separator)
{
	char sp;

	*need_release = false;
	*bpf_string = NULL;

	if (from_file) {
		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
		char *tmp_string;
		FILE *fp;

		/* Worst case: length prefix plus BPF_MAXINSNS full-width ops. */
		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
		tmp_string = calloc(1, tmp_len);
		if (tmp_string == NULL)
			return -ENOMEM;

		fp = fopen(arg, "r");
		if (fp == NULL) {
			perror("Cannot fopen");
			free(tmp_string);
			return -ENOENT;
		}

		if (!fgets(tmp_string, tmp_len, fp)) {
			free(tmp_string);
			fclose(fp);
			return -EIO;
		}

		fclose(fp);

		*need_release = true;
		*bpf_string = tmp_string;
	} else {
		*bpf_string = arg;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
	    sp != separator) {
		if (*need_release)
			free(*bpf_string);
		/* Do not hand a dangling pointer marked "needs release"
		 * back to the caller.
		 */
		*bpf_string = NULL;
		*need_release = false;
		return -EINVAL;
	}

	return 0;
}
92
/* Parse a classic BPF program in "len,code jt jf k,..." notation into
 * an array of struct sock_filter.
 *
 * @argc/@argv: argv[0] holds the program text, or a file name when
 *              @from_file is set
 * @bpf_ops:    destination array; the code checks the encoded length
 *              against BPF_MAXINSNS before writing, so it has to provide
 *              room for that many entries
 * @from_file:  read the program text from the file named by argv[0]
 *
 * Returns the number of parsed instructions, or -EINVAL if the text is
 * malformed or the encoded length does not match the number of
 * instructions actually present.
 */
int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
		  bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/* Every separator introduces one instruction; a trailing separator
	 * with nothing behind it ends the walk.
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded "
				"length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		fprintf(stderr, "Parsed program length is less than encoded "
			"length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;

out:
	if (need_release)
		free(bpf_string);

	return ret;
}
145
/* Print @len classic BPF instructions carried as payload of the netlink
 * attribute @bpf_ops to @f, in the form
 * bytecode 'len,code jt jf k,...,code jt jf k'. Nothing is printed when
 * @len is zero.
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops);
	int idx;

	if (len == 0)
		return;

	fprintf(f, "bytecode \'%u,", len);

	for (idx = 0; idx < len; idx++) {
		const struct sock_filter *insn = &ops[idx];

		fprintf(f, "%hu %hhu %hhu %u", insn->code, insn->jt,
			insn->jf, insn->k);
		/* Instructions are comma separated, the last one closes
		 * the quote instead.
		 */
		fputc(idx + 1 < len ? ',' : '\'', f);
	}
}
163
164const char *bpf_default_section(const enum bpf_prog_type type)
165{
166	switch (type) {
167	case BPF_PROG_TYPE_SCHED_CLS:
168		return ELF_SECTION_CLASSIFIER;
169	case BPF_PROG_TYPE_SCHED_ACT:
170		return ELF_SECTION_ACTION;
171	default:
172		return NULL;
173	}
174}
175
176#ifdef HAVE_ELF
/* Snapshot of one ELF section as filled in by bpf_fill_section_data(). */
struct bpf_elf_sec_data {
	GElf_Shdr sec_hdr;	/* copy of the section header */
	char *sec_name;		/* name as returned by elf_strptr() */
	Elf_Data *sec_data;	/* data descriptor returned by elf_getdata() */
};
182
/* Everything bpf_map_set_send() needs to describe the loaded maps to
 * the receiving side of the Unix domain socket.
 */
struct bpf_map_data {
	int *fds;			/* map file descriptors */
	const char *obj;		/* object name copied into the message */
	struct bpf_elf_st *st;		/* device/inode info of the object file */
	struct bpf_elf_map *ent;	/* map definitions, parallel to fds */
};
189
/* If we provide a small buffer with log level enabled, the kernel
 * could fail program load as no buffer space is available for the
 * log and thus verifier fails. In case something doesn't pass the
 * verifier we still want to hand something descriptive to the user.
 */
static char bpf_log_buf[65536];
/* When set, bpf_prog_attach() dumps the log even for accepted programs. */
static bool bpf_verbose;

/* Device/inode of the last successfully loaded object file; compared
 * against the current file to decide whether map creation can be skipped.
 */
static struct bpf_elf_st bpf_st;

/* Descriptors of the created maps; bpf_maps_init() marks unused slots -1. */
static int map_fds[ELF_MAX_MAPS];
/* Copy of the map definitions taken from the object's maps section. */
static struct bpf_elf_map map_ent[ELF_MAX_MAPS];
202
203static void bpf_dump_error(const char *format, ...)  __check_format_string(1, 2);
204static void bpf_dump_error(const char *format, ...)
205{
206	va_list vl;
207
208	va_start(vl, format);
209	vfprintf(stderr, format, vl);
210	va_end(vl);
211
212	if (bpf_log_buf[0]) {
213		fprintf(stderr, "%s\n", bpf_log_buf);
214		memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
215	}
216}
217
218static void bpf_save_finfo(int file_fd)
219{
220	struct stat st;
221	int ret;
222
223	memset(&bpf_st, 0, sizeof(bpf_st));
224
225	ret = fstat(file_fd, &st);
226	if (ret < 0) {
227		fprintf(stderr, "Stat of elf file failed: %s\n",
228			strerror(errno));
229		return;
230	}
231
232	bpf_st.st_dev = st.st_dev;
233	bpf_st.st_ino = st.st_ino;
234}
235
236static void bpf_clear_finfo(void)
237{
238	memset(&bpf_st, 0, sizeof(bpf_st));
239}
240
241static bool bpf_may_skip_map_creation(int file_fd)
242{
243	struct stat st;
244	int ret;
245
246	ret = fstat(file_fd, &st);
247	if (ret < 0) {
248		fprintf(stderr, "Stat of elf file failed: %s\n",
249			strerror(errno));
250		return false;
251	}
252
253	return (bpf_st.st_dev == st.st_dev) &&
254	       (bpf_st.st_ino == st.st_ino);
255}
256
257static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
258			  unsigned int size_value, unsigned int max_elem)
259{
260	union bpf_attr attr = {
261		.map_type	= type,
262		.key_size	= size_key,
263		.value_size	= size_value,
264		.max_entries	= max_elem,
265	};
266
267	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
268}
269
270static int bpf_update_map(int fd, const void *key, const void *value,
271			  uint64_t flags)
272{
273	union bpf_attr attr = {
274		.map_fd		= fd,
275		.key		= bpf_ptr_to_u64(key),
276		.value		= bpf_ptr_to_u64(value),
277		.flags		= flags,
278	};
279
280	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
281}
282
283static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
284			 unsigned int len, const char *license)
285{
286	union bpf_attr attr = {
287		.prog_type	= type,
288		.insns		= bpf_ptr_to_u64(insns),
289		.insn_cnt	= len / sizeof(struct bpf_insn),
290		.license	= bpf_ptr_to_u64(license),
291		.log_buf	= bpf_ptr_to_u64(bpf_log_buf),
292		.log_size	= sizeof(bpf_log_buf),
293		.log_level	= 1,
294	};
295
296	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
297}
298
299static int bpf_prog_attach(enum bpf_prog_type type, const char *sec,
300			   const struct bpf_insn *insns, unsigned int size,
301			   const char *license)
302{
303	int prog_fd = bpf_prog_load(type, insns, size, license);
304
305	if (prog_fd < 0 || bpf_verbose) {
306		bpf_dump_error("%s (section \'%s\'): %s\n", prog_fd < 0 ?
307			       "BPF program rejected" :
308			       "BPF program verification",
309			       sec, strerror(errno));
310	}
311
312	return prog_fd;
313}
314
315static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
316			  unsigned int size_value, unsigned int max_elem)
317{
318	int map_fd = bpf_create_map(type, size_key, size_value, max_elem);
319
320	if (map_fd < 0)
321		bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
322
323	return map_fd;
324}
325
326static void bpf_maps_init(void)
327{
328	int i;
329
330	memset(map_ent, 0, sizeof(map_ent));
331	for (i = 0; i < ARRAY_SIZE(map_fds); i++)
332		map_fds[i] = -1;
333}
334
335static int bpf_maps_count(void)
336{
337	int i, count = 0;
338
339	for (i = 0; i < ARRAY_SIZE(map_fds); i++) {
340		if (map_fds[i] < 0)
341			break;
342		count++;
343	}
344
345	return count;
346}
347
348static void bpf_maps_destroy(void)
349{
350	int i;
351
352	memset(map_ent, 0, sizeof(map_ent));
353	for (i = 0; i < ARRAY_SIZE(map_fds); i++) {
354		if (map_fds[i] >= 0)
355			close(map_fds[i]);
356	}
357}
358
359static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps)
360{
361	int i, ret;
362
363	for (i = 0; (i < num_maps) && (num_maps <= ARRAY_SIZE(map_fds)); i++) {
364		struct bpf_elf_map *map = &maps[i];
365
366		ret = bpf_map_attach(map->type, map->size_key,
367				     map->size_value, map->max_elem);
368		if (ret < 0)
369			goto err_unwind;
370
371		map_fds[i] = ret;
372	}
373
374	return 0;
375
376err_unwind:
377	bpf_maps_destroy();
378	return ret;
379}
380
381static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
382				 struct bpf_elf_sec_data *sec_data)
383{
384	GElf_Shdr sec_hdr;
385	Elf_Scn *sec_fd;
386	Elf_Data *sec_edata;
387	char *sec_name;
388
389	memset(sec_data, 0, sizeof(*sec_data));
390
391	sec_fd = elf_getscn(elf_fd, sec_index);
392	if (!sec_fd)
393		return -EINVAL;
394
395	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
396		return -EIO;
397
398	sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx,
399			      sec_hdr.sh_name);
400	if (!sec_name || !sec_hdr.sh_size)
401		return -ENOENT;
402
403	sec_edata = elf_getdata(sec_fd, NULL);
404	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
405		return -EIO;
406
407	memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
408	sec_data->sec_name = sec_name;
409	sec_data->sec_data = sec_edata;
410
411	return 0;
412}
413
414static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
415			       struct bpf_elf_sec_data *data_insn,
416			       Elf_Data *sym_tab)
417{
418	Elf_Data *idata = data_insn->sec_data;
419	GElf_Shdr *rhdr = &data_relo->sec_hdr;
420	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
421	struct bpf_insn *insns = idata->d_buf;
422	unsigned int num_insns = idata->d_size / sizeof(*insns);
423
424	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
425		unsigned int ioff, fnum;
426		GElf_Rel relo;
427		GElf_Sym sym;
428
429		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
430			return -EIO;
431
432		ioff = relo.r_offset / sizeof(struct bpf_insn);
433		if (ioff >= num_insns)
434			return -EINVAL;
435		if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
436			return -EINVAL;
437
438		if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
439			return -EIO;
440
441		fnum = sym.st_value / sizeof(struct bpf_elf_map);
442		if (fnum >= ARRAY_SIZE(map_fds))
443			return -EINVAL;
444		if (map_fds[fnum] < 0)
445			return -EINVAL;
446
447		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
448		insns[ioff].imm = map_fds[fnum];
449	}
450
451	return 0;
452}
453
454static int bpf_fetch_ancillary(int file_fd, Elf *elf_fd, GElf_Ehdr *elf_hdr,
455			       bool *sec_done, char *license, unsigned int lic_len,
456			       Elf_Data **sym_tab)
457{
458	int sec_index, ret = -1;
459
460	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
461		struct bpf_elf_sec_data data_anc;
462
463		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
464					    &data_anc);
465		if (ret < 0)
466			continue;
467
468		/* Extract and load eBPF map fds. */
469		if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS) &&
470		    !bpf_may_skip_map_creation(file_fd)) {
471			struct bpf_elf_map *maps;
472			unsigned int maps_num;
473
474			if (data_anc.sec_data->d_size % sizeof(*maps) != 0)
475				return -EINVAL;
476
477			maps = data_anc.sec_data->d_buf;
478			maps_num = data_anc.sec_data->d_size / sizeof(*maps);
479			memcpy(map_ent, maps, data_anc.sec_data->d_size);
480
481			ret = bpf_maps_attach(maps, maps_num);
482			if (ret < 0)
483				return ret;
484
485			sec_done[sec_index] = true;
486		}
487		/* Extract eBPF license. */
488		else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
489			if (data_anc.sec_data->d_size > lic_len)
490				return -ENOMEM;
491
492			sec_done[sec_index] = true;
493			memcpy(license, data_anc.sec_data->d_buf,
494			       data_anc.sec_data->d_size);
495		}
496		/* Extract symbol table for relocations (map fd fixups). */
497		else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
498			sec_done[sec_index] = true;
499			*sym_tab = data_anc.sec_data;
500		}
501	}
502
503	return ret;
504}
505
506static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
507			       enum bpf_prog_type type, const char *sec,
508			       const char *license, Elf_Data *sym_tab)
509{
510	int sec_index, prog_fd = -1;
511
512	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
513		struct bpf_elf_sec_data data_relo, data_insn;
514		int ins_index, ret;
515
516		/* Attach eBPF programs with relocation data (maps). */
517		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
518					    &data_relo);
519		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
520			continue;
521
522		ins_index = data_relo.sec_hdr.sh_info;
523
524		ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
525					    &data_insn);
526		if (ret < 0)
527			continue;
528		if (strcmp(data_insn.sec_name, sec))
529			continue;
530
531		ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab);
532		if (ret < 0)
533			continue;
534
535		prog_fd = bpf_prog_attach(type, sec, data_insn.sec_data->d_buf,
536					  data_insn.sec_data->d_size, license);
537		if (prog_fd < 0)
538			continue;
539
540		sec_done[sec_index] = true;
541		sec_done[ins_index] = true;
542		break;
543	}
544
545	return prog_fd;
546}
547
548static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
549			  enum bpf_prog_type type, const char *sec,
550			  const char *license)
551{
552	int sec_index, prog_fd = -1;
553
554	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
555		struct bpf_elf_sec_data data_insn;
556		int ret;
557
558		/* Attach eBPF programs without relocation data. */
559		if (sec_done[sec_index])
560			continue;
561
562		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
563					    &data_insn);
564		if (ret < 0)
565			continue;
566		if (strcmp(data_insn.sec_name, sec))
567			continue;
568
569		prog_fd = bpf_prog_attach(type, sec, data_insn.sec_data->d_buf,
570					  data_insn.sec_data->d_size, license);
571		if (prog_fd < 0)
572			continue;
573
574		sec_done[sec_index] = true;
575		break;
576	}
577
578	return prog_fd;
579}
580
581static int bpf_fetch_prog_sec(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
582			      enum bpf_prog_type type, const char *sec,
583			      const char *license, Elf_Data *sym_tab)
584{
585	int ret = -1;
586
587	if (sym_tab)
588		ret = bpf_fetch_prog_relo(elf_fd, elf_hdr, sec_done, type,
589					  sec, license, sym_tab);
590	if (ret < 0)
591		ret = bpf_fetch_prog(elf_fd, elf_hdr, sec_done, type, sec,
592				     license);
593	return ret;
594}
595
596static int bpf_fill_prog_arrays(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_done,
597				enum bpf_prog_type type, const char *license,
598				Elf_Data *sym_tab)
599{
600	int sec_index;
601
602	for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
603		struct bpf_elf_sec_data data_insn;
604		int ret, map_id, key_id, prog_fd;
605
606		if (sec_done[sec_index])
607			continue;
608
609		ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
610					    &data_insn);
611		if (ret < 0)
612			continue;
613
614		ret = sscanf(data_insn.sec_name, "%i/%i", &map_id, &key_id);
615		if (ret != 2)
616			continue;
617
618		if (map_id >= ARRAY_SIZE(map_fds) || map_fds[map_id] < 0)
619			return -ENOENT;
620		if (map_ent[map_id].type != BPF_MAP_TYPE_PROG_ARRAY ||
621		    map_ent[map_id].max_elem <= key_id)
622			return -EINVAL;
623
624		prog_fd = bpf_fetch_prog_sec(elf_fd, elf_hdr, sec_done,
625					     type, data_insn.sec_name,
626					     license, sym_tab);
627		if (prog_fd < 0)
628			return -EIO;
629
630		ret = bpf_update_map(map_fds[map_id], &key_id, &prog_fd,
631				     BPF_ANY);
632		if (ret < 0)
633			return -ENOENT;
634
635		sec_done[sec_index] = true;
636	}
637
638	return 0;
639}
640
641int bpf_open_object(const char *path, enum bpf_prog_type type,
642		    const char *sec, bool verbose)
643{
644	char license[ELF_MAX_LICENSE_LEN];
645	int file_fd, prog_fd = -1, ret;
646	Elf_Data *sym_tab = NULL;
647	GElf_Ehdr elf_hdr;
648	bool *sec_done;
649	Elf *elf_fd;
650
651	if (elf_version(EV_CURRENT) == EV_NONE)
652		return -EINVAL;
653
654	file_fd = open(path, O_RDONLY, 0);
655	if (file_fd < 0)
656		return -errno;
657
658	elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
659	if (!elf_fd) {
660		ret = -EINVAL;
661		goto out;
662	}
663
664	if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
665		ret = -EIO;
666		goto out_elf;
667	}
668
669	sec_done = calloc(elf_hdr.e_shnum, sizeof(*sec_done));
670	if (!sec_done) {
671		ret = -ENOMEM;
672		goto out_elf;
673	}
674
675	memset(license, 0, sizeof(license));
676	bpf_verbose = verbose;
677
678	if (!bpf_may_skip_map_creation(file_fd))
679		bpf_maps_init();
680
681	ret = bpf_fetch_ancillary(file_fd, elf_fd, &elf_hdr, sec_done,
682				  license, sizeof(license), &sym_tab);
683	if (ret < 0)
684		goto out_maps;
685
686	prog_fd = bpf_fetch_prog_sec(elf_fd, &elf_hdr, sec_done, type,
687				     sec, license, sym_tab);
688	if (prog_fd < 0)
689		goto out_maps;
690
691	if (!bpf_may_skip_map_creation(file_fd)) {
692		ret = bpf_fill_prog_arrays(elf_fd, &elf_hdr, sec_done,
693					   type, license, sym_tab);
694		if (ret < 0)
695			goto out_prog;
696	}
697
698	bpf_save_finfo(file_fd);
699
700	free(sec_done);
701
702	elf_end(elf_fd);
703	close(file_fd);
704
705	return prog_fd;
706
707out_prog:
708	close(prog_fd);
709out_maps:
710	bpf_maps_destroy();
711	free(sec_done);
712out_elf:
713	elf_end(elf_fd);
714out:
715	close(file_fd);
716	bpf_clear_finfo();
717	return prog_fd;
718}
719
720static int
721bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
722		 const struct bpf_map_data *aux, unsigned int entries)
723{
724	struct bpf_map_set_msg msg;
725	int *cmsg_buf, min_fd;
726	char *amsg_buf;
727	int i;
728
729	memset(&msg, 0, sizeof(msg));
730
731	msg.aux.uds_ver = BPF_SCM_AUX_VER;
732	msg.aux.num_ent = entries;
733
734	strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
735	memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
736
737	cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
738	amsg_buf = (char *)msg.aux.ent;
739
740	for (i = 0; i < entries; i += min_fd) {
741		int ret;
742
743		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
744		bpf_map_set_init_single(&msg, min_fd);
745
746		memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
747		memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
748
749		ret = sendmsg(fd, &msg.hdr, 0);
750		if (ret <= 0)
751			return ret ? : -1;
752	}
753
754	return 0;
755}
756
757static int
758bpf_map_set_recv(int fd, int *fds,  struct bpf_map_aux *aux,
759		 unsigned int entries)
760{
761	struct bpf_map_set_msg msg;
762	int *cmsg_buf, min_fd;
763	char *amsg_buf, *mmsg_buf;
764	unsigned int needed = 1;
765	int i;
766
767	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
768	amsg_buf = (char *)msg.aux.ent;
769	mmsg_buf = (char *)&msg.aux;
770
771	for (i = 0; i < min(entries, needed); i += min_fd) {
772		struct cmsghdr *cmsg;
773		int ret;
774
775		min_fd = min(entries, entries - i);
776		bpf_map_set_init_single(&msg, min_fd);
777
778		ret = recvmsg(fd, &msg.hdr, 0);
779		if (ret <= 0)
780			return ret ? : -1;
781
782		cmsg = CMSG_FIRSTHDR(&msg.hdr);
783		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
784			return -EINVAL;
785		if (msg.hdr.msg_flags & MSG_CTRUNC)
786			return -EIO;
787		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
788			return -ENOSYS;
789
790		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
791		if (min_fd > entries || min_fd <= 0)
792			return -EINVAL;
793
794		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
795		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
796		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
797
798		needed = aux->num_ent;
799	}
800
801	return 0;
802}
803
804int bpf_send_map_fds(const char *path, const char *obj)
805{
806	struct sockaddr_un addr;
807	struct bpf_map_data bpf_aux;
808	int fd, ret;
809
810	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
811	if (fd < 0) {
812		fprintf(stderr, "Cannot open socket: %s\n",
813			strerror(errno));
814		return -1;
815	}
816
817	memset(&addr, 0, sizeof(addr));
818	addr.sun_family = AF_UNIX;
819	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
820
821	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
822	if (ret < 0) {
823		fprintf(stderr, "Cannot connect to %s: %s\n",
824			path, strerror(errno));
825		return -1;
826	}
827
828	memset(&bpf_aux, 0, sizeof(bpf_aux));
829
830	bpf_aux.fds = map_fds;
831	bpf_aux.ent = map_ent;
832
833	bpf_aux.obj = obj;
834	bpf_aux.st = &bpf_st;
835
836	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
837			       bpf_maps_count());
838	if (ret < 0)
839		fprintf(stderr, "Cannot send fds to %s: %s\n",
840			path, strerror(errno));
841
842	close(fd);
843	return ret;
844}
845
/* Bind a Unix domain socket at @path and receive map descriptors and
 * their definitions on it.
 *
 * @fds:     receives up to @entries descriptors
 * @aux:     receives the accompanying map information
 * @entries: capacity of @fds and of @aux->ent
 *
 * Returns 0 on success, -1 if the socket cannot be created or bound, or
 * the negative result of the receive. The socket is closed on every
 * path; the socket file is removed once it has been bound.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	/* addr is zeroed first; leaving the last byte alone keeps sun_path
	 * NUL terminated.
	 */
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		/* The path was not created by us, so only drop the socket. */
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
879#endif /* HAVE_ELF */
880