tc_bpf.c revision 9e607f2e722604a57a2c1ec9a174fcc505d9c451
1/*
2 * tc_bpf.c	BPF common code
3 *
4 *		This program is free software; you can distribute it and/or
5 *		modify it under the terms of the GNU General Public License
6 *		as published by the Free Software Foundation; either version
7 *		2 of the License, or (at your option) any later version.
8 *
9 * Authors:	Daniel Borkmann <dborkman@redhat.com>
10 *		Jiri Pirko <jiri@resnulli.us>
11 *		Alexei Starovoitov <ast@plumgrid.com>
12 */
13
14#include <stdio.h>
15#include <stdlib.h>
16#include <unistd.h>
17#include <string.h>
18#include <stdbool.h>
19#include <stdint.h>
20#include <errno.h>
21#include <fcntl.h>
22#include <stdarg.h>
23
24#ifdef HAVE_ELF
25#include <libelf.h>
26#include <gelf.h>
27#endif
28
29#include <sys/types.h>
30#include <sys/stat.h>
31#include <sys/un.h>
32#include <sys/vfs.h>
33#include <sys/mount.h>
34#include <sys/syscall.h>
35#include <sys/sendfile.h>
36#include <sys/resource.h>
37
38#include <linux/bpf.h>
39#include <linux/filter.h>
40#include <linux/if_alg.h>
41
42#include "utils.h"
43
44#include "bpf_elf.h"
45#include "bpf_scm.h"
46
47#include "tc_util.h"
48#include "tc_bpf.h"
49
50#ifdef HAVE_ELF
51static int bpf_obj_open(const char *path, enum bpf_prog_type type,
52			const char *sec, bool verbose);
53#else
54static int bpf_obj_open(const char *path, enum bpf_prog_type type,
55			const char *sec, bool verbose)
56{
57	fprintf(stderr, "No ELF library support compiled in.\n");
58	errno = ENOSYS;
59	return -1;
60}
61#endif
62
/* Widen a user-space pointer into the 64-bit integer representation
 * stored in the pointer-carrying fields of union bpf_attr.
 */
static inline __u64 bpf_ptr_to_u64(const void *ptr)
{
	const unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
67
/* Thin wrapper around the bpf system call.
 *
 * @cmd:  bpf command number
 * @attr: command attributes
 * @size: size of @attr in bytes
 *
 * Returns the value of syscall(). If the build headers do not define
 * __NR_bpf, prints a diagnostic, sets errno to ENOSYS and returns -1.
 */
static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
#ifdef __NR_bpf
	long rc = syscall(__NR_bpf, cmd, attr, size);

	return rc;
#else
	fputs("No bpf syscall, kernel headers too old?\n", stderr);
	errno = ENOSYS;
	return -1;
#endif
}
78
79static int bpf_obj_get(const char *pathname)
80{
81	union bpf_attr attr = {
82		.pathname	= bpf_ptr_to_u64(pathname),
83	};
84
85	return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
86}
87
/* Obtain the textual cBPF program and parse its leading length field.
 *
 * @arg:          the program text itself, or a file name if @from_file
 * @from_file:    read the first line of file @arg instead of using @arg
 * @bpf_len:      receives the instruction count found at the start
 * @bpf_string:   receives a pointer to the program text
 * @need_release: set to true when *@bpf_string was allocated here and
 *                must be freed by the caller on success
 * @separator:    character expected right after the length
 *
 * Returns 0 on success. On failure returns -ENOMEM, -ENOENT, -EIO or
 * -EINVAL, and any buffer allocated here has already been freed.
 */
static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
			    char **bpf_string, bool *need_release,
			    const char separator)
{
	char found_sep;

	if (!from_file) {
		*need_release = false;
		*bpf_string = arg;
	} else {
		/* Worst-case text size: one length prefix plus the widest
		 * possible rendering of every instruction.
		 */
		const size_t insn_chars = sizeof("65535 255 255 4294967295,");
		const size_t buf_size = sizeof("4096,") +
					BPF_MAXINSNS * insn_chars;
		char *line;
		FILE *in;

		line = calloc(1, buf_size);
		if (!line)
			return -ENOMEM;

		in = fopen(arg, "r");
		if (!in) {
			perror("Cannot fopen");
			free(line);
			return -ENOENT;
		}

		if (fgets(line, buf_size, in) == NULL) {
			free(line);
			fclose(in);
			return -EIO;
		}

		fclose(in);

		*need_release = true;
		*bpf_string = line;
	}

	if (sscanf(*bpf_string, "%hu%c", bpf_len, &found_sep) == 2 &&
	    found_sep == separator)
		return 0;

	if (*need_release)
		free(*bpf_string);
	return -EINVAL;
}
137
/* Parse a textual cBPF program ("<len>,<code> <jt> <jf> <k>,...") into
 * an array of struct sock_filter.
 *
 * @argc/@argv: remaining command line; argv[0] is the program text or,
 *              when @from_file is set, the name of a file holding it
 * @bpf_ops:    output array; the parsed length is limited to
 *              BPF_MAXINSNS entries
 * @from_file:  see above
 *
 * Returns the number of instructions parsed (> 0) or -EINVAL on error.
 */
static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
			 bool from_file)
{
	char *bpf_string, *token, separator = ',';
	int ret = 0, i = 0;
	bool need_release;
	__u16 bpf_len = 0;

	if (argc < 1)
		return -EINVAL;
	if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
			     &need_release, separator))
		return -EINVAL;
	if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
		ret = -EINVAL;
		goto out;
	}

	/* Each instruction starts right after a separator; stop at the end
	 * of the string or at a trailing separator.
	 */
	token = bpf_string;
	while ((token = strchr(token, separator)) && (++token)[0]) {
		if (i >= bpf_len) {
			fprintf(stderr, "Real program length exceeds encoded "
				"length parameter!\n");
			ret = -EINVAL;
			goto out;
		}

		if (sscanf(token, "%hu %hhu %hhu %u,",
			   &bpf_ops[i].code, &bpf_ops[i].jt,
			   &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
			fprintf(stderr, "Error at instruction %d!\n", i);
			ret = -EINVAL;
			goto out;
		}

		i++;
	}

	if (i != bpf_len) {
		/* Note the trailing space: the two literals are concatenated. */
		fprintf(stderr, "Parsed program length is less than encoded "
			"length parameter!\n");
		ret = -EINVAL;
		goto out;
	}
	ret = bpf_len;
out:
	if (need_release)
		free(bpf_string);

	return ret;
}
189
/* Print a cBPF program in the same "bytecode '<len>,<insn>,...'" form
 * that the parser accepts.
 *
 * @f:       output stream
 * @bpf_ops: attribute whose payload is an array of struct sock_filter
 * @len:     number of instructions; nothing is printed when it is 0
 */
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
{
	const struct sock_filter *insn, *last;

	if (!len)
		return;

	insn = RTA_DATA(bpf_ops);
	last = insn + (len - 1);

	fprintf(f, "bytecode \'%u,", len);

	/* Every instruction but the last is followed by a comma. */
	while (insn < last) {
		fprintf(f, "%hu %hhu %hhu %u,", insn->code, insn->jt,
			insn->jf, insn->k);
		insn++;
	}

	fprintf(f, "%hu %hhu %hhu %u\'", last->code, last->jt,
		last->jf, last->k);
}
207
208static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map)
209{
210	char file[PATH_MAX], buff[4096];
211	struct bpf_elf_map tmp, zero;
212	unsigned int val;
213	FILE *fp;
214
215	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
216
217	fp = fopen(file, "r");
218	if (!fp) {
219		fprintf(stderr, "No procfs support?!\n");
220		return -EIO;
221	}
222
223	memset(&tmp, 0, sizeof(tmp));
224	while (fgets(buff, sizeof(buff), fp)) {
225		if (sscanf(buff, "map_type:\t%u", &val) == 1)
226			tmp.type = val;
227		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
228			tmp.size_key = val;
229		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
230			tmp.size_value = val;
231		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
232			tmp.max_elem = val;
233	}
234
235	fclose(fp);
236
237	if (!memcmp(&tmp, map, offsetof(struct bpf_elf_map, id))) {
238		return 0;
239	} else {
240		memset(&zero, 0, sizeof(zero));
241		/* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
242		 * so just accept it. We know we do have an eBPF fd and in this
243		 * case, everything is 0. It is guaranteed that no such map exists
244		 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
245		 */
246		if (!memcmp(&tmp, &zero, offsetof(struct bpf_elf_map, id)))
247			return 0;
248
249		fprintf(stderr, "Map specs from pinned file differ!\n");
250		return -EINVAL;
251	}
252}
253
/* Check that @mnt is a mounted file system of type @magic.
 *
 * Returns 0 if statfs() succeeds and reports f_type == @magic,
 * -ENOENT otherwise.
 */
static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
{
	struct statfs fs_info;
	int rc = statfs(mnt, &fs_info);

	/* fs_info is only inspected when statfs() succeeded. */
	if (rc < 0 || (unsigned long)fs_info.f_type != magic)
		return -ENOENT;

	return 0;
}
265
266static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
267				  char *mnt, int len,
268				  const char * const *known_mnts)
269{
270	const char * const *ptr;
271	char type[100];
272	FILE *fp;
273
274	if (known_mnts) {
275		ptr = known_mnts;
276		while (*ptr) {
277			if (bpf_valid_mntpt(*ptr, magic) == 0) {
278				strncpy(mnt, *ptr, len - 1);
279				mnt[len - 1] = 0;
280				return mnt;
281			}
282			ptr++;
283		}
284	}
285
286	fp = fopen("/proc/mounts", "r");
287	if (fp == NULL || len != PATH_MAX)
288		return NULL;
289
290	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
291		      mnt, type) == 2) {
292		if (strcmp(type, fstype) == 0)
293			break;
294	}
295
296	fclose(fp);
297	if (strcmp(type, fstype) != 0)
298		return NULL;
299
300	return mnt;
301}
302
303int bpf_trace_pipe(void)
304{
305	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
306	static const char * const tracefs_known_mnts[] = {
307		TRACE_DIR_MNT,
308		"/sys/kernel/debug/tracing",
309		"/tracing",
310		"/trace",
311		0,
312	};
313	char tpipe[PATH_MAX];
314	const char *mnt;
315	int fd;
316
317	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
318			     sizeof(tracefs_mnt), tracefs_known_mnts);
319	if (!mnt) {
320		fprintf(stderr, "tracefs not mounted?\n");
321		return -1;
322	}
323
324	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
325
326	fd = open(tpipe, O_RDONLY);
327	if (fd < 0)
328		return -1;
329
330	fprintf(stderr, "Running! Hang up with ^C!\n\n");
331	while (1) {
332		static char buff[4096];
333		ssize_t ret;
334
335		ret = read(fd, buff, sizeof(buff) - 1);
336		if (ret > 0) {
337			write(2, buff, ret);
338			fflush(stderr);
339		}
340	}
341
342	return 0;
343}
344
345const char *bpf_default_section(const enum bpf_prog_type type)
346{
347	switch (type) {
348	case BPF_PROG_TYPE_SCHED_CLS:
349		return ELF_SECTION_CLASSIFIER;
350	case BPF_PROG_TYPE_SCHED_ACT:
351		return ELF_SECTION_ACTION;
352	default:
353		return NULL;
354	}
355}
356
357int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
358		     enum bpf_prog_type type, const char **ptr_object,
359		     const char **ptr_uds_name, struct nlmsghdr *n)
360{
361	struct sock_filter opcodes[BPF_MAXINSNS];
362	const char *file, *section, *uds_name;
363	char **argv = *ptr_argv;
364	int argc = *ptr_argc;
365	char annotation[256];
366	bool verbose = false;
367	int ret;
368	enum bpf_mode {
369		CBPF_BYTECODE,
370		CBPF_FILE,
371		EBPF_OBJECT,
372		EBPF_PINNED,
373	} mode;
374
375	if (matches(*argv, "bytecode") == 0 ||
376	    strcmp(*argv, "bc") == 0) {
377		mode = CBPF_BYTECODE;
378	} else if (matches(*argv, "bytecode-file") == 0 ||
379		   strcmp(*argv, "bcf") == 0) {
380		mode = CBPF_FILE;
381	} else if (matches(*argv, "object-file") == 0 ||
382		   strcmp(*argv, "obj") == 0) {
383		mode = EBPF_OBJECT;
384	} else if (matches(*argv, "object-pinned") == 0 ||
385		   matches(*argv, "pinned") == 0 ||
386		   matches(*argv, "fd") == 0) {
387		mode = EBPF_PINNED;
388	} else {
389		fprintf(stderr, "What mode is \"%s\"?\n", *argv);
390		return -1;
391	}
392
393	NEXT_ARG();
394	file = section = uds_name = NULL;
395	if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
396		file = *argv;
397		NEXT_ARG_FWD();
398
399		section = bpf_default_section(type);
400		if (argc > 0 && matches(*argv, "section") == 0) {
401			NEXT_ARG();
402			section = *argv;
403			NEXT_ARG_FWD();
404		}
405
406		uds_name = getenv(BPF_ENV_UDS);
407		if (argc > 0 && !uds_name &&
408		    matches(*argv, "export") == 0) {
409			NEXT_ARG();
410			uds_name = *argv;
411			NEXT_ARG_FWD();
412		}
413
414		if (argc > 0 && matches(*argv, "verbose") == 0) {
415			verbose = true;
416			NEXT_ARG_FWD();
417		}
418
419		PREV_ARG();
420	}
421
422	if (mode == CBPF_BYTECODE || mode == CBPF_FILE)
423		ret = bpf_ops_parse(argc, argv, opcodes, mode == CBPF_FILE);
424	else if (mode == EBPF_OBJECT)
425		ret = bpf_obj_open(file, type, section, verbose);
426	else if (mode == EBPF_PINNED)
427		ret = bpf_obj_get(file);
428	if (ret < 0)
429		return -1;
430
431	if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
432		addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
433		addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
434			  ret * sizeof(struct sock_filter));
435	} else if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
436		snprintf(annotation, sizeof(annotation), "%s:[%s]",
437			 basename(file), mode == EBPF_PINNED ? "*fsobj" :
438			 section);
439
440		addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
441		addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
442	}
443
444	*ptr_object = file;
445	*ptr_uds_name = uds_name;
446
447	*ptr_argc = argc;
448	*ptr_argv = argv;
449
450	return 0;
451}
452
453#ifdef HAVE_ELF
454struct bpf_elf_prog {
455	enum bpf_prog_type	type;
456	const struct bpf_insn	*insns;
457	size_t			size;
458	const char		*license;
459};
460
461struct bpf_elf_ctx {
462	Elf			*elf_fd;
463	GElf_Ehdr		elf_hdr;
464	Elf_Data		*sym_tab;
465	Elf_Data		*str_tab;
466	int			obj_fd;
467	int			map_fds[ELF_MAX_MAPS];
468	struct bpf_elf_map	maps[ELF_MAX_MAPS];
469	int			sym_num;
470	int			map_num;
471	bool			*sec_done;
472	int			sec_maps;
473	char			license[ELF_MAX_LICENSE_LEN];
474	enum bpf_prog_type	type;
475	bool			verbose;
476	struct bpf_elf_st	stat;
477};
478
479struct bpf_elf_sec_data {
480	GElf_Shdr		sec_hdr;
481	Elf_Data		*sec_data;
482	const char		*sec_name;
483};
484
/* Bundle of map-related pointers.
 * NOTE(review): not referenced in the visible part of this file; the
 * field descriptions below are inferred from the types only - confirm
 * against the users of this structure.
 */
struct bpf_map_data {
	int *fds;			/* presumably the map file descriptors */
	const char *obj;		/* presumably the object file name */
	struct bpf_elf_st *st;		/* presumably the object's stat info */
	struct bpf_elf_map *ent;	/* presumably the map descriptions */
};
491
/* Buffer the kernel fills with verifier output on program load.
 *
 * It is deliberately large: with logging enabled, a buffer that is too
 * small makes the load itself fail for lack of log space, and when the
 * verifier rejects a program we still want to show the user something
 * descriptive.
 */
static char bpf_log_buf[64 * 1024];
498
499static __check_format_string(1, 2) void bpf_dump_error(const char *format, ...)
500{
501	va_list vl;
502
503	va_start(vl, format);
504	vfprintf(stderr, format, vl);
505	va_end(vl);
506
507	if (bpf_log_buf[0]) {
508		fprintf(stderr, "%s\n", bpf_log_buf);
509		memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
510	}
511}
512
513static int bpf_map_create(enum bpf_map_type type, unsigned int size_key,
514			  unsigned int size_value, unsigned int max_elem)
515{
516	union bpf_attr attr = {
517		.map_type	= type,
518		.key_size	= size_key,
519		.value_size	= size_value,
520		.max_entries	= max_elem,
521	};
522
523	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
524}
525
526static int bpf_map_update(int fd, const void *key, const void *value,
527			  uint64_t flags)
528{
529	union bpf_attr attr = {
530		.map_fd		= fd,
531		.key		= bpf_ptr_to_u64(key),
532		.value		= bpf_ptr_to_u64(value),
533		.flags		= flags,
534	};
535
536	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
537}
538
539static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
540			 size_t size, const char *license)
541{
542	union bpf_attr attr = {
543		.prog_type	= type,
544		.insns		= bpf_ptr_to_u64(insns),
545		.insn_cnt	= size / sizeof(struct bpf_insn),
546		.license	= bpf_ptr_to_u64(license),
547		.log_buf	= bpf_ptr_to_u64(bpf_log_buf),
548		.log_size	= sizeof(bpf_log_buf),
549		.log_level	= 1,
550	};
551
552	if (getenv(BPF_ENV_NOLOG)) {
553		attr.log_buf	= 0;
554		attr.log_size	= 0;
555		attr.log_level	= 0;
556	}
557
558	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
559}
560
561static int bpf_obj_pin(int fd, const char *pathname)
562{
563	union bpf_attr attr = {
564		.pathname	= bpf_ptr_to_u64(pathname),
565		.bpf_fd		= fd,
566	};
567
568	return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
569}
570
/* Compute the SHA-1 digest of a file using an AF_ALG "hash" socket.
 *
 * @object: path of the file to hash
 * @out:    receives the digest
 * @len:    size of @out; must be exactly 20
 *
 * Returns 0 on success, -EINVAL for bad arguments, or a negative value
 * when any of the socket/file operations fails (a message is printed).
 */
static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
{
	struct sockaddr_alg alg = {
		.salg_family	= AF_ALG,
		.salg_type	= "hash",
		.salg_name	= "sha1",
	};
	int ret, cfd, ofd, ffd;
	struct stat stbuff;
	ssize_t size;

	if (!object || len != 20)
		return -EINVAL;

	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (cfd < 0) {
		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
			strerror(errno));
		return cfd;
	}

	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
	if (ret < 0) {
		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
		goto out_cfd;
	}

	/* The accepted socket is the one data is fed into and the digest
	 * is read back from.
	 */
	ofd = accept(cfd, NULL, 0);
	if (ofd < 0) {
		fprintf(stderr, "Error accepting socket: %s\n",
			strerror(errno));
		ret = ofd;
		goto out_cfd;
	}

	ffd = open(object, O_RDONLY);
	if (ffd < 0) {
		fprintf(stderr, "Error opening object %s: %s\n",
			object, strerror(errno));
		ret = ffd;
		goto out_ofd;
	}

	ret = fstat(ffd, &stbuff);
	if (ret < 0) {
		fprintf(stderr, "Error doing fstat: %s\n",
			strerror(errno));
		goto out_ffd;
	}

	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
	if (size != stbuff.st_size) {
		/* st_size is an off_t, which need not match size_t; print
		 * it through an explicit long long conversion.
		 */
		fprintf(stderr, "Error from sendfile (%zd vs %lld bytes): %s\n",
			size, (long long)stbuff.st_size, strerror(errno));
		ret = -1;
		goto out_ffd;
	}

	size = read(ofd, out, len);
	/* Test for failure first so the signed result is never compared
	 * against the unsigned length directly.
	 */
	if (size < 0 || (size_t)size != len) {
		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
			size, len, strerror(errno));
		ret = -1;
	} else {
		ret = 0;
	}
out_ffd:
	close(ffd);
out_ofd:
	close(ofd);
out_cfd:
	close(cfd);
	return ret;
}
645
/* Return a textual identifier for the object file, derived from the
 * hash of its contents.
 *
 * The identifier is computed on the first successful call and cached;
 * later calls return the cached string and ignore @pathname.
 *
 * Returns the identifier, or NULL if hashing failed.
 */
static const char *bpf_get_obj_uid(const char *pathname)
{
	static bool bpf_uid_cached = false;
	static char bpf_uid[64];

	if (!bpf_uid_cached) {
		uint8_t digest[20];

		if (bpf_obj_hash(pathname, digest, sizeof(digest))) {
			fprintf(stderr, "Object hashing failed!\n");
			return NULL;
		}

		hexstring_n2a(digest, sizeof(digest), bpf_uid,
			      sizeof(bpf_uid));
		bpf_uid_cached = true;
	}

	return bpf_uid;
}
667
/* Mount a bpf file system on @target.
 *
 * @target is first made a private mount. If that fails with EINVAL,
 * @target is bind-mounted onto itself and making it private is retried
 * once. Finally the bpf file system is mounted on top.
 *
 * Returns 0 on success, -1 on failure (a message is printed).
 */
static int bpf_mnt_fs(const char *target)
{
	if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
		if (errno != EINVAL)
			goto private_failed;

		if (mount(target, target, "none", MS_BIND, NULL)) {
			fprintf(stderr, "mount --bind %s %s failed: %s\n",
				target,	target, strerror(errno));
			return -1;
		}

		/* Second and last attempt, now that a mount exists. */
		if (mount("", target, "none", MS_PRIVATE | MS_REC, NULL))
			goto private_failed;
	}

	if (mount("bpf", target, "bpf", 0, NULL)) {
		fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
			target,	strerror(errno));
		return -1;
	}

	return 0;

private_failed:
	fprintf(stderr, "mount --make-private %s failed: %s\n",
		target,	strerror(errno));
	return -1;
}
696
697static const char *bpf_get_tc_dir(void)
698{
699	static bool bpf_mnt_cached = false;
700	static char bpf_tc_dir[PATH_MAX];
701	static const char *mnt;
702	static const char * const bpf_known_mnts[] = {
703		BPF_DIR_MNT,
704		0,
705	};
706	char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
707	char bpf_glo_dir[PATH_MAX];
708	int ret;
709
710	if (bpf_mnt_cached)
711		goto done;
712
713	mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
714			     bpf_known_mnts);
715	if (!mnt) {
716		mnt = getenv(BPF_ENV_MNT);
717		if (!mnt)
718			mnt = BPF_DIR_MNT;
719		ret = bpf_mnt_fs(mnt);
720		if (ret) {
721			mnt = NULL;
722			goto out;
723		}
724	}
725
726	snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
727	ret = mkdir(bpf_tc_dir, S_IRWXU);
728	if (ret && errno != EEXIST) {
729		fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
730			strerror(errno));
731		mnt = NULL;
732		goto out;
733	}
734
735	snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
736		 bpf_tc_dir, BPF_DIR_GLOBALS);
737	ret = mkdir(bpf_glo_dir, S_IRWXU);
738	if (ret && errno != EEXIST) {
739		fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
740			strerror(errno));
741		mnt = NULL;
742		goto out;
743	}
744
745	mnt = bpf_tc_dir;
746out:
747	bpf_mnt_cached = true;
748done:
749	return mnt;
750}
751
/* Prepare the process for loading the object at @pathname: lift the
 * locked-memory limit, set up the tc directory in the bpf file system
 * and compute the object's identifier.
 *
 * A missing bpf file system is not treated as an error.
 *
 * Returns 0 on success, -1 if the object identifier cannot be computed.
 */
static int bpf_init_env(const char *pathname)
{
	struct rlimit unlimited = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};

	/* Don't bother in case we fail! */
	setrlimit(RLIMIT_MEMLOCK, &unlimited);

	if (bpf_get_tc_dir() == NULL) {
		fprintf(stderr, "Continuing without mounted eBPF fs. "
			"Too old kernel?\n");
		return 0;
	}

	return bpf_get_obj_uid(pathname) ? 0 : -1;
}
773
774static bool bpf_no_pinning(int pinning)
775{
776	switch (pinning) {
777	case PIN_OBJECT_NS:
778	case PIN_GLOBAL_NS:
779		return false;
780	case PIN_NONE:
781	default:
782		return true;
783	}
784}
785
786static void bpf_make_pathname(char *pathname, size_t len, const char *name,
787			      int pinning)
788{
789	switch (pinning) {
790	case PIN_OBJECT_NS:
791		snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
792			 bpf_get_obj_uid(NULL), name);
793		break;
794	case PIN_GLOBAL_NS:
795		snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
796			 BPF_DIR_GLOBALS, name);
797		break;
798	}
799}
800
801static int bpf_probe_pinned(const char *name, int pinning)
802{
803	char pathname[PATH_MAX];
804
805	if (bpf_no_pinning(pinning) || !bpf_get_tc_dir())
806		return 0;
807
808	bpf_make_pathname(pathname, sizeof(pathname), name, pinning);
809	return bpf_obj_get(pathname);
810}
811
812static int bpf_place_pinned(int fd, const char *name, int pinning)
813{
814	char pathname[PATH_MAX];
815	int ret;
816
817	if (bpf_no_pinning(pinning) || !bpf_get_tc_dir())
818		return 0;
819
820	if (pinning == PIN_OBJECT_NS) {
821		snprintf(pathname, sizeof(pathname), "%s/%s",
822			 bpf_get_tc_dir(), bpf_get_obj_uid(NULL));
823
824		ret = mkdir(pathname, S_IRWXU);
825		if (ret && errno != EEXIST) {
826			fprintf(stderr, "mkdir %s failed: %s\n", pathname,
827				strerror(errno));
828			return ret;
829		}
830	}
831
832	bpf_make_pathname(pathname, sizeof(pathname), name, pinning);
833	return bpf_obj_pin(fd, pathname);
834}
835
836static int bpf_prog_attach(const char *section,
837			   const struct bpf_elf_prog *prog, bool verbose)
838{
839	int fd;
840
841	/* We can add pinning here later as well, same as bpf_map_attach(). */
842	errno = 0;
843	fd = bpf_prog_load(prog->type, prog->insns, prog->size,
844			   prog->license);
845	if (fd < 0 || verbose) {
846		bpf_dump_error("Prog section \'%s\' (type:%u insns:%zu "
847			       "license:\'%s\') %s%s (%d)!\n\n",
848			       section, prog->type,
849			       prog->size / sizeof(struct bpf_insn),
850			       prog->license, fd < 0 ? "rejected :" :
851			       "loaded", fd < 0 ? strerror(errno) : "",
852			       fd < 0 ? errno : fd);
853	}
854
855	return fd;
856}
857
858static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
859			  bool verbose)
860{
861	int fd, ret;
862
863	fd = bpf_probe_pinned(name, map->pinning);
864	if (fd > 0) {
865		ret = bpf_map_selfcheck_pinned(fd, map);
866		if (ret < 0) {
867			close(fd);
868			fprintf(stderr, "Map \'%s\' self-check failed!\n",
869				name);
870			return ret;
871		}
872		if (verbose)
873			fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
874				name);
875		return fd;
876	}
877
878	errno = 0;
879	fd = bpf_map_create(map->type, map->size_key, map->size_value,
880			    map->max_elem);
881	if (fd < 0 || verbose) {
882		bpf_dump_error("Map \'%s\' (type:%u id:%u pinning:%u "
883			       "ksize:%u vsize:%u max-elems:%u) %s%s (%d)!\n",
884			       name, map->type, map->id, map->pinning,
885			       map->size_key, map->size_value, map->max_elem,
886			       fd < 0 ? "rejected: " : "loaded", fd < 0 ?
887			       strerror(errno) : "", fd < 0 ? errno : fd);
888		if (fd < 0)
889			return fd;
890	}
891
892	ret = bpf_place_pinned(fd, name, map->pinning);
893	if (ret < 0 && errno != EEXIST) {
894		fprintf(stderr, "Could not pin %s map: %s\n", name,
895			strerror(errno));
896		close(fd);
897		return ret;
898	}
899
900	return fd;
901}
902
/* Extract the binding (upper four bits) and the type (lower four bits)
 * from a symbol's st_info value. The argument is fully parenthesized so
 * that compound expressions are handled as a whole.
 */
#define __ELF_ST_BIND(x)	((x) >> 4)
#define __ELF_ST_TYPE(x)	(((unsigned int) (x)) & 0xf)
905
906static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
907				    const GElf_Sym *sym)
908{
909	return ctx->str_tab->d_buf + sym->st_name;
910}
911
912static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
913{
914	GElf_Sym sym;
915	int i;
916
917	for (i = 0; i < ctx->sym_num; i++) {
918		if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
919			continue;
920
921		if (__ELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
922		    __ELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
923		    sym.st_shndx != ctx->sec_maps ||
924		    sym.st_value / sizeof(struct bpf_elf_map) != which)
925			continue;
926
927		return bpf_str_tab_name(ctx, &sym);
928	}
929
930	return NULL;
931}
932
933static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
934{
935	const char *map_name;
936	int i, fd;
937
938	for (i = 0; i < ctx->map_num; i++) {
939		map_name = bpf_map_fetch_name(ctx, i);
940		if (!map_name)
941			return -EIO;
942
943		fd = bpf_map_attach(map_name, &ctx->maps[i], ctx->verbose);
944		if (fd < 0)
945			return fd;
946
947		ctx->map_fds[i] = fd;
948	}
949
950	return 0;
951}
952
953static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
954				 struct bpf_elf_sec_data *data)
955{
956	Elf_Data *sec_edata;
957	GElf_Shdr sec_hdr;
958	Elf_Scn *sec_fd;
959	char *sec_name;
960
961	memset(data, 0, sizeof(*data));
962
963	sec_fd = elf_getscn(ctx->elf_fd, section);
964	if (!sec_fd)
965		return -EINVAL;
966	if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
967		return -EIO;
968
969	sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
970			      sec_hdr.sh_name);
971	if (!sec_name || !sec_hdr.sh_size)
972		return -ENOENT;
973
974	sec_edata = elf_getdata(sec_fd, NULL);
975	if (!sec_edata || elf_getdata(sec_fd, sec_edata))
976		return -EIO;
977
978	memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
979
980	data->sec_name = sec_name;
981	data->sec_data = sec_edata;
982	return 0;
983}
984
985static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
986			  struct bpf_elf_sec_data *data)
987{
988	if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
989		return -EINVAL;
990
991	ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
992	ctx->sec_maps = section;
993	ctx->sec_done[section] = true;
994
995	if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
996		fprintf(stderr, "Too many BPF maps in ELF section!\n");
997		return -ENOMEM;
998	}
999
1000	memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
1001	return 0;
1002}
1003
1004static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
1005			     struct bpf_elf_sec_data *data)
1006{
1007	if (data->sec_data->d_size > sizeof(ctx->license))
1008		return -ENOMEM;
1009
1010	memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
1011	ctx->sec_done[section] = true;
1012	return 0;
1013}
1014
1015static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
1016			    struct bpf_elf_sec_data *data)
1017{
1018	ctx->sym_tab = data->sec_data;
1019	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
1020	ctx->sec_done[section] = true;
1021	return 0;
1022}
1023
1024static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
1025			    struct bpf_elf_sec_data *data)
1026{
1027	ctx->str_tab = data->sec_data;
1028	ctx->sec_done[section] = true;
1029	return 0;
1030}
1031
1032static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
1033{
1034	struct bpf_elf_sec_data data;
1035	int i, ret = -1;
1036
1037	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1038		ret = bpf_fill_section_data(ctx, i, &data);
1039		if (ret < 0)
1040			continue;
1041
1042		if (!strcmp(data.sec_name, ELF_SECTION_MAPS))
1043			ret = bpf_fetch_maps(ctx, i, &data);
1044		else if (!strcmp(data.sec_name, ELF_SECTION_LICENSE))
1045			ret = bpf_fetch_license(ctx, i, &data);
1046		else if (data.sec_hdr.sh_type == SHT_SYMTAB)
1047			ret = bpf_fetch_symtab(ctx, i, &data);
1048		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
1049			 i != ctx->elf_hdr.e_shstrndx)
1050			ret = bpf_fetch_strtab(ctx, i, &data);
1051		if (ret < 0) {
1052			fprintf(stderr, "Error parsing section %d! Perhaps"
1053				"check with readelf -a?\n", i);
1054			break;
1055		}
1056	}
1057
1058	if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) {
1059		ret = bpf_maps_attach_all(ctx);
1060		if (ret < 0) {
1061			fprintf(stderr, "Error loading maps into kernel!\n");
1062			return ret;
1063		}
1064	}
1065
1066	return ret;
1067}
1068
1069static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
1070{
1071	struct bpf_elf_sec_data data;
1072	struct bpf_elf_prog prog;
1073	int ret, i, fd = -1;
1074
1075	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1076		if (ctx->sec_done[i])
1077			continue;
1078
1079		ret = bpf_fill_section_data(ctx, i, &data);
1080		if (ret < 0 || strcmp(data.sec_name, section))
1081			continue;
1082
1083		memset(&prog, 0, sizeof(prog));
1084		prog.type    = ctx->type;
1085		prog.insns   = data.sec_data->d_buf;
1086		prog.size    = data.sec_data->d_size;
1087		prog.license = ctx->license;
1088
1089		fd = bpf_prog_attach(section, &prog, ctx->verbose);
1090		if (fd < 0)
1091			continue;
1092
1093		ctx->sec_done[i] = true;
1094		break;
1095	}
1096
1097	return fd;
1098}
1099
1100static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
1101			       struct bpf_elf_sec_data *data_relo,
1102			       struct bpf_elf_sec_data *data_insn)
1103{
1104	Elf_Data *idata = data_insn->sec_data;
1105	GElf_Shdr *rhdr = &data_relo->sec_hdr;
1106	int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
1107	struct bpf_insn *insns = idata->d_buf;
1108	unsigned int num_insns = idata->d_size / sizeof(*insns);
1109
1110	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
1111		unsigned int ioff, rmap;
1112		GElf_Rel relo;
1113		GElf_Sym sym;
1114
1115		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
1116			return -EIO;
1117
1118		ioff = relo.r_offset / sizeof(struct bpf_insn);
1119		if (ioff >= num_insns ||
1120		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
1121			return -EINVAL;
1122
1123		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
1124			return -EIO;
1125
1126		rmap = sym.st_value / sizeof(struct bpf_elf_map);
1127		if (rmap >= ARRAY_SIZE(ctx->map_fds))
1128			return -EINVAL;
1129		if (!ctx->map_fds[rmap])
1130			return -EINVAL;
1131
1132		if (ctx->verbose)
1133			fprintf(stderr, "Map \'%s\' (%d) injected into prog "
1134				"section \'%s\' at offset %u!\n",
1135				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
1136				data_insn->sec_name, ioff);
1137
1138		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
1139		insns[ioff].imm     = ctx->map_fds[rmap];
1140	}
1141
1142	return 0;
1143}
1144
1145static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section)
1146{
1147	struct bpf_elf_sec_data data_relo, data_insn;
1148	struct bpf_elf_prog prog;
1149	int ret, idx, i, fd = -1;
1150
1151	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1152		ret = bpf_fill_section_data(ctx, i, &data_relo);
1153		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
1154			continue;
1155
1156		idx = data_relo.sec_hdr.sh_info;
1157		ret = bpf_fill_section_data(ctx, idx, &data_insn);
1158		if (ret < 0 || strcmp(data_insn.sec_name, section))
1159			continue;
1160
1161		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
1162		if (ret < 0)
1163			continue;
1164
1165		memset(&prog, 0, sizeof(prog));
1166		prog.type    = ctx->type;
1167		prog.insns   = data_insn.sec_data->d_buf;
1168		prog.size    = data_insn.sec_data->d_size;
1169		prog.license = ctx->license;
1170
1171		fd = bpf_prog_attach(section, &prog, ctx->verbose);
1172		if (fd < 0)
1173			continue;
1174
1175		ctx->sec_done[i]   = true;
1176		ctx->sec_done[idx] = true;
1177		break;
1178	}
1179
1180	return fd;
1181}
1182
1183static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
1184{
1185	int ret = -1;
1186
1187	if (ctx->sym_tab)
1188		ret = bpf_fetch_prog_relo(ctx, section);
1189	if (ret < 0)
1190		ret = bpf_fetch_prog(ctx, section);
1191
1192	return ret;
1193}
1194
1195static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
1196{
1197	int i;
1198
1199	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
1200		if (ctx->map_fds[i] && ctx->maps[i].id == id &&
1201		    ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
1202			return i;
1203	return -1;
1204}
1205
1206static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
1207{
1208	struct bpf_elf_sec_data data;
1209	uint32_t map_id, key_id;
1210	int fd, i, ret, idx;
1211
1212	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
1213		if (ctx->sec_done[i])
1214			continue;
1215
1216		ret = bpf_fill_section_data(ctx, i, &data);
1217		if (ret < 0)
1218			continue;
1219
1220		ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
1221		if (ret != 2)
1222			continue;
1223
1224		idx = bpf_find_map_by_id(ctx, map_id);
1225		if (idx < 0)
1226			continue;
1227
1228		fd = bpf_fetch_prog_sec(ctx, data.sec_name);
1229		if (fd < 0)
1230			return -EIO;
1231
1232		ret = bpf_map_update(ctx->map_fds[idx], &key_id,
1233				     &fd, BPF_ANY);
1234		if (ret < 0)
1235			return -ENOENT;
1236
1237		ctx->sec_done[i] = true;
1238	}
1239
1240	return 0;
1241}
1242
1243static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
1244{
1245	struct stat st;
1246	int ret;
1247
1248	memset(&ctx->stat, 0, sizeof(ctx->stat));
1249
1250	ret = fstat(ctx->obj_fd, &st);
1251	if (ret < 0) {
1252		fprintf(stderr, "Stat of elf file failed: %s\n",
1253			strerror(errno));
1254		return;
1255	}
1256
1257	ctx->stat.st_dev = st.st_dev;
1258	ctx->stat.st_ino = st.st_ino;
1259}
1260
1261static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
1262			    enum bpf_prog_type type, bool verbose)
1263{
1264	int ret = -EINVAL;
1265
1266	if (elf_version(EV_CURRENT) == EV_NONE ||
1267	    bpf_init_env(pathname))
1268		return ret;
1269
1270	memset(ctx, 0, sizeof(*ctx));
1271	ctx->verbose = verbose;
1272	ctx->type    = type;
1273
1274	ctx->obj_fd = open(pathname, O_RDONLY);
1275	if (ctx->obj_fd < 0)
1276		return ctx->obj_fd;
1277
1278	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
1279	if (!ctx->elf_fd) {
1280		ret = -EINVAL;
1281		goto out_fd;
1282	}
1283
1284	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
1285	    &ctx->elf_hdr) {
1286		ret = -EIO;
1287		goto out_elf;
1288	}
1289
1290	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
1291			       sizeof(*(ctx->sec_done)));
1292	if (!ctx->sec_done) {
1293		ret = -ENOMEM;
1294		goto out_elf;
1295	}
1296
1297	bpf_save_finfo(ctx);
1298	return 0;
1299out_elf:
1300	elf_end(ctx->elf_fd);
1301out_fd:
1302	close(ctx->obj_fd);
1303	return ret;
1304}
1305
1306static int bpf_maps_count(struct bpf_elf_ctx *ctx)
1307{
1308	int i, count = 0;
1309
1310	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
1311		if (!ctx->map_fds[i])
1312			break;
1313		count++;
1314	}
1315
1316	return count;
1317}
1318
1319static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
1320{
1321	int i;
1322
1323	for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
1324		if (ctx->map_fds[i])
1325			close(ctx->map_fds[i]);
1326	}
1327}
1328
1329static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
1330{
1331	if (failure)
1332		bpf_maps_teardown(ctx);
1333
1334	free(ctx->sec_done);
1335	elf_end(ctx->elf_fd);
1336	close(ctx->obj_fd);
1337}
1338
1339static struct bpf_elf_ctx __ctx;
1340
1341static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
1342			const char *section, bool verbose)
1343{
1344	struct bpf_elf_ctx *ctx = &__ctx;
1345	int fd = 0, ret;
1346
1347	ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
1348	if (ret < 0) {
1349		fprintf(stderr, "Cannot initialize ELF context!\n");
1350		return ret;
1351	}
1352
1353	ret = bpf_fetch_ancillary(ctx);
1354	if (ret < 0) {
1355		fprintf(stderr, "Error fetching ELF ancillary data!\n");
1356		goto out;
1357	}
1358
1359	fd = bpf_fetch_prog_sec(ctx, section);
1360	if (fd < 0) {
1361		fprintf(stderr, "Error fetching program/map!\n");
1362		ret = fd;
1363		goto out;
1364	}
1365
1366	ret = bpf_fill_prog_arrays(ctx);
1367	if (ret < 0)
1368		fprintf(stderr, "Error filling program arrays!\n");
1369out:
1370	bpf_elf_ctx_destroy(ctx, ret < 0);
1371	if (ret < 0) {
1372		if (fd)
1373			close(fd);
1374		return ret;
1375	}
1376
1377	return fd;
1378}
1379
1380static int
1381bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
1382		 const struct bpf_map_data *aux, unsigned int entries)
1383{
1384	struct bpf_map_set_msg msg;
1385	int *cmsg_buf, min_fd;
1386	char *amsg_buf;
1387	int i;
1388
1389	memset(&msg, 0, sizeof(msg));
1390
1391	msg.aux.uds_ver = BPF_SCM_AUX_VER;
1392	msg.aux.num_ent = entries;
1393
1394	strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
1395	memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
1396
1397	cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
1398	amsg_buf = (char *)msg.aux.ent;
1399
1400	for (i = 0; i < entries; i += min_fd) {
1401		int ret;
1402
1403		min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
1404		bpf_map_set_init_single(&msg, min_fd);
1405
1406		memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
1407		memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
1408
1409		ret = sendmsg(fd, &msg.hdr, 0);
1410		if (ret <= 0)
1411			return ret ? : -1;
1412	}
1413
1414	return 0;
1415}
1416
1417static int
1418bpf_map_set_recv(int fd, int *fds,  struct bpf_map_aux *aux,
1419		 unsigned int entries)
1420{
1421	struct bpf_map_set_msg msg;
1422	int *cmsg_buf, min_fd;
1423	char *amsg_buf, *mmsg_buf;
1424	unsigned int needed = 1;
1425	int i;
1426
1427	cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
1428	amsg_buf = (char *)msg.aux.ent;
1429	mmsg_buf = (char *)&msg.aux;
1430
1431	for (i = 0; i < min(entries, needed); i += min_fd) {
1432		struct cmsghdr *cmsg;
1433		int ret;
1434
1435		min_fd = min(entries, entries - i);
1436		bpf_map_set_init_single(&msg, min_fd);
1437
1438		ret = recvmsg(fd, &msg.hdr, 0);
1439		if (ret <= 0)
1440			return ret ? : -1;
1441
1442		cmsg = CMSG_FIRSTHDR(&msg.hdr);
1443		if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
1444			return -EINVAL;
1445		if (msg.hdr.msg_flags & MSG_CTRUNC)
1446			return -EIO;
1447		if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
1448			return -ENOSYS;
1449
1450		min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
1451		if (min_fd > entries || min_fd <= 0)
1452			return -EINVAL;
1453
1454		memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
1455		memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
1456		memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
1457
1458		needed = aux->num_ent;
1459	}
1460
1461	return 0;
1462}
1463
1464int bpf_send_map_fds(const char *path, const char *obj)
1465{
1466	struct bpf_elf_ctx *ctx = &__ctx;
1467	struct sockaddr_un addr;
1468	struct bpf_map_data bpf_aux;
1469	int fd, ret;
1470
1471	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
1472	if (fd < 0) {
1473		fprintf(stderr, "Cannot open socket: %s\n",
1474			strerror(errno));
1475		return -1;
1476	}
1477
1478	memset(&addr, 0, sizeof(addr));
1479	addr.sun_family = AF_UNIX;
1480	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
1481
1482	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
1483	if (ret < 0) {
1484		fprintf(stderr, "Cannot connect to %s: %s\n",
1485			path, strerror(errno));
1486		return -1;
1487	}
1488
1489	memset(&bpf_aux, 0, sizeof(bpf_aux));
1490
1491	bpf_aux.fds = ctx->map_fds;
1492	bpf_aux.ent = ctx->maps;
1493	bpf_aux.st  = &ctx->stat;
1494	bpf_aux.obj = obj;
1495
1496	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
1497			       bpf_maps_count(ctx));
1498	if (ret < 0)
1499		fprintf(stderr, "Cannot send fds to %s: %s\n",
1500			path, strerror(errno));
1501
1502	bpf_maps_teardown(ctx);
1503	close(fd);
1504	return ret;
1505}
1506
/*
 * Receive map descriptors on a Unix datagram socket bound at @path.
 *
 * Up to @entries descriptors are stored in @fds and the accompanying
 * auxiliary data in @aux. Once receiving has been attempted, the
 * socket path is unlinked and the socket is closed.
 *
 * Returns the result of bpf_map_set_recv(), or -1 if the socket could
 * not be created or bound; errors are reported on stderr.
 */
int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
		     unsigned int entries)
{
	struct sockaddr_un addr;
	int fd, ret;

	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
	if (fd < 0) {
		fprintf(stderr, "Cannot open socket: %s\n",
			strerror(errno));
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	/* addr is zeroed, so sparing the last byte keeps it terminated. */
	strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		fprintf(stderr, "Cannot bind to socket: %s\n",
			strerror(errno));
		/*
		 * Do not leak the socket. The path is deliberately not
		 * unlinked here, since the bind did not succeed.
		 */
		close(fd);
		return -1;
	}

	ret = bpf_map_set_recv(fd, fds, aux, entries);
	if (ret < 0)
		fprintf(stderr, "Cannot recv fds from %s: %s\n",
			path, strerror(errno));

	unlink(addr.sun_path);
	close(fd);
	return ret;
}
1540#endif /* HAVE_ELF */
1541