tc_bpf.c revision fd7f9c7fd11fa926bda2edc8bc492e7515753a32
1/* 2 * tc_bpf.c BPF common code 3 * 4 * This program is free software; you can distribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Daniel Borkmann <dborkman@redhat.com> 10 * Jiri Pirko <jiri@resnulli.us> 11 * Alexei Starovoitov <ast@plumgrid.com> 12 */ 13 14#include <stdio.h> 15#include <stdlib.h> 16#include <unistd.h> 17#include <string.h> 18#include <stdbool.h> 19#include <stdint.h> 20#include <errno.h> 21#include <fcntl.h> 22#include <stdarg.h> 23 24#ifdef HAVE_ELF 25#include <libelf.h> 26#include <gelf.h> 27#endif 28 29#include <sys/types.h> 30#include <sys/stat.h> 31#include <sys/un.h> 32#include <sys/vfs.h> 33#include <sys/mount.h> 34#include <sys/syscall.h> 35#include <sys/sendfile.h> 36#include <sys/resource.h> 37 38#include <linux/bpf.h> 39#include <linux/filter.h> 40#include <linux/if_alg.h> 41 42#include "utils.h" 43 44#include "bpf_elf.h" 45#include "bpf_scm.h" 46 47#include "tc_util.h" 48#include "tc_bpf.h" 49 50#ifdef HAVE_ELF 51static int bpf_obj_open(const char *path, enum bpf_prog_type type, 52 const char *sec, bool verbose); 53#else 54static int bpf_obj_open(const char *path, enum bpf_prog_type type, 55 const char *sec, bool verbose) 56{ 57 fprintf(stderr, "No ELF library support compiled in.\n"); 58 errno = ENOSYS; 59 return -1; 60} 61#endif 62 63static inline __u64 bpf_ptr_to_u64(const void *ptr) 64{ 65 return (__u64)(unsigned long)ptr; 66} 67 68static int bpf(int cmd, union bpf_attr *attr, unsigned int size) 69{ 70#ifdef __NR_bpf 71 return syscall(__NR_bpf, cmd, attr, size); 72#else 73 fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); 74 errno = ENOSYS; 75 return -1; 76#endif 77} 78 79static int bpf_map_update(int fd, const void *key, const void *value, 80 uint64_t flags) 81{ 82 union bpf_attr attr = { 83 .map_fd = fd, 84 .key = bpf_ptr_to_u64(key), 85 .value = bpf_ptr_to_u64(value), 86 .flags = flags, 87 }; 88 89 return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); 90} 91 92static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, 93 char **bpf_string, bool *need_release, 94 const char separator) 95{ 96 char sp; 97 98 if (from_file) { 99 size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); 100 char *tmp_string; 101 FILE *fp; 102 103 tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; 104 tmp_string = malloc(tmp_len); 105 if (tmp_string == NULL) 106 return -ENOMEM; 107 108 memset(tmp_string, 0, tmp_len); 109 110 fp = fopen(arg, "r"); 111 if (fp == NULL) { 112 perror("Cannot fopen"); 113 free(tmp_string); 114 return -ENOENT; 115 } 116 117 if (!fgets(tmp_string, tmp_len, fp)) { 118 free(tmp_string); 119 fclose(fp); 120 return -EIO; 121 } 122 123 fclose(fp); 124 125 *need_release = true; 126 *bpf_string = tmp_string; 127 } else { 128 *need_release = false; 129 *bpf_string = arg; 130 } 131 132 if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || 133 sp != separator) { 134 if (*need_release) 135 free(*bpf_string); 136 return -EINVAL; 137 } 138 139 return 0; 140} 141 142static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops, 143 bool from_file) 144{ 145 char *bpf_string, *token, separator = ','; 146 int ret = 0, i = 0; 147 bool need_release; 148 __u16 bpf_len = 0; 149 150 if (argc < 1) 151 return -EINVAL; 152 if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, 153 &need_release, separator)) 154 return -EINVAL; 155 if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { 156 ret = -EINVAL; 157 goto out; 158 } 159 160 token = bpf_string; 161 while ((token = strchr(token, separator)) && (++token)[0]) { 162 if (i >= bpf_len) { 163 fprintf(stderr, "Real program length exceeds encoded " 164 "length parameter!\n"); 165 ret = -EINVAL; 166 goto out; 167 } 168 169 if (sscanf(token, "%hu %hhu %hhu %u,", 170 &bpf_ops[i].code, &bpf_ops[i].jt, 171 &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { 172 fprintf(stderr, "Error at instruction %d!\n", i); 173 ret = -EINVAL; 174 goto out; 175 } 176 177 i++; 178 } 179 180 if (i != bpf_len) { 181 fprintf(stderr, "Parsed program length is less than encoded" 182 "length parameter!\n"); 183 ret = -EINVAL; 184 goto out; 185 } 186 ret = bpf_len; 187out: 188 if (need_release) 189 free(bpf_string); 190 191 return ret; 192} 193 194void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) 195{ 196 struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); 197 int i; 198 199 if (len == 0) 200 return; 201 202 fprintf(f, "bytecode \'%u,", len); 203 204 for (i = 0; i < len - 1; i++) 205 fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, 206 ops[i].jf, ops[i].k); 207 208 fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt, 209 ops[i].jf, ops[i].k); 210} 211 212static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, 213 int length) 214{ 215 char file[PATH_MAX], buff[4096]; 216 struct bpf_elf_map tmp, zero; 217 unsigned int val; 218 FILE *fp; 219 220 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 221 222 fp = fopen(file, "r"); 223 if (!fp) { 224 fprintf(stderr, "No procfs support?!\n"); 225 return -EIO; 226 } 227 228 memset(&tmp, 0, sizeof(tmp)); 229 while (fgets(buff, sizeof(buff), fp)) { 230 if (sscanf(buff, "map_type:\t%u", &val) == 1) 231 tmp.type = val; 232 else if (sscanf(buff, "key_size:\t%u", &val) == 1) 233 tmp.size_key = val; 234 else if (sscanf(buff, "value_size:\t%u", &val) == 1) 235 tmp.size_value = val; 236 else if (sscanf(buff, "max_entries:\t%u", &val) == 1) 237 tmp.max_elem = val; 238 } 239 240 fclose(fp); 241 242 if (!memcmp(&tmp, map, length)) { 243 return 0; 244 } else { 245 memset(&zero, 0, sizeof(zero)); 246 /* If kernel doesn't have eBPF-related fdinfo, we cannot do much, 247 * so just accept it. We know we do have an eBPF fd and in this 248 * case, everything is 0. It is guaranteed that no such map exists 249 * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC. 250 */ 251 if (!memcmp(&tmp, &zero, length)) 252 return 0; 253 254 fprintf(stderr, "Map specs from pinned file differ!\n"); 255 return -EINVAL; 256 } 257} 258 259static int bpf_mnt_fs(const char *target) 260{ 261 bool bind_done = false; 262 263 while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) { 264 if (errno != EINVAL || bind_done) { 265 fprintf(stderr, "mount --make-private %s failed: %s\n", 266 target, strerror(errno)); 267 return -1; 268 } 269 270 if (mount(target, target, "none", MS_BIND, NULL)) { 271 fprintf(stderr, "mount --bind %s %s failed: %s\n", 272 target, target, strerror(errno)); 273 return -1; 274 } 275 276 bind_done = true; 277 } 278 279 if (mount("bpf", target, "bpf", 0, NULL)) { 280 fprintf(stderr, "mount -t bpf bpf %s failed: %s\n", 281 target, strerror(errno)); 282 return -1; 283 } 284 285 return 0; 286} 287 288static int bpf_valid_mntpt(const char *mnt, unsigned long magic) 289{ 290 struct statfs st_fs; 291 292 if (statfs(mnt, &st_fs) < 0) 293 return -ENOENT; 294 if ((unsigned long)st_fs.f_type != magic) 295 return -ENOENT; 296 297 return 0; 298} 299 300static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, 301 char *mnt, int len, 302 const char * const *known_mnts) 303{ 304 const char * const *ptr; 305 char type[100]; 306 FILE *fp; 307 308 if (known_mnts) { 309 ptr = known_mnts; 310 while (*ptr) { 311 if (bpf_valid_mntpt(*ptr, magic) == 0) { 312 strncpy(mnt, *ptr, len - 1); 313 mnt[len - 1] = 0; 314 return mnt; 315 } 316 ptr++; 317 } 318 } 319 320 fp = fopen("/proc/mounts", "r"); 321 if (fp == NULL || len != PATH_MAX) 322 return NULL; 323 324 while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", 325 mnt, type) == 2) { 326 if (strcmp(type, fstype) == 0) 327 break; 328 } 329 330 fclose(fp); 331 if (strcmp(type, fstype) != 0) 332 return NULL; 333 334 return mnt; 335} 336 337int bpf_trace_pipe(void) 338{ 339 char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT; 340 static const char * const tracefs_known_mnts[] = { 341 TRACE_DIR_MNT, 342 "/sys/kernel/debug/tracing", 343 "/tracing", 344 "/trace", 345 0, 346 }; 347 char tpipe[PATH_MAX]; 348 const char *mnt; 349 int fd; 350 351 mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, 352 sizeof(tracefs_mnt), tracefs_known_mnts); 353 if (!mnt) { 354 fprintf(stderr, "tracefs not mounted?\n"); 355 return -1; 356 } 357 358 snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); 359 360 fd = open(tpipe, O_RDONLY); 361 if (fd < 0) 362 return -1; 363 364 fprintf(stderr, "Running! Hang up with ^C!\n\n"); 365 while (1) { 366 static char buff[4096]; 367 ssize_t ret; 368 369 ret = read(fd, buff, sizeof(buff) - 1); 370 if (ret > 0) { 371 write(2, buff, ret); 372 fflush(stderr); 373 } 374 } 375 376 return 0; 377} 378 379static const char *bpf_get_tc_dir(void) 380{ 381 static bool bpf_mnt_cached = false; 382 static char bpf_tc_dir[PATH_MAX]; 383 static const char *mnt; 384 static const char * const bpf_known_mnts[] = { 385 BPF_DIR_MNT, 386 0, 387 }; 388 char bpf_mnt[PATH_MAX] = BPF_DIR_MNT; 389 char bpf_glo_dir[PATH_MAX]; 390 int ret; 391 392 if (bpf_mnt_cached) 393 goto done; 394 395 mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt), 396 bpf_known_mnts); 397 if (!mnt) { 398 mnt = getenv(BPF_ENV_MNT); 399 if (!mnt) 400 mnt = BPF_DIR_MNT; 401 ret = bpf_mnt_fs(mnt); 402 if (ret) { 403 mnt = NULL; 404 goto out; 405 } 406 } 407 408 snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC); 409 ret = mkdir(bpf_tc_dir, S_IRWXU); 410 if (ret && errno != EEXIST) { 411 fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir, 412 strerror(errno)); 413 mnt = NULL; 414 goto out; 415 } 416 417 snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s", 418 bpf_tc_dir, BPF_DIR_GLOBALS); 419 ret = mkdir(bpf_glo_dir, S_IRWXU); 420 if (ret && errno != EEXIST) { 421 fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, 422 strerror(errno)); 423 mnt = NULL; 424 goto out; 425 } 426 427 mnt = bpf_tc_dir; 428out: 429 bpf_mnt_cached = true; 430done: 431 return mnt; 432} 433 434static int bpf_obj_get(const char *pathname) 435{ 436 union bpf_attr attr; 437 char tmp[PATH_MAX]; 438 439 if (strlen(pathname) > 2 && pathname[0] == 'm' && 440 pathname[1] == ':' && bpf_get_tc_dir()) { 441 snprintf(tmp, sizeof(tmp), "%s/%s", 442 bpf_get_tc_dir(), pathname + 2); 443 pathname = tmp; 444 } 445 446 memset(&attr, 0, sizeof(attr)); 447 attr.pathname = bpf_ptr_to_u64(pathname); 448 449 return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); 450} 451 452const char *bpf_default_section(const enum bpf_prog_type type) 453{ 454 switch (type) { 455 case BPF_PROG_TYPE_SCHED_CLS: 456 return ELF_SECTION_CLASSIFIER; 457 case BPF_PROG_TYPE_SCHED_ACT: 458 return ELF_SECTION_ACTION; 459 default: 460 return NULL; 461 } 462} 463 464enum bpf_mode { 465 CBPF_BYTECODE = 0, 466 CBPF_FILE, 467 EBPF_OBJECT, 468 EBPF_PINNED, 469 __BPF_MODE_MAX, 470#define BPF_MODE_MAX __BPF_MODE_MAX 471}; 472 473static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl, 474 enum bpf_prog_type *type, enum bpf_mode *mode, 475 const char **ptr_object, const char **ptr_section, 476 const char **ptr_uds_name, struct sock_filter *opcodes) 477{ 478 const char *file, *section, *uds_name; 479 bool verbose = false; 480 int ret, argc; 481 char **argv; 482 483 argv = *ptr_argv; 484 argc = *ptr_argc; 485 486 if (opt_tbl[CBPF_BYTECODE] && 487 (matches(*argv, "bytecode") == 0 || 488 strcmp(*argv, "bc") == 0)) { 489 *mode = CBPF_BYTECODE; 490 } else if (opt_tbl[CBPF_FILE] && 491 (matches(*argv, "bytecode-file") == 0 || 492 strcmp(*argv, "bcf") == 0)) { 493 *mode = CBPF_FILE; 494 } else if (opt_tbl[EBPF_OBJECT] && 495 (matches(*argv, "object-file") == 0 || 496 strcmp(*argv, "obj") == 0)) { 497 *mode = EBPF_OBJECT; 498 } else if (opt_tbl[EBPF_PINNED] && 499 (matches(*argv, "object-pinned") == 0 || 500 matches(*argv, "pinned") == 0 || 501 matches(*argv, "fd") == 0)) { 502 *mode = EBPF_PINNED; 503 } else { 504 fprintf(stderr, "What mode is \"%s\"?\n", *argv); 505 return -1; 506 } 507 508 NEXT_ARG(); 509 file = section = uds_name = NULL; 510 if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) { 511 file = *argv; 512 NEXT_ARG_FWD(); 513 514 if (*type == BPF_PROG_TYPE_UNSPEC) { 515 if (argc > 0 && matches(*argv, "type") == 0) { 516 NEXT_ARG(); 517 if (matches(*argv, "cls") == 0) { 518 *type = BPF_PROG_TYPE_SCHED_CLS; 519 } else if (matches(*argv, "act") == 0) { 520 *type = BPF_PROG_TYPE_SCHED_ACT; 521 } else { 522 fprintf(stderr, "What type is \"%s\"?\n", 523 *argv); 524 return -1; 525 } 526 NEXT_ARG_FWD(); 527 } else { 528 *type = BPF_PROG_TYPE_SCHED_CLS; 529 } 530 } 531 532 section = bpf_default_section(*type); 533 if (argc > 0 && matches(*argv, "section") == 0) { 534 NEXT_ARG(); 535 section = *argv; 536 NEXT_ARG_FWD(); 537 } 538 539 uds_name = getenv(BPF_ENV_UDS); 540 if (argc > 0 && !uds_name && 541 matches(*argv, "export") == 0) { 542 NEXT_ARG(); 543 uds_name = *argv; 544 NEXT_ARG_FWD(); 545 } 546 547 if (argc > 0 && matches(*argv, "verbose") == 0) { 548 verbose = true; 549 NEXT_ARG_FWD(); 550 } 551 552 PREV_ARG(); 553 } 554 555 if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE) 556 ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE); 557 else if (*mode == EBPF_OBJECT) 558 ret = bpf_obj_open(file, *type, section, verbose); 559 else if (*mode == EBPF_PINNED) 560 ret = bpf_obj_get(file); 561 else 562 return -1; 563 564 if (ptr_object) 565 *ptr_object = file; 566 if (ptr_section) 567 *ptr_section = section; 568 if (ptr_uds_name) 569 *ptr_uds_name = uds_name; 570 571 *ptr_argc = argc; 572 *ptr_argv = argv; 573 574 return ret; 575} 576 577int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl, 578 enum bpf_prog_type type, const char **ptr_object, 579 const char **ptr_uds_name, struct nlmsghdr *n) 580{ 581 struct sock_filter opcodes[BPF_MAXINSNS]; 582 const bool opt_tbl[BPF_MODE_MAX] = { 583 [CBPF_BYTECODE] = true, 584 [CBPF_FILE] = true, 585 [EBPF_OBJECT] = true, 586 [EBPF_PINNED] = true, 587 }; 588 char annotation[256]; 589 const char *section; 590 enum bpf_mode mode; 591 int ret; 592 593 ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode, 594 ptr_object, §ion, ptr_uds_name, opcodes); 595 if (ret < 0) 596 return ret; 597 598 if (mode == CBPF_BYTECODE || mode == CBPF_FILE) { 599 addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret); 600 addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes, 601 ret * sizeof(struct sock_filter)); 602 } 603 604 if (mode == EBPF_OBJECT || mode == EBPF_PINNED) { 605 snprintf(annotation, sizeof(annotation), "%s:[%s]", 606 basename(*ptr_object), mode == EBPF_PINNED ? 607 "*fsobj" : section); 608 609 addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret); 610 addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation); 611 } 612 613 return 0; 614} 615 616int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv) 617{ 618 enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC; 619 const bool opt_tbl[BPF_MODE_MAX] = { 620 [CBPF_BYTECODE] = false, 621 [CBPF_FILE] = false, 622 [EBPF_OBJECT] = true, 623 [EBPF_PINNED] = true, 624 }; 625 const struct bpf_elf_map test = { 626 .type = BPF_MAP_TYPE_PROG_ARRAY, 627 .size_key = sizeof(int), 628 .size_value = sizeof(int), 629 }; 630 int ret, prog_fd, map_fd; 631 const char *section; 632 enum bpf_mode mode; 633 uint32_t map_key; 634 635 prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode, 636 NULL, §ion, NULL, NULL); 637 if (prog_fd < 0) 638 return prog_fd; 639 if (key) { 640 map_key = *key; 641 } else { 642 ret = sscanf(section, "%*i/%i", &map_key); 643 if (ret != 1) { 644 fprintf(stderr, "Couldn\'t infer map key from section " 645 "name! Please provide \'key\' argument!\n"); 646 ret = -EINVAL; 647 goto out_prog; 648 } 649 } 650 651 map_fd = bpf_obj_get(map_path); 652 if (map_fd < 0) { 653 fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n", 654 map_path, strerror(errno)); 655 ret = map_fd; 656 goto out_prog; 657 } 658 659 ret = bpf_map_selfcheck_pinned(map_fd, &test, 660 offsetof(struct bpf_elf_map, max_elem)); 661 if (ret < 0) { 662 fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path); 663 goto out_map; 664 } 665 666 ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY); 667 if (ret < 0) 668 fprintf(stderr, "Map update failed: %s\n", strerror(errno)); 669out_map: 670 close(map_fd); 671out_prog: 672 close(prog_fd); 673 return ret; 674} 675 676#ifdef HAVE_ELF 677struct bpf_elf_prog { 678 enum bpf_prog_type type; 679 const struct bpf_insn *insns; 680 size_t size; 681 const char *license; 682}; 683 684struct bpf_hash_entry { 685 unsigned int pinning; 686 const char *subpath; 687 struct bpf_hash_entry *next; 688}; 689 690struct bpf_elf_ctx { 691 Elf *elf_fd; 692 GElf_Ehdr elf_hdr; 693 Elf_Data *sym_tab; 694 Elf_Data *str_tab; 695 int obj_fd; 696 int map_fds[ELF_MAX_MAPS]; 697 struct bpf_elf_map maps[ELF_MAX_MAPS]; 698 int sym_num; 699 int map_num; 700 bool *sec_done; 701 int sec_maps; 702 char license[ELF_MAX_LICENSE_LEN]; 703 enum bpf_prog_type type; 704 bool verbose; 705 struct bpf_elf_st stat; 706 struct bpf_hash_entry *ht[256]; 707}; 708 709struct bpf_elf_sec_data { 710 GElf_Shdr sec_hdr; 711 Elf_Data *sec_data; 712 const char *sec_name; 713}; 714 715struct bpf_map_data { 716 int *fds; 717 const char *obj; 718 struct bpf_elf_st *st; 719 struct bpf_elf_map *ent; 720}; 721 722/* If we provide a small buffer with log level enabled, the kernel 723 * could fail program load as no buffer space is available for the 724 * log and thus verifier fails. In case something doesn't pass the 725 * verifier we still want to hand something descriptive to the user. 726 */ 727static char bpf_log_buf[65536]; 728 729static __check_format_string(1, 2) void bpf_dump_error(const char *format, ...) 730{ 731 va_list vl; 732 733 va_start(vl, format); 734 vfprintf(stderr, format, vl); 735 va_end(vl); 736 737 if (bpf_log_buf[0]) { 738 fprintf(stderr, "%s\n", bpf_log_buf); 739 memset(bpf_log_buf, 0, sizeof(bpf_log_buf)); 740 } 741} 742 743static int bpf_map_create(enum bpf_map_type type, unsigned int size_key, 744 unsigned int size_value, unsigned int max_elem) 745{ 746 union bpf_attr attr = { 747 .map_type = type, 748 .key_size = size_key, 749 .value_size = size_value, 750 .max_entries = max_elem, 751 }; 752 753 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 754} 755 756static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, 757 size_t size, const char *license) 758{ 759 union bpf_attr attr = { 760 .prog_type = type, 761 .insns = bpf_ptr_to_u64(insns), 762 .insn_cnt = size / sizeof(struct bpf_insn), 763 .license = bpf_ptr_to_u64(license), 764 .log_buf = bpf_ptr_to_u64(bpf_log_buf), 765 .log_size = sizeof(bpf_log_buf), 766 .log_level = 1, 767 }; 768 769 if (getenv(BPF_ENV_NOLOG)) { 770 attr.log_buf = 0; 771 attr.log_size = 0; 772 attr.log_level = 0; 773 } 774 775 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 776} 777 778static int bpf_obj_pin(int fd, const char *pathname) 779{ 780 union bpf_attr attr = { 781 .pathname = bpf_ptr_to_u64(pathname), 782 .bpf_fd = fd, 783 }; 784 785 return bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); 786} 787 788static int bpf_obj_hash(const char *object, uint8_t *out, size_t len) 789{ 790 struct sockaddr_alg alg = { 791 .salg_family = AF_ALG, 792 .salg_type = "hash", 793 .salg_name = "sha1", 794 }; 795 int ret, cfd, ofd, ffd; 796 struct stat stbuff; 797 ssize_t size; 798 799 if (!object || len != 20) 800 return -EINVAL; 801 802 cfd = socket(AF_ALG, SOCK_SEQPACKET, 0); 803 if (cfd < 0) { 804 fprintf(stderr, "Cannot get AF_ALG socket: %s\n", 805 strerror(errno)); 806 return cfd; 807 } 808 809 ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg)); 810 if (ret < 0) { 811 fprintf(stderr, "Error binding socket: %s\n", strerror(errno)); 812 goto out_cfd; 813 } 814 815 ofd = accept(cfd, NULL, 0); 816 if (ofd < 0) { 817 fprintf(stderr, "Error accepting socket: %s\n", 818 strerror(errno)); 819 ret = ofd; 820 goto out_cfd; 821 } 822 823 ffd = open(object, O_RDONLY); 824 if (ffd < 0) { 825 fprintf(stderr, "Error opening object %s: %s\n", 826 object, strerror(errno)); 827 ret = ffd; 828 goto out_ofd; 829 } 830 831 ret = fstat(ffd, &stbuff); 832 if (ret < 0) { 833 fprintf(stderr, "Error doing fstat: %s\n", 834 strerror(errno)); 835 goto out_ffd; 836 } 837 838 size = sendfile(ofd, ffd, NULL, stbuff.st_size); 839 if (size != stbuff.st_size) { 840 fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n", 841 size, stbuff.st_size, strerror(errno)); 842 ret = -1; 843 goto out_ffd; 844 } 845 846 size = read(ofd, out, len); 847 if (size != len) { 848 fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n", 849 size, len, strerror(errno)); 850 ret = -1; 851 } else { 852 ret = 0; 853 } 854out_ffd: 855 close(ffd); 856out_ofd: 857 close(ofd); 858out_cfd: 859 close(cfd); 860 return ret; 861} 862 863static const char *bpf_get_obj_uid(const char *pathname) 864{ 865 static bool bpf_uid_cached = false; 866 static char bpf_uid[64]; 867 uint8_t tmp[20]; 868 int ret; 869 870 if (bpf_uid_cached) 871 goto done; 872 873 ret = bpf_obj_hash(pathname, tmp, sizeof(tmp)); 874 if (ret) { 875 fprintf(stderr, "Object hashing failed!\n"); 876 return NULL; 877 } 878 879 hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid)); 880 bpf_uid_cached = true; 881done: 882 return bpf_uid; 883} 884 885static int bpf_init_env(const char *pathname) 886{ 887 struct rlimit limit = { 888 .rlim_cur = RLIM_INFINITY, 889 .rlim_max = RLIM_INFINITY, 890 }; 891 892 /* Don't bother in case we fail! */ 893 setrlimit(RLIMIT_MEMLOCK, &limit); 894 895 if (!bpf_get_tc_dir()) { 896 fprintf(stderr, "Continuing without mounted eBPF fs. " 897 "Too old kernel?\n"); 898 return 0; 899 } 900 901 if (!bpf_get_obj_uid(pathname)) 902 return -1; 903 904 return 0; 905} 906 907static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, 908 uint32_t pinning) 909{ 910 struct bpf_hash_entry *entry; 911 912 entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; 913 while (entry && entry->pinning != pinning) 914 entry = entry->next; 915 916 return entry ? entry->subpath : NULL; 917} 918 919static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx, 920 uint32_t pinning) 921{ 922 switch (pinning) { 923 case PIN_OBJECT_NS: 924 case PIN_GLOBAL_NS: 925 return false; 926 case PIN_NONE: 927 return true; 928 default: 929 return !bpf_custom_pinning(ctx, pinning); 930 } 931} 932 933static void bpf_make_pathname(char *pathname, size_t len, const char *name, 934 const struct bpf_elf_ctx *ctx, uint32_t pinning) 935{ 936 switch (pinning) { 937 case PIN_OBJECT_NS: 938 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), 939 bpf_get_obj_uid(NULL), name); 940 break; 941 case PIN_GLOBAL_NS: 942 snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(), 943 BPF_DIR_GLOBALS, name); 944 break; 945 default: 946 snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(), 947 bpf_custom_pinning(ctx, pinning), name); 948 break; 949 } 950} 951 952static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, 953 uint32_t pinning) 954{ 955 char pathname[PATH_MAX]; 956 957 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) 958 return 0; 959 960 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); 961 return bpf_obj_get(pathname); 962} 963 964static int bpf_make_obj_path(void) 965{ 966 char tmp[PATH_MAX]; 967 int ret; 968 969 snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(), 970 bpf_get_obj_uid(NULL)); 971 972 ret = mkdir(tmp, S_IRWXU); 973 if (ret && errno != EEXIST) { 974 fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); 975 return ret; 976 } 977 978 return 0; 979} 980 981static int bpf_make_custom_path(const char *todo) 982{ 983 char tmp[PATH_MAX], rem[PATH_MAX], *sub; 984 int ret; 985 986 snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir()); 987 snprintf(rem, sizeof(rem), "%s/", todo); 988 sub = strtok(rem, "/"); 989 990 while (sub) { 991 if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) 992 return -EINVAL; 993 994 strcat(tmp, sub); 995 strcat(tmp, "/"); 996 997 ret = mkdir(tmp, S_IRWXU); 998 if (ret && errno != EEXIST) { 999 fprintf(stderr, "mkdir %s failed: %s\n", tmp, 1000 strerror(errno)); 1001 return ret; 1002 } 1003 1004 sub = strtok(NULL, "/"); 1005 } 1006 1007 return 0; 1008} 1009 1010static int bpf_place_pinned(int fd, const char *name, 1011 const struct bpf_elf_ctx *ctx, uint32_t pinning) 1012{ 1013 char pathname[PATH_MAX]; 1014 const char *tmp; 1015 int ret = 0; 1016 1017 if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir()) 1018 return 0; 1019 1020 if (pinning == PIN_OBJECT_NS) 1021 ret = bpf_make_obj_path(); 1022 else if ((tmp = bpf_custom_pinning(ctx, pinning))) 1023 ret = bpf_make_custom_path(tmp); 1024 if (ret < 0) 1025 return ret; 1026 1027 bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); 1028 return bpf_obj_pin(fd, pathname); 1029} 1030 1031static int bpf_prog_attach(const char *section, 1032 const struct bpf_elf_prog *prog, bool verbose) 1033{ 1034 int fd; 1035 1036 /* We can add pinning here later as well, same as bpf_map_attach(). */ 1037 errno = 0; 1038 fd = bpf_prog_load(prog->type, prog->insns, prog->size, 1039 prog->license); 1040 if (fd < 0 || verbose) { 1041 bpf_dump_error("Prog section \'%s\' (type:%u insns:%zu " 1042 "license:\'%s\') %s%s (%d)!\n\n", 1043 section, prog->type, 1044 prog->size / sizeof(struct bpf_insn), 1045 prog->license, fd < 0 ? "rejected: " : 1046 "loaded", fd < 0 ? strerror(errno) : "", 1047 fd < 0 ? errno : fd); 1048 } 1049 1050 return fd; 1051} 1052 1053static int bpf_map_attach(const char *name, const struct bpf_elf_map *map, 1054 const struct bpf_elf_ctx *ctx, bool verbose) 1055{ 1056 int fd, ret; 1057 1058 fd = bpf_probe_pinned(name, ctx, map->pinning); 1059 if (fd > 0) { 1060 ret = bpf_map_selfcheck_pinned(fd, map, 1061 offsetof(struct bpf_elf_map, 1062 id)); 1063 if (ret < 0) { 1064 close(fd); 1065 fprintf(stderr, "Map \'%s\' self-check failed!\n", 1066 name); 1067 return ret; 1068 } 1069 if (verbose) 1070 fprintf(stderr, "Map \'%s\' loaded as pinned!\n", 1071 name); 1072 return fd; 1073 } 1074 1075 errno = 0; 1076 fd = bpf_map_create(map->type, map->size_key, map->size_value, 1077 map->max_elem); 1078 if (fd < 0 || verbose) { 1079 bpf_dump_error("Map \'%s\' (type:%u id:%u pinning:%u " 1080 "ksize:%u vsize:%u max-elems:%u) %s%s (%d)!\n", 1081 name, map->type, map->id, map->pinning, 1082 map->size_key, map->size_value, map->max_elem, 1083 fd < 0 ? "rejected: " : "loaded", fd < 0 ? 1084 strerror(errno) : "", fd < 0 ? errno : fd); 1085 if (fd < 0) 1086 return fd; 1087 } 1088 1089 ret = bpf_place_pinned(fd, name, ctx, map->pinning); 1090 if (ret < 0 && errno != EEXIST) { 1091 fprintf(stderr, "Could not pin %s map: %s\n", name, 1092 strerror(errno)); 1093 close(fd); 1094 return ret; 1095 } 1096 1097 return fd; 1098} 1099 1100#define __ELF_ST_BIND(x) ((x) >> 4) 1101#define __ELF_ST_TYPE(x) (((unsigned int) x) & 0xf) 1102 1103static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, 1104 const GElf_Sym *sym) 1105{ 1106 return ctx->str_tab->d_buf + sym->st_name; 1107} 1108 1109static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) 1110{ 1111 GElf_Sym sym; 1112 int i; 1113 1114 for (i = 0; i < ctx->sym_num; i++) { 1115 if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) 1116 continue; 1117 1118 if (__ELF_ST_BIND(sym.st_info) != STB_GLOBAL || 1119 __ELF_ST_TYPE(sym.st_info) != STT_NOTYPE || 1120 sym.st_shndx != ctx->sec_maps || 1121 sym.st_value / sizeof(struct bpf_elf_map) != which) 1122 continue; 1123 1124 return bpf_str_tab_name(ctx, &sym); 1125 } 1126 1127 return NULL; 1128} 1129 1130static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx) 1131{ 1132 const char *map_name; 1133 int i, fd; 1134 1135 for (i = 0; i < ctx->map_num; i++) { 1136 map_name = bpf_map_fetch_name(ctx, i); 1137 if (!map_name) 1138 return -EIO; 1139 1140 fd = bpf_map_attach(map_name, &ctx->maps[i], ctx, 1141 ctx->verbose); 1142 if (fd < 0) 1143 return fd; 1144 1145 ctx->map_fds[i] = fd; 1146 } 1147 1148 return 0; 1149} 1150 1151static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section, 1152 struct bpf_elf_sec_data *data) 1153{ 1154 Elf_Data *sec_edata; 1155 GElf_Shdr sec_hdr; 1156 Elf_Scn *sec_fd; 1157 char *sec_name; 1158 1159 memset(data, 0, sizeof(*data)); 1160 1161 sec_fd = elf_getscn(ctx->elf_fd, section); 1162 if (!sec_fd) 1163 return -EINVAL; 1164 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) 1165 return -EIO; 1166 1167 sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, 1168 sec_hdr.sh_name); 1169 if (!sec_name || !sec_hdr.sh_size) 1170 return -ENOENT; 1171 1172 sec_edata = elf_getdata(sec_fd, NULL); 1173 if (!sec_edata || elf_getdata(sec_fd, sec_edata)) 1174 return -EIO; 1175 1176 memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); 1177 1178 data->sec_name = sec_name; 1179 data->sec_data = sec_edata; 1180 return 0; 1181} 1182 1183static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section, 1184 struct bpf_elf_sec_data *data) 1185{ 1186 if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0) 1187 return -EINVAL; 1188 1189 ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map); 1190 ctx->sec_maps = section; 1191 ctx->sec_done[section] = true; 1192 1193 if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) { 1194 fprintf(stderr, "Too many BPF maps in ELF section!\n"); 1195 return -ENOMEM; 1196 } 1197 1198 memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size); 1199 return 0; 1200} 1201 1202static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section, 1203 struct bpf_elf_sec_data *data) 1204{ 1205 if (data->sec_data->d_size > sizeof(ctx->license)) 1206 return -ENOMEM; 1207 1208 memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size); 1209 ctx->sec_done[section] = true; 1210 return 0; 1211} 1212 1213static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section, 1214 struct bpf_elf_sec_data *data) 1215{ 1216 ctx->sym_tab = data->sec_data; 1217 ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize; 1218 ctx->sec_done[section] = true; 1219 return 0; 1220} 1221 1222static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, 1223 struct bpf_elf_sec_data *data) 1224{ 1225 ctx->str_tab = data->sec_data; 1226 ctx->sec_done[section] = true; 1227 return 0; 1228} 1229 1230static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx) 1231{ 1232 struct bpf_elf_sec_data data; 1233 int i, ret = -1; 1234 1235 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1236 ret = bpf_fill_section_data(ctx, i, &data); 1237 if (ret < 0) 1238 continue; 1239 1240 if (!strcmp(data.sec_name, ELF_SECTION_MAPS)) 1241 ret = bpf_fetch_maps(ctx, i, &data); 1242 else if (!strcmp(data.sec_name, ELF_SECTION_LICENSE)) 1243 ret = bpf_fetch_license(ctx, i, &data); 1244 else if (data.sec_hdr.sh_type == SHT_SYMTAB) 1245 ret = bpf_fetch_symtab(ctx, i, &data); 1246 else if (data.sec_hdr.sh_type == SHT_STRTAB && 1247 i != ctx->elf_hdr.e_shstrndx) 1248 ret = bpf_fetch_strtab(ctx, i, &data); 1249 if (ret < 0) { 1250 fprintf(stderr, "Error parsing section %d! Perhaps" 1251 "check with readelf -a?\n", i); 1252 break; 1253 } 1254 } 1255 1256 if (ctx->sym_tab && ctx->str_tab && ctx->sec_maps) { 1257 ret = bpf_maps_attach_all(ctx); 1258 if (ret < 0) { 1259 fprintf(stderr, "Error loading maps into kernel!\n"); 1260 return ret; 1261 } 1262 } 1263 1264 return ret; 1265} 1266 1267static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section) 1268{ 1269 struct bpf_elf_sec_data data; 1270 struct bpf_elf_prog prog; 1271 int ret, i, fd = -1; 1272 1273 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1274 if (ctx->sec_done[i]) 1275 continue; 1276 1277 ret = bpf_fill_section_data(ctx, i, &data); 1278 if (ret < 0 || strcmp(data.sec_name, section)) 1279 continue; 1280 1281 memset(&prog, 0, sizeof(prog)); 1282 prog.type = ctx->type; 1283 prog.insns = data.sec_data->d_buf; 1284 prog.size = data.sec_data->d_size; 1285 prog.license = ctx->license; 1286 1287 fd = bpf_prog_attach(section, &prog, ctx->verbose); 1288 if (fd < 0) 1289 continue; 1290 1291 ctx->sec_done[i] = true; 1292 break; 1293 } 1294 1295 return fd; 1296} 1297 1298static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, 1299 struct bpf_elf_sec_data *data_relo, 1300 struct bpf_elf_sec_data *data_insn) 1301{ 1302 Elf_Data *idata = data_insn->sec_data; 1303 GElf_Shdr *rhdr = &data_relo->sec_hdr; 1304 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; 1305 struct bpf_insn *insns = idata->d_buf; 1306 unsigned int num_insns = idata->d_size / sizeof(*insns); 1307 1308 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { 1309 unsigned int ioff, rmap; 1310 GElf_Rel relo; 1311 GElf_Sym sym; 1312 1313 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) 1314 return -EIO; 1315 1316 ioff = relo.r_offset / sizeof(struct bpf_insn); 1317 if (ioff >= num_insns || 1318 insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) 1319 return -EINVAL; 1320 1321 if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) 1322 return -EIO; 1323 1324 rmap = sym.st_value / sizeof(struct bpf_elf_map); 1325 if (rmap >= ARRAY_SIZE(ctx->map_fds)) 1326 return -EINVAL; 1327 if (!ctx->map_fds[rmap]) 1328 return -EINVAL; 1329 1330 if (ctx->verbose) 1331 fprintf(stderr, "Map \'%s\' (%d) injected into prog " 1332 "section \'%s\' at offset %u!\n", 1333 bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap], 1334 data_insn->sec_name, ioff); 1335 1336 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; 1337 insns[ioff].imm = ctx->map_fds[rmap]; 1338 } 1339 1340 return 0; 1341} 1342 1343static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section) 1344{ 1345 struct bpf_elf_sec_data data_relo, data_insn; 1346 struct bpf_elf_prog prog; 1347 int ret, idx, i, fd = -1; 1348 1349 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1350 ret = bpf_fill_section_data(ctx, i, &data_relo); 1351 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) 1352 continue; 1353 1354 idx = data_relo.sec_hdr.sh_info; 1355 ret = bpf_fill_section_data(ctx, idx, &data_insn); 1356 if (ret < 0 || strcmp(data_insn.sec_name, section)) 1357 continue; 1358 1359 ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn); 1360 if (ret < 0) 1361 continue; 1362 1363 memset(&prog, 0, sizeof(prog)); 1364 prog.type = ctx->type; 1365 prog.insns = data_insn.sec_data->d_buf; 1366 prog.size = data_insn.sec_data->d_size; 1367 prog.license = ctx->license; 1368 1369 fd = bpf_prog_attach(section, &prog, ctx->verbose); 1370 if (fd < 0) 1371 continue; 1372 1373 ctx->sec_done[i] = true; 1374 ctx->sec_done[idx] = true; 1375 break; 1376 } 1377 1378 return fd; 1379} 1380 1381static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section) 1382{ 1383 int ret = -1; 1384 1385 if (ctx->sym_tab) 1386 ret = bpf_fetch_prog_relo(ctx, section); 1387 if (ret < 0) 1388 ret = bpf_fetch_prog(ctx, section); 1389 1390 return ret; 1391} 1392 1393static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id) 1394{ 1395 int i; 1396 1397 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) 1398 if (ctx->map_fds[i] && ctx->maps[i].id == id && 1399 ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) 1400 return i; 1401 return -1; 1402} 1403 1404static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx) 1405{ 1406 struct bpf_elf_sec_data data; 1407 uint32_t map_id, key_id; 1408 int fd, i, ret, idx; 1409 1410 for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { 1411 if (ctx->sec_done[i]) 1412 continue; 1413 1414 ret = bpf_fill_section_data(ctx, i, &data); 1415 if (ret < 0) 1416 continue; 1417 1418 ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id); 1419 if (ret != 2) 1420 continue; 1421 1422 idx = bpf_find_map_by_id(ctx, map_id); 1423 if (idx < 0) 1424 continue; 1425 1426 fd = bpf_fetch_prog_sec(ctx, data.sec_name); 1427 if (fd < 0) 1428 return -EIO; 1429 1430 ret = bpf_map_update(ctx->map_fds[idx], &key_id, 1431 &fd, BPF_ANY); 1432 if (ret < 0) 1433 return -ENOENT; 1434 1435 ctx->sec_done[i] = true; 1436 } 1437 1438 return 0; 1439} 1440 1441static void bpf_save_finfo(struct bpf_elf_ctx *ctx) 1442{ 1443 struct stat st; 1444 int ret; 1445 1446 memset(&ctx->stat, 0, sizeof(ctx->stat)); 1447 1448 ret = fstat(ctx->obj_fd, &st); 1449 if (ret < 0) { 1450 fprintf(stderr, "Stat of elf file failed: %s\n", 1451 strerror(errno)); 1452 return; 1453 } 1454 1455 ctx->stat.st_dev = st.st_dev; 1456 ctx->stat.st_ino = st.st_ino; 1457} 1458 1459static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path) 1460{ 1461 char buff[PATH_MAX]; 1462 1463 while (fgets(buff, sizeof(buff), fp)) { 1464 char *ptr = buff; 1465 1466 while (*ptr == ' ' || *ptr == '\t') 1467 ptr++; 1468 1469 if (*ptr == '#' || *ptr == '\n' || *ptr == 0) 1470 continue; 1471 1472 if (sscanf(ptr, "%i %s\n", id, path) != 2 && 1473 sscanf(ptr, "%i %s #", id, path) != 2) { 1474 strcpy(path, ptr); 1475 return -1; 1476 } 1477 1478 return 1; 1479 } 1480 1481 return 0; 1482} 1483 1484static bool bpf_pinning_reserved(uint32_t pinning) 1485{ 1486 switch (pinning) { 1487 case PIN_NONE: 1488 case PIN_OBJECT_NS: 1489 case PIN_GLOBAL_NS: 1490 return true; 1491 default: 1492 return false; 1493 } 1494} 1495 1496static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file) 1497{ 1498 struct bpf_hash_entry *entry; 1499 char subpath[PATH_MAX]; 1500 uint32_t pinning; 1501 FILE *fp; 1502 int ret; 1503 1504 fp = fopen(db_file, "r"); 1505 if (!fp) 1506 return; 1507 1508 memset(subpath, 0, sizeof(subpath)); 1509 while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) { 1510 if (ret == -1) { 1511 fprintf(stderr, "Database %s is corrupted at: %s\n", 1512 db_file, subpath); 1513 fclose(fp); 1514 return; 1515 } 1516 1517 if (bpf_pinning_reserved(pinning)) { 1518 fprintf(stderr, "Database %s, id %u is reserved - " 1519 "ignoring!\n", db_file, pinning); 1520 continue; 1521 } 1522 1523 entry = malloc(sizeof(*entry)); 1524 if (!entry) { 1525 fprintf(stderr, "No memory left for db entry!\n"); 1526 continue; 1527 } 1528 1529 entry->pinning = pinning; 1530 entry->subpath = strdup(subpath); 1531 if (!entry->subpath) { 1532 fprintf(stderr, "No memory left for db entry!\n"); 1533 free(entry); 1534 continue; 1535 } 1536 1537 entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; 1538 ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry; 1539 } 1540 1541 fclose(fp); 1542} 1543 1544static void bpf_hash_destroy(struct bpf_elf_ctx *ctx) 1545{ 1546 struct bpf_hash_entry *entry; 1547 int i; 1548 1549 for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) { 1550 while ((entry = ctx->ht[i]) != NULL) { 1551 ctx->ht[i] = entry->next; 1552 free((char *)entry->subpath); 1553 free(entry); 1554 } 1555 } 1556} 1557 1558static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname, 1559 enum bpf_prog_type type, bool verbose) 1560{ 1561 int ret = -EINVAL; 1562 1563 if (elf_version(EV_CURRENT) == EV_NONE || 1564 bpf_init_env(pathname)) 1565 return ret; 1566 1567 memset(ctx, 0, sizeof(*ctx)); 1568 ctx->verbose = verbose; 1569 ctx->type = type; 1570 1571 ctx->obj_fd = open(pathname, O_RDONLY); 1572 if (ctx->obj_fd < 0) 1573 return ctx->obj_fd; 1574 1575 ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL); 1576 if (!ctx->elf_fd) { 1577 ret = -EINVAL; 1578 goto out_fd; 1579 } 1580 1581 if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) != 1582 &ctx->elf_hdr) { 1583 ret = -EIO; 1584 goto out_elf; 1585 } 1586 1587 ctx->sec_done = calloc(ctx->elf_hdr.e_shnum, 1588 sizeof(*(ctx->sec_done))); 1589 if (!ctx->sec_done) { 1590 ret = -ENOMEM; 1591 goto out_elf; 1592 } 1593 1594 bpf_save_finfo(ctx); 1595 bpf_hash_init(ctx, CONFDIR "/bpf_pinning"); 1596 1597 return 0; 1598out_elf: 1599 elf_end(ctx->elf_fd); 1600out_fd: 1601 close(ctx->obj_fd); 1602 return ret; 1603} 1604 1605static int bpf_maps_count(struct bpf_elf_ctx *ctx) 1606{ 1607 int i, count = 0; 1608 1609 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { 1610 if (!ctx->map_fds[i]) 1611 break; 1612 count++; 1613 } 1614 1615 return count; 1616} 1617 1618static void bpf_maps_teardown(struct bpf_elf_ctx *ctx) 1619{ 1620 int i; 1621 1622 for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { 1623 if (ctx->map_fds[i]) 1624 close(ctx->map_fds[i]); 1625 } 1626} 1627 1628static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) 1629{ 1630 if (failure) 1631 bpf_maps_teardown(ctx); 1632 1633 bpf_hash_destroy(ctx); 1634 free(ctx->sec_done); 1635 elf_end(ctx->elf_fd); 1636 close(ctx->obj_fd); 1637} 1638 1639static struct bpf_elf_ctx __ctx; 1640 1641static int bpf_obj_open(const char *pathname, enum bpf_prog_type type, 1642 const char *section, bool verbose) 1643{ 1644 struct bpf_elf_ctx *ctx = &__ctx; 1645 int fd = 0, ret; 1646 1647 ret = bpf_elf_ctx_init(ctx, pathname, type, verbose); 1648 if (ret < 0) { 1649 fprintf(stderr, "Cannot initialize ELF context!\n"); 1650 return ret; 1651 } 1652 1653 ret = bpf_fetch_ancillary(ctx); 1654 if (ret < 0) { 1655 fprintf(stderr, "Error fetching ELF ancillary data!\n"); 1656 goto out; 1657 } 1658 1659 fd = bpf_fetch_prog_sec(ctx, section); 1660 if (fd < 0) { 1661 fprintf(stderr, "Error fetching program/map!\n"); 1662 ret = fd; 1663 goto out; 1664 } 1665 1666 ret = bpf_fill_prog_arrays(ctx); 1667 if (ret < 0) 1668 fprintf(stderr, "Error filling program arrays!\n"); 1669out: 1670 bpf_elf_ctx_destroy(ctx, ret < 0); 1671 if (ret < 0) { 1672 if (fd) 1673 close(fd); 1674 return ret; 1675 } 1676 1677 return fd; 1678} 1679 1680static int 1681bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len, 1682 const struct bpf_map_data *aux, unsigned int entries) 1683{ 1684 struct bpf_map_set_msg msg; 1685 int *cmsg_buf, min_fd; 1686 char *amsg_buf; 1687 int i; 1688 1689 memset(&msg, 0, sizeof(msg)); 1690 1691 msg.aux.uds_ver = BPF_SCM_AUX_VER; 1692 msg.aux.num_ent = entries; 1693 1694 strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name)); 1695 memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st)); 1696 1697 cmsg_buf = bpf_map_set_init(&msg, addr, addr_len); 1698 amsg_buf = (char *)msg.aux.ent; 1699 1700 for (i = 0; i < entries; i += min_fd) { 1701 int ret; 1702 1703 min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i); 1704 bpf_map_set_init_single(&msg, min_fd); 1705 1706 memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd); 1707 memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd); 1708 1709 ret = sendmsg(fd, &msg.hdr, 0); 1710 if (ret <= 0) 1711 return ret ? : -1; 1712 } 1713 1714 return 0; 1715} 1716 1717static int 1718bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux, 1719 unsigned int entries) 1720{ 1721 struct bpf_map_set_msg msg; 1722 int *cmsg_buf, min_fd; 1723 char *amsg_buf, *mmsg_buf; 1724 unsigned int needed = 1; 1725 int i; 1726 1727 cmsg_buf = bpf_map_set_init(&msg, NULL, 0); 1728 amsg_buf = (char *)msg.aux.ent; 1729 mmsg_buf = (char *)&msg.aux; 1730 1731 for (i = 0; i < min(entries, needed); i += min_fd) { 1732 struct cmsghdr *cmsg; 1733 int ret; 1734 1735 min_fd = min(entries, entries - i); 1736 bpf_map_set_init_single(&msg, min_fd); 1737 1738 ret = recvmsg(fd, &msg.hdr, 0); 1739 if (ret <= 0) 1740 return ret ? : -1; 1741 1742 cmsg = CMSG_FIRSTHDR(&msg.hdr); 1743 if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS) 1744 return -EINVAL; 1745 if (msg.hdr.msg_flags & MSG_CTRUNC) 1746 return -EIO; 1747 if (msg.aux.uds_ver != BPF_SCM_AUX_VER) 1748 return -ENOSYS; 1749 1750 min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd); 1751 if (min_fd > entries || min_fd <= 0) 1752 return -EINVAL; 1753 1754 memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd); 1755 memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd); 1756 memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent)); 1757 1758 needed = aux->num_ent; 1759 } 1760 1761 return 0; 1762} 1763 1764int bpf_send_map_fds(const char *path, const char *obj) 1765{ 1766 struct bpf_elf_ctx *ctx = &__ctx; 1767 struct sockaddr_un addr; 1768 struct bpf_map_data bpf_aux; 1769 int fd, ret; 1770 1771 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 1772 if (fd < 0) { 1773 fprintf(stderr, "Cannot open socket: %s\n", 1774 strerror(errno)); 1775 return -1; 1776 } 1777 1778 memset(&addr, 0, sizeof(addr)); 1779 addr.sun_family = AF_UNIX; 1780 strncpy(addr.sun_path, path, sizeof(addr.sun_path)); 1781 1782 ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); 1783 if (ret < 0) { 1784 fprintf(stderr, "Cannot connect to %s: %s\n", 1785 path, strerror(errno)); 1786 return -1; 1787 } 1788 1789 memset(&bpf_aux, 0, sizeof(bpf_aux)); 1790 1791 bpf_aux.fds = ctx->map_fds; 1792 bpf_aux.ent = ctx->maps; 1793 bpf_aux.st = &ctx->stat; 1794 bpf_aux.obj = obj; 1795 1796 ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux, 1797 bpf_maps_count(ctx)); 1798 if (ret < 0) 1799 fprintf(stderr, "Cannot send fds to %s: %s\n", 1800 path, strerror(errno)); 1801 1802 bpf_maps_teardown(ctx); 1803 close(fd); 1804 return ret; 1805} 1806 1807int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, 1808 unsigned int entries) 1809{ 1810 struct sockaddr_un addr; 1811 int fd, ret; 1812 1813 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 1814 if (fd < 0) { 1815 fprintf(stderr, "Cannot open socket: %s\n", 1816 strerror(errno)); 1817 return -1; 1818 } 1819 1820 memset(&addr, 0, sizeof(addr)); 1821 addr.sun_family = AF_UNIX; 1822 strncpy(addr.sun_path, path, sizeof(addr.sun_path)); 1823 1824 ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); 1825 if (ret < 0) { 1826 fprintf(stderr, "Cannot bind to socket: %s\n", 1827 strerror(errno)); 1828 return -1; 1829 } 1830 1831 ret = bpf_map_set_recv(fd, fds, aux, entries); 1832 if (ret < 0) 1833 fprintf(stderr, "Cannot recv fds from %s: %s\n", 1834 path, strerror(errno)); 1835 1836 unlink(addr.sun_path); 1837 close(fd); 1838 return ret; 1839} 1840#endif /* HAVE_ELF */ 1841