tc_bpf.c revision 11c39b5e98a163889fe5e1840e1b2a105bc33680
1/* 2 * tc_bpf.c BPF common code 3 * 4 * This program is free software; you can distribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Daniel Borkmann <dborkman@redhat.com> 10 * Jiri Pirko <jiri@resnulli.us> 11 * Alexei Starovoitov <ast@plumgrid.com> 12 */ 13 14#include <stdio.h> 15#include <stdlib.h> 16#include <unistd.h> 17#include <string.h> 18#include <stdbool.h> 19#include <errno.h> 20#include <fcntl.h> 21#include <stdarg.h> 22#include <sys/types.h> 23#include <sys/stat.h> 24#include <linux/filter.h> 25#include <linux/netlink.h> 26#include <linux/rtnetlink.h> 27 28#ifdef HAVE_ELF 29#include <libelf.h> 30#include <gelf.h> 31#endif 32 33#include "utils.h" 34#include "tc_util.h" 35#include "tc_bpf.h" 36 37int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, 38 char **bpf_string, bool *need_release, 39 const char separator) 40{ 41 char sp; 42 43 if (from_file) { 44 size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); 45 char *tmp_string; 46 FILE *fp; 47 48 tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; 49 tmp_string = malloc(tmp_len); 50 if (tmp_string == NULL) 51 return -ENOMEM; 52 53 memset(tmp_string, 0, tmp_len); 54 55 fp = fopen(arg, "r"); 56 if (fp == NULL) { 57 perror("Cannot fopen"); 58 free(tmp_string); 59 return -ENOENT; 60 } 61 62 if (!fgets(tmp_string, tmp_len, fp)) { 63 free(tmp_string); 64 fclose(fp); 65 return -EIO; 66 } 67 68 fclose(fp); 69 70 *need_release = true; 71 *bpf_string = tmp_string; 72 } else { 73 *need_release = false; 74 *bpf_string = arg; 75 } 76 77 if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || 78 sp != separator) { 79 if (*need_release) 80 free(*bpf_string); 81 return -EINVAL; 82 } 83 84 return 0; 85} 86 87int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops, 88 bool from_file) 89{ 90 char *bpf_string, *token, separator = ','; 91 int ret = 0, i = 0; 92 bool need_release; 93 __u16 bpf_len = 0; 94 95 if (argc < 1) 96 return -EINVAL; 97 if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, 98 &need_release, separator)) 99 return -EINVAL; 100 if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { 101 ret = -EINVAL; 102 goto out; 103 } 104 105 token = bpf_string; 106 while ((token = strchr(token, separator)) && (++token)[0]) { 107 if (i >= bpf_len) { 108 fprintf(stderr, "Real program length exceeds encoded " 109 "length parameter!\n"); 110 ret = -EINVAL; 111 goto out; 112 } 113 114 if (sscanf(token, "%hu %hhu %hhu %u,", 115 &bpf_ops[i].code, &bpf_ops[i].jt, 116 &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { 117 fprintf(stderr, "Error at instruction %d!\n", i); 118 ret = -EINVAL; 119 goto out; 120 } 121 122 i++; 123 } 124 125 if (i != bpf_len) { 126 fprintf(stderr, "Parsed program length is less than encoded" 127 "length parameter!\n"); 128 ret = -EINVAL; 129 goto out; 130 } 131 ret = bpf_len; 132 133out: 134 if (need_release) 135 free(bpf_string); 136 137 return ret; 138} 139 140void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len) 141{ 142 struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops); 143 int i; 144 145 if (len == 0) 146 return; 147 148 fprintf(f, "bytecode \'%u,", len); 149 150 for (i = 0; i < len - 1; i++) 151 fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt, 152 ops[i].jf, ops[i].k); 153 154 fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt, 155 ops[i].jf, ops[i].k); 156} 157 158#ifdef HAVE_ELF 159struct bpf_elf_sec_data { 160 GElf_Shdr sec_hdr; 161 char *sec_name; 162 Elf_Data *sec_data; 163}; 164 165static char bpf_log_buf[8192]; 166 167static const char *prog_type_section(enum bpf_prog_type type) 168{ 169 switch (type) { 170 case BPF_PROG_TYPE_SCHED_CLS: 171 return ELF_SECTION_CLASSIFIER; 172 /* case BPF_PROG_TYPE_SCHED_ACT: */ 173 /* return ELF_SECTION_ACTION; */ 174 default: 175 return NULL; 176 } 177} 178 179static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2); 180static void bpf_dump_error(const char *format, ...) 181{ 182 va_list vl; 183 184 va_start(vl, format); 185 vfprintf(stderr, format, vl); 186 va_end(vl); 187 188 fprintf(stderr, "%s", bpf_log_buf); 189 memset(bpf_log_buf, 0, sizeof(bpf_log_buf)); 190} 191 192static int bpf_create_map(enum bpf_map_type type, unsigned int size_key, 193 unsigned int size_value, unsigned int max_elem) 194{ 195 union bpf_attr attr = { 196 .map_type = type, 197 .key_size = size_key, 198 .value_size = size_value, 199 .max_entries = max_elem, 200 }; 201 202 return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); 203} 204 205static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns, 206 unsigned int len, const char *license) 207{ 208 union bpf_attr attr = { 209 .prog_type = type, 210 .insns = bpf_ptr_to_u64(insns), 211 .insn_cnt = len / sizeof(struct bpf_insn), 212 .license = bpf_ptr_to_u64(license), 213 .log_buf = bpf_ptr_to_u64(bpf_log_buf), 214 .log_size = sizeof(bpf_log_buf), 215 .log_level = 1, 216 }; 217 218 return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); 219} 220 221static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns, 222 unsigned int size, const char *license) 223{ 224 int prog_fd = bpf_prog_load(type, insns, size, license); 225 226 if (prog_fd < 0) 227 bpf_dump_error("BPF program rejected: %s\n", strerror(errno)); 228 229 return prog_fd; 230} 231 232static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key, 233 unsigned int size_value, unsigned int max_elem) 234{ 235 int map_fd = bpf_create_map(type, size_key, size_value, max_elem); 236 237 if (map_fd < 0) 238 bpf_dump_error("BPF map rejected: %s\n", strerror(errno)); 239 240 return map_fd; 241} 242 243static void bpf_maps_init(int *map_fds, unsigned int max_fds) 244{ 245 int i; 246 247 for (i = 0; i < max_fds; i++) 248 map_fds[i] = -1; 249} 250 251static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds) 252{ 253 int i; 254 255 for (i = 0; i < max_fds; i++) { 256 if (map_fds[i] >= 0) 257 close(map_fds[i]); 258 } 259} 260 261static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps, 262 int *map_fds, unsigned int max_fds) 263{ 264 int i, ret; 265 266 for (i = 0; i < num_maps && num_maps <= max_fds; i++) { 267 struct bpf_elf_map *map = &maps[i]; 268 269 ret = bpf_map_attach(map->type, map->size_key, 270 map->size_value, map->max_elem); 271 if (ret < 0) 272 goto err_unwind; 273 274 map_fds[i] = ret; 275 } 276 277 return 0; 278 279err_unwind: 280 bpf_maps_destroy(map_fds, i); 281 return ret; 282} 283 284static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index, 285 struct bpf_elf_sec_data *sec_data) 286{ 287 GElf_Shdr sec_hdr; 288 Elf_Scn *sec_fd; 289 Elf_Data *sec_edata; 290 char *sec_name; 291 292 memset(sec_data, 0, sizeof(*sec_data)); 293 294 sec_fd = elf_getscn(elf_fd, sec_index); 295 if (!sec_fd) 296 return -EINVAL; 297 298 if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) 299 return -EIO; 300 301 sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx, 302 sec_hdr.sh_name); 303 if (!sec_name || !sec_hdr.sh_size) 304 return -ENOENT; 305 306 sec_edata = elf_getdata(sec_fd, NULL); 307 if (!sec_edata || elf_getdata(sec_fd, sec_edata)) 308 return -EIO; 309 310 memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); 311 sec_data->sec_name = sec_name; 312 sec_data->sec_data = sec_edata; 313 314 return 0; 315} 316 317static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo, 318 struct bpf_elf_sec_data *data_insn, 319 Elf_Data *sym_tab, int *map_fds, int max_fds) 320{ 321 Elf_Data *idata = data_insn->sec_data; 322 GElf_Shdr *rhdr = &data_relo->sec_hdr; 323 int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; 324 struct bpf_insn *insns = idata->d_buf; 325 unsigned int num_insns = idata->d_size / sizeof(*insns); 326 327 for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { 328 unsigned int ioff, fnum; 329 GElf_Rel relo; 330 GElf_Sym sym; 331 332 if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) 333 return -EIO; 334 335 ioff = relo.r_offset / sizeof(struct bpf_insn); 336 if (ioff >= num_insns) 337 return -EINVAL; 338 if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) 339 return -EINVAL; 340 341 if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) 342 return -EIO; 343 344 fnum = sym.st_value / sizeof(struct bpf_elf_map); 345 if (fnum >= max_fds) 346 return -EINVAL; 347 348 insns[ioff].src_reg = BPF_PSEUDO_MAP_FD; 349 insns[ioff].imm = map_fds[fnum]; 350 } 351 352 return 0; 353} 354 355static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen, 356 int *map_fds, unsigned int max_fds, 357 char *license, unsigned int lic_len, 358 Elf_Data **sym_tab) 359{ 360 int sec_index, ret = -1; 361 362 for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { 363 struct bpf_elf_sec_data data_anc; 364 365 ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, 366 &data_anc); 367 if (ret < 0) 368 continue; 369 370 /* Extract and load eBPF map fds. */ 371 if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) { 372 struct bpf_elf_map *maps = data_anc.sec_data->d_buf; 373 unsigned int maps_num = data_anc.sec_data->d_size / 374 sizeof(*maps); 375 376 sec_seen[sec_index] = true; 377 ret = bpf_maps_attach(maps, maps_num, map_fds, 378 max_fds); 379 if (ret < 0) 380 return ret; 381 } 382 /* Extract eBPF license. */ 383 else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) { 384 if (data_anc.sec_data->d_size > lic_len) 385 return -ENOMEM; 386 387 sec_seen[sec_index] = true; 388 memcpy(license, data_anc.sec_data->d_buf, 389 data_anc.sec_data->d_size); 390 } 391 /* Extract symbol table for relocations (map fd fixups). */ 392 else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) { 393 sec_seen[sec_index] = true; 394 *sym_tab = data_anc.sec_data; 395 } 396 } 397 398 return ret; 399} 400 401static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen, 402 enum bpf_prog_type type, char *license, 403 Elf_Data *sym_tab, int *map_fds, unsigned int max_fds) 404{ 405 int sec_index, prog_fd = -1; 406 407 for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { 408 struct bpf_elf_sec_data data_relo, data_insn; 409 int ins_index, ret; 410 411 /* Attach eBPF programs with relocation data (maps). */ 412 ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, 413 &data_relo); 414 if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) 415 continue; 416 417 ins_index = data_relo.sec_hdr.sh_info; 418 419 ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index, 420 &data_insn); 421 if (ret < 0) 422 continue; 423 if (strcmp(data_insn.sec_name, prog_type_section(type))) 424 continue; 425 426 sec_seen[sec_index] = true; 427 sec_seen[ins_index] = true; 428 429 ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab, 430 map_fds, max_fds); 431 if (ret < 0) 432 continue; 433 434 prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf, 435 data_insn.sec_data->d_size, license); 436 if (prog_fd < 0) 437 continue; 438 439 break; 440 } 441 442 return prog_fd; 443} 444 445static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen, 446 enum bpf_prog_type type, char *license) 447{ 448 int sec_index, prog_fd = -1; 449 450 for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) { 451 struct bpf_elf_sec_data data_insn; 452 int ret; 453 454 /* Attach eBPF programs without relocation data. */ 455 if (sec_seen[sec_index]) 456 continue; 457 458 ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index, 459 &data_insn); 460 if (ret < 0) 461 continue; 462 if (strcmp(data_insn.sec_name, prog_type_section(type))) 463 continue; 464 465 prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf, 466 data_insn.sec_data->d_size, license); 467 if (prog_fd < 0) 468 continue; 469 470 break; 471 } 472 473 return prog_fd; 474} 475 476int bpf_open_object(const char *path, enum bpf_prog_type type) 477{ 478 int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds); 479 char license[ELF_MAX_LICENSE_LEN]; 480 int file_fd, prog_fd = -1, ret; 481 Elf_Data *sym_tab = NULL; 482 GElf_Ehdr elf_hdr; 483 bool *sec_seen; 484 Elf *elf_fd; 485 486 if (elf_version(EV_CURRENT) == EV_NONE) 487 return -EINVAL; 488 489 file_fd = open(path, O_RDONLY, 0); 490 if (file_fd < 0) 491 return -errno; 492 493 elf_fd = elf_begin(file_fd, ELF_C_READ, NULL); 494 if (!elf_fd) { 495 ret = -EINVAL; 496 goto out; 497 } 498 499 if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) { 500 ret = -EIO; 501 goto out_elf; 502 } 503 504 sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen)); 505 if (!sec_seen) { 506 ret = -ENOMEM; 507 goto out_elf; 508 } 509 510 memset(license, 0, sizeof(license)); 511 bpf_maps_init(map_fds, max_fds); 512 513 ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds, 514 license, sizeof(license), &sym_tab); 515 if (ret < 0) 516 goto out_maps; 517 if (sym_tab) 518 prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type, 519 license, sym_tab, map_fds, max_fds); 520 if (prog_fd < 0) 521 prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type, 522 license); 523 if (prog_fd < 0) 524 goto out_maps; 525out_sec: 526 free(sec_seen); 527out_elf: 528 elf_end(elf_fd); 529out: 530 close(file_fd); 531 return prog_fd; 532 533out_maps: 534 bpf_maps_destroy(map_fds, max_fds); 535 goto out_sec; 536} 537 538#endif /* HAVE_ELF */ 539