1/* 2 * Just-In-Time compiler for BPF filters on MIPS 3 * 4 * Copyright (c) 2014 Imagination Technologies Ltd. 5 * Author: Markos Chandras <markos.chandras@imgtec.com> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License as published by the 9 * Free Software Foundation; version 2 of the License. 10 */ 11 12#include <linux/bitops.h> 13#include <linux/compiler.h> 14#include <linux/errno.h> 15#include <linux/filter.h> 16#include <linux/if_vlan.h> 17#include <linux/kconfig.h> 18#include <linux/moduleloader.h> 19#include <linux/netdevice.h> 20#include <linux/string.h> 21#include <linux/slab.h> 22#include <linux/types.h> 23#include <asm/bitops.h> 24#include <asm/cacheflush.h> 25#include <asm/cpu-features.h> 26#include <asm/uasm.h> 27 28#include "bpf_jit.h" 29 30/* ABI 31 * 32 * s0 1st scratch register 33 * s1 2nd scratch register 34 * s2 offset register 35 * s3 BPF register A 36 * s4 BPF register X 37 * s5 *skb 38 * s6 *scratch memory 39 * 40 * On entry (*bpf_func)(*skb, *filter) 41 * a0 = MIPS_R_A0 = skb; 42 * a1 = MIPS_R_A1 = filter; 43 * 44 * Stack 45 * ... 46 * M[15] 47 * M[14] 48 * M[13] 49 * ... 50 * M[0] <-- r_M 51 * saved reg k-1 52 * saved reg k-2 53 * ... 54 * saved reg 0 <-- r_sp 55 * <no argument area> 56 * 57 * Packet layout 58 * 59 * <--------------------- len ------------------------> 60 * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> 61 * ---------------------------------------------------- 62 * | skb->data | 63 * ---------------------------------------------------- 64 */ 65 66#define RSIZE (sizeof(unsigned long)) 67#define ptr typeof(unsigned long) 68 69/* ABI specific return values */ 70#ifdef CONFIG_32BIT /* O32 */ 71#ifdef CONFIG_CPU_LITTLE_ENDIAN 72#define r_err MIPS_R_V1 73#define r_val MIPS_R_V0 74#else /* CONFIG_CPU_LITTLE_ENDIAN */ 75#define r_err MIPS_R_V0 76#define r_val MIPS_R_V1 77#endif 78#else /* N64 */ 79#define r_err MIPS_R_V0 80#define r_val MIPS_R_V0 81#endif 82 83#define r_ret MIPS_R_V0 84 85/* 86 * Use 2 scratch registers to avoid pipeline interlocks. 87 * There is no overhead during epilogue and prologue since 88 * any of the $s0-$s6 registers will only be preserved if 89 * they are going to actually be used. 90 */ 91#define r_s0 MIPS_R_S0 /* scratch reg 1 */ 92#define r_s1 MIPS_R_S1 /* scratch reg 2 */ 93#define r_off MIPS_R_S2 94#define r_A MIPS_R_S3 95#define r_X MIPS_R_S4 96#define r_skb MIPS_R_S5 97#define r_M MIPS_R_S6 98#define r_tmp_imm MIPS_R_T6 /* No need to preserve this */ 99#define r_tmp MIPS_R_T7 /* No need to preserve this */ 100#define r_zero MIPS_R_ZERO 101#define r_sp MIPS_R_SP 102#define r_ra MIPS_R_RA 103 104#define SCRATCH_OFF(k) (4 * (k)) 105 106/* JIT flags */ 107#define SEEN_CALL (1 << BPF_MEMWORDS) 108#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) 109#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) 110#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) 111#define SEEN_S0 SEEN_SREG(0) 112#define SEEN_S1 SEEN_SREG(1) 113#define SEEN_OFF SEEN_SREG(2) 114#define SEEN_A SEEN_SREG(3) 115#define SEEN_X SEEN_SREG(4) 116#define SEEN_SKB SEEN_SREG(5) 117#define SEEN_MEM SEEN_SREG(6) 118 119/* Arguments used by JIT */ 120#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ 121 122#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ 123 124/** 125 * struct jit_ctx - JIT context 126 * @skf: The sk_filter 127 * @prologue_bytes: Number of bytes for prologue 128 * @idx: Instruction index 129 * @flags: JIT flags 130 * @offsets: Instruction offsets 131 * @target: Memory location for the compiled filter 132 */ 133struct jit_ctx { 134 const struct bpf_prog *skf; 135 unsigned int prologue_bytes; 136 u32 idx; 137 u32 flags; 138 u32 *offsets; 139 u32 *target; 140}; 141 142 143static inline int optimize_div(u32 *k) 144{ 145 /* power of 2 divides can be implemented with right shift */ 146 if (!(*k & (*k-1))) { 147 *k = ilog2(*k); 148 return 1; 149 } 150 151 return 0; 152} 153 154static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); 155 156/* Simply emit the instruction if the JIT memory space has been allocated */ 157#define emit_instr(ctx, func, ...) \ 158do { \ 159 if ((ctx)->target != NULL) { \ 160 u32 *p = &(ctx)->target[ctx->idx]; \ 161 uasm_i_##func(&p, ##__VA_ARGS__); \ 162 } \ 163 (ctx)->idx++; \ 164} while (0) 165 166/* 167 * Similar to emit_instr but it must be used when we need to emit 168 * 32-bit or 64-bit instructions 169 */ 170#define emit_long_instr(ctx, func, ...) \ 171do { \ 172 if ((ctx)->target != NULL) { \ 173 u32 *p = &(ctx)->target[ctx->idx]; \ 174 UASM_i_##func(&p, ##__VA_ARGS__); \ 175 } \ 176 (ctx)->idx++; \ 177} while (0) 178 179/* Determine if immediate is within the 16-bit signed range */ 180static inline bool is_range16(s32 imm) 181{ 182 return !(imm >= SBIT(15) || imm < -SBIT(15)); 183} 184 185static inline void emit_addu(unsigned int dst, unsigned int src1, 186 unsigned int src2, struct jit_ctx *ctx) 187{ 188 emit_instr(ctx, addu, dst, src1, src2); 189} 190 191static inline void emit_nop(struct jit_ctx *ctx) 192{ 193 emit_instr(ctx, nop); 194} 195 196/* Load a u32 immediate to a register */ 197static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) 198{ 199 if (ctx->target != NULL) { 200 /* addiu can only handle s16 */ 201 if (!is_range16(imm)) { 202 u32 *p = &ctx->target[ctx->idx]; 203 uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); 204 p = &ctx->target[ctx->idx + 1]; 205 uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); 206 } else { 207 u32 *p = &ctx->target[ctx->idx]; 208 uasm_i_addiu(&p, dst, r_zero, imm); 209 } 210 } 211 ctx->idx++; 212 213 if (!is_range16(imm)) 214 ctx->idx++; 215} 216 217static inline void emit_or(unsigned int dst, unsigned int src1, 218 unsigned int src2, struct jit_ctx *ctx) 219{ 220 emit_instr(ctx, or, dst, src1, src2); 221} 222 223static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, 224 struct jit_ctx *ctx) 225{ 226 if (imm >= BIT(16)) { 227 emit_load_imm(r_tmp, imm, ctx); 228 emit_or(dst, src, r_tmp, ctx); 229 } else { 230 emit_instr(ctx, ori, dst, src, imm); 231 } 232} 233 234static inline void emit_daddiu(unsigned int dst, unsigned int src, 235 int imm, struct jit_ctx *ctx) 236{ 237 /* 238 * Only used for stack, so the imm is relatively small 239 * and it fits in 15-bits 240 */ 241 emit_instr(ctx, daddiu, dst, src, imm); 242} 243 244static inline void emit_addiu(unsigned int dst, unsigned int src, 245 u32 imm, struct jit_ctx *ctx) 246{ 247 if (!is_range16(imm)) { 248 emit_load_imm(r_tmp, imm, ctx); 249 emit_addu(dst, r_tmp, src, ctx); 250 } else { 251 emit_instr(ctx, addiu, dst, src, imm); 252 } 253} 254 255static inline void emit_and(unsigned int dst, unsigned int src1, 256 unsigned int src2, struct jit_ctx *ctx) 257{ 258 emit_instr(ctx, and, dst, src1, src2); 259} 260 261static inline void emit_andi(unsigned int dst, unsigned int src, 262 u32 imm, struct jit_ctx *ctx) 263{ 264 /* If imm does not fit in u16 then load it to register */ 265 if (imm >= BIT(16)) { 266 emit_load_imm(r_tmp, imm, ctx); 267 emit_and(dst, src, r_tmp, ctx); 268 } else { 269 emit_instr(ctx, andi, dst, src, imm); 270 } 271} 272 273static inline void emit_xor(unsigned int dst, unsigned int src1, 274 unsigned int src2, struct jit_ctx *ctx) 275{ 276 emit_instr(ctx, xor, dst, src1, src2); 277} 278 279static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) 280{ 281 /* If imm does not fit in u16 then load it to register */ 282 if (imm >= BIT(16)) { 283 emit_load_imm(r_tmp, imm, ctx); 284 emit_xor(dst, src, r_tmp, ctx); 285 } else { 286 emit_instr(ctx, xori, dst, src, imm); 287 } 288} 289 290static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) 291{ 292 emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); 293} 294 295static inline void emit_subu(unsigned int dst, unsigned int src1, 296 unsigned int src2, struct jit_ctx *ctx) 297{ 298 emit_instr(ctx, subu, dst, src1, src2); 299} 300 301static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) 302{ 303 emit_subu(reg, r_zero, reg, ctx); 304} 305 306static inline void emit_sllv(unsigned int dst, unsigned int src, 307 unsigned int sa, struct jit_ctx *ctx) 308{ 309 emit_instr(ctx, sllv, dst, src, sa); 310} 311 312static inline void emit_sll(unsigned int dst, unsigned int src, 313 unsigned int sa, struct jit_ctx *ctx) 314{ 315 /* sa is 5-bits long */ 316 if (sa >= BIT(5)) 317 /* Shifting >= 32 results in zero */ 318 emit_jit_reg_move(dst, r_zero, ctx); 319 else 320 emit_instr(ctx, sll, dst, src, sa); 321} 322 323static inline void emit_srlv(unsigned int dst, unsigned int src, 324 unsigned int sa, struct jit_ctx *ctx) 325{ 326 emit_instr(ctx, srlv, dst, src, sa); 327} 328 329static inline void emit_srl(unsigned int dst, unsigned int src, 330 unsigned int sa, struct jit_ctx *ctx) 331{ 332 /* sa is 5-bits long */ 333 if (sa >= BIT(5)) 334 /* Shifting >= 32 results in zero */ 335 emit_jit_reg_move(dst, r_zero, ctx); 336 else 337 emit_instr(ctx, srl, dst, src, sa); 338} 339 340static inline void emit_slt(unsigned int dst, unsigned int src1, 341 unsigned int src2, struct jit_ctx *ctx) 342{ 343 emit_instr(ctx, slt, dst, src1, src2); 344} 345 346static inline void emit_sltu(unsigned int dst, unsigned int src1, 347 unsigned int src2, struct jit_ctx *ctx) 348{ 349 emit_instr(ctx, sltu, dst, src1, src2); 350} 351 352static inline void emit_sltiu(unsigned dst, unsigned int src, 353 unsigned int imm, struct jit_ctx *ctx) 354{ 355 /* 16 bit immediate */ 356 if (!is_range16((s32)imm)) { 357 emit_load_imm(r_tmp, imm, ctx); 358 emit_sltu(dst, src, r_tmp, ctx); 359 } else { 360 emit_instr(ctx, sltiu, dst, src, imm); 361 } 362 363} 364 365/* Store register on the stack */ 366static inline void emit_store_stack_reg(ptr reg, ptr base, 367 unsigned int offset, 368 struct jit_ctx *ctx) 369{ 370 emit_long_instr(ctx, SW, reg, offset, base); 371} 372 373static inline void emit_store(ptr reg, ptr base, unsigned int offset, 374 struct jit_ctx *ctx) 375{ 376 emit_instr(ctx, sw, reg, offset, base); 377} 378 379static inline void emit_load_stack_reg(ptr reg, ptr base, 380 unsigned int offset, 381 struct jit_ctx *ctx) 382{ 383 emit_long_instr(ctx, LW, reg, offset, base); 384} 385 386static inline void emit_load(unsigned int reg, unsigned int base, 387 unsigned int offset, struct jit_ctx *ctx) 388{ 389 emit_instr(ctx, lw, reg, offset, base); 390} 391 392static inline void emit_load_byte(unsigned int reg, unsigned int base, 393 unsigned int offset, struct jit_ctx *ctx) 394{ 395 emit_instr(ctx, lb, reg, offset, base); 396} 397 398static inline void emit_half_load(unsigned int reg, unsigned int base, 399 unsigned int offset, struct jit_ctx *ctx) 400{ 401 emit_instr(ctx, lh, reg, offset, base); 402} 403 404static inline void emit_mul(unsigned int dst, unsigned int src1, 405 unsigned int src2, struct jit_ctx *ctx) 406{ 407 emit_instr(ctx, mul, dst, src1, src2); 408} 409 410static inline void emit_div(unsigned int dst, unsigned int src, 411 struct jit_ctx *ctx) 412{ 413 if (ctx->target != NULL) { 414 u32 *p = &ctx->target[ctx->idx]; 415 uasm_i_divu(&p, dst, src); 416 p = &ctx->target[ctx->idx + 1]; 417 uasm_i_mflo(&p, dst); 418 } 419 ctx->idx += 2; /* 2 insts */ 420} 421 422static inline void emit_mod(unsigned int dst, unsigned int src, 423 struct jit_ctx *ctx) 424{ 425 if (ctx->target != NULL) { 426 u32 *p = &ctx->target[ctx->idx]; 427 uasm_i_divu(&p, dst, src); 428 p = &ctx->target[ctx->idx + 1]; 429 uasm_i_mflo(&p, dst); 430 } 431 ctx->idx += 2; /* 2 insts */ 432} 433 434static inline void emit_dsll(unsigned int dst, unsigned int src, 435 unsigned int sa, struct jit_ctx *ctx) 436{ 437 emit_instr(ctx, dsll, dst, src, sa); 438} 439 440static inline void emit_dsrl32(unsigned int dst, unsigned int src, 441 unsigned int sa, struct jit_ctx *ctx) 442{ 443 emit_instr(ctx, dsrl32, dst, src, sa); 444} 445 446static inline void emit_wsbh(unsigned int dst, unsigned int src, 447 struct jit_ctx *ctx) 448{ 449 emit_instr(ctx, wsbh, dst, src); 450} 451 452/* load pointer to register */ 453static inline void emit_load_ptr(unsigned int dst, unsigned int src, 454 int imm, struct jit_ctx *ctx) 455{ 456 /* src contains the base addr of the 32/64-pointer */ 457 emit_long_instr(ctx, LW, dst, imm, src); 458} 459 460/* load a function pointer to register */ 461static inline void emit_load_func(unsigned int reg, ptr imm, 462 struct jit_ctx *ctx) 463{ 464 if (config_enabled(CONFIG_64BIT)) { 465 /* At this point imm is always 64-bit */ 466 emit_load_imm(r_tmp, (u64)imm >> 32, ctx); 467 emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ 468 emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); 469 emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ 470 emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); 471 } else { 472 emit_load_imm(reg, imm, ctx); 473 } 474} 475 476/* Move to real MIPS register */ 477static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) 478{ 479 emit_long_instr(ctx, ADDU, dst, src, r_zero); 480} 481 482/* Move to JIT (32-bit) register */ 483static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) 484{ 485 emit_addu(dst, src, r_zero, ctx); 486} 487 488/* Compute the immediate value for PC-relative branches. */ 489static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) 490{ 491 if (ctx->target == NULL) 492 return 0; 493 494 /* 495 * We want a pc-relative branch. We only do forward branches 496 * so tgt is always after pc. tgt is the instruction offset 497 * we want to jump to. 498 499 * Branch on MIPS: 500 * I: target_offset <- sign_extend(offset) 501 * I+1: PC += target_offset (delay slot) 502 * 503 * ctx->idx currently points to the branch instruction 504 * but the offset is added to the delay slot so we need 505 * to subtract 4. 506 */ 507 return ctx->offsets[tgt] - 508 (ctx->idx * 4 - ctx->prologue_bytes) - 4; 509} 510 511static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, 512 unsigned int imm, struct jit_ctx *ctx) 513{ 514 if (ctx->target != NULL) { 515 u32 *p = &ctx->target[ctx->idx]; 516 517 switch (cond) { 518 case MIPS_COND_EQ: 519 uasm_i_beq(&p, reg1, reg2, imm); 520 break; 521 case MIPS_COND_NE: 522 uasm_i_bne(&p, reg1, reg2, imm); 523 break; 524 case MIPS_COND_ALL: 525 uasm_i_b(&p, imm); 526 break; 527 default: 528 pr_warn("%s: Unhandled branch conditional: %d\n", 529 __func__, cond); 530 } 531 } 532 ctx->idx++; 533} 534 535static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) 536{ 537 emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); 538} 539 540static inline void emit_jalr(unsigned int link, unsigned int reg, 541 struct jit_ctx *ctx) 542{ 543 emit_instr(ctx, jalr, link, reg); 544} 545 546static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) 547{ 548 emit_instr(ctx, jr, reg); 549} 550 551static inline u16 align_sp(unsigned int num) 552{ 553 /* Double word alignment for 32-bit, quadword for 64-bit */ 554 unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8; 555 num = (num + (align - 1)) & -align; 556 return num; 557} 558 559static bool is_load_to_a(u16 inst) 560{ 561 switch (inst) { 562 case BPF_LD | BPF_W | BPF_LEN: 563 case BPF_LD | BPF_W | BPF_ABS: 564 case BPF_LD | BPF_H | BPF_ABS: 565 case BPF_LD | BPF_B | BPF_ABS: 566 return true; 567 default: 568 return false; 569 } 570} 571 572static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) 573{ 574 int i = 0, real_off = 0; 575 u32 sflags, tmp_flags; 576 577 /* Adjust the stack pointer */ 578 emit_stack_offset(-align_sp(offset), ctx); 579 580 if (ctx->flags & SEEN_CALL) { 581 /* Argument save area */ 582 if (config_enabled(CONFIG_64BIT)) 583 /* Bottom of current frame */ 584 real_off = align_sp(offset) - RSIZE; 585 else 586 /* Top of previous frame */ 587 real_off = align_sp(offset) + RSIZE; 588 emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx); 589 emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx); 590 591 real_off = 0; 592 } 593 594 tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; 595 /* sflags is essentially a bitmap */ 596 while (tmp_flags) { 597 if ((sflags >> i) & 0x1) { 598 emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, 599 ctx); 600 real_off += RSIZE; 601 } 602 i++; 603 tmp_flags >>= 1; 604 } 605 606 /* save return address */ 607 if (ctx->flags & SEEN_CALL) { 608 emit_store_stack_reg(r_ra, r_sp, real_off, ctx); 609 real_off += RSIZE; 610 } 611 612 /* Setup r_M leaving the alignment gap if necessary */ 613 if (ctx->flags & SEEN_MEM) { 614 if (real_off % (RSIZE * 2)) 615 real_off += RSIZE; 616 emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); 617 } 618} 619 620static void restore_bpf_jit_regs(struct jit_ctx *ctx, 621 unsigned int offset) 622{ 623 int i, real_off = 0; 624 u32 sflags, tmp_flags; 625 626 if (ctx->flags & SEEN_CALL) { 627 if (config_enabled(CONFIG_64BIT)) 628 /* Bottom of current frame */ 629 real_off = align_sp(offset) - RSIZE; 630 else 631 /* Top of previous frame */ 632 real_off = align_sp(offset) + RSIZE; 633 emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx); 634 emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx); 635 636 real_off = 0; 637 } 638 639 tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; 640 /* sflags is a bitmap */ 641 i = 0; 642 while (tmp_flags) { 643 if ((sflags >> i) & 0x1) { 644 emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, 645 ctx); 646 real_off += RSIZE; 647 } 648 i++; 649 tmp_flags >>= 1; 650 } 651 652 /* restore return address */ 653 if (ctx->flags & SEEN_CALL) 654 emit_load_stack_reg(r_ra, r_sp, real_off, ctx); 655 656 /* Restore the sp and discard the scrach memory */ 657 emit_stack_offset(align_sp(offset), ctx); 658} 659 660static unsigned int get_stack_depth(struct jit_ctx *ctx) 661{ 662 int sp_off = 0; 663 664 665 /* How may s* regs do we need to preserved? */ 666 sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE; 667 668 if (ctx->flags & SEEN_MEM) 669 sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ 670 671 if (ctx->flags & SEEN_CALL) 672 /* 673 * The JIT code make calls to external functions using 2 674 * arguments. Therefore, for o32 we don't need to allocate 675 * space because we don't care if the argumetns are lost 676 * across calls. We do need however to preserve incoming 677 * arguments but the space is already allocated for us by 678 * the caller. On the other hand, for n64, we need to allocate 679 * this space ourselves. We need to preserve $ra as well. 680 */ 681 sp_off += config_enabled(CONFIG_64BIT) ? 682 (ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE; 683 684 /* 685 * Subtract the bytes for the last registers since we only care about 686 * the location on the stack pointer. 687 */ 688 return sp_off - RSIZE; 689} 690 691static void build_prologue(struct jit_ctx *ctx) 692{ 693 u16 first_inst = ctx->skf->insns[0].code; 694 int sp_off; 695 696 /* Calculate the total offset for the stack pointer */ 697 sp_off = get_stack_depth(ctx); 698 save_bpf_jit_regs(ctx, sp_off); 699 700 if (ctx->flags & SEEN_SKB) 701 emit_reg_move(r_skb, MIPS_R_A0, ctx); 702 703 if (ctx->flags & SEEN_X) 704 emit_jit_reg_move(r_X, r_zero, ctx); 705 706 /* Do not leak kernel data to userspace */ 707 if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) 708 emit_jit_reg_move(r_A, r_zero, ctx); 709} 710 711static void build_epilogue(struct jit_ctx *ctx) 712{ 713 unsigned int sp_off; 714 715 /* Calculate the total offset for the stack pointer */ 716 717 sp_off = get_stack_depth(ctx); 718 restore_bpf_jit_regs(ctx, sp_off); 719 720 /* Return */ 721 emit_jr(r_ra, ctx); 722 emit_nop(ctx); 723} 724 725static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset) 726{ 727 u8 ret; 728 int err; 729 730 err = skb_copy_bits(skb, offset, &ret, 1); 731 732 return (u64)err << 32 | ret; 733} 734 735static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset) 736{ 737 u16 ret; 738 int err; 739 740 err = skb_copy_bits(skb, offset, &ret, 2); 741 742 return (u64)err << 32 | ntohs(ret); 743} 744 745static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) 746{ 747 u32 ret; 748 int err; 749 750 err = skb_copy_bits(skb, offset, &ret, 4); 751 752 return (u64)err << 32 | ntohl(ret); 753} 754 755static int build_body(struct jit_ctx *ctx) 756{ 757 void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; 758 const struct bpf_prog *prog = ctx->skf; 759 const struct sock_filter *inst; 760 unsigned int i, off, load_order, condt; 761 u32 k, b_off __maybe_unused; 762 763 for (i = 0; i < prog->len; i++) { 764 u16 code; 765 766 inst = &(prog->insns[i]); 767 pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", 768 __func__, inst->code, inst->jt, inst->jf, inst->k); 769 k = inst->k; 770 code = bpf_anc_helper(inst); 771 772 if (ctx->target == NULL) 773 ctx->offsets[i] = ctx->idx * 4; 774 775 switch (code) { 776 case BPF_LD | BPF_IMM: 777 /* A <- k ==> li r_A, k */ 778 ctx->flags |= SEEN_A; 779 emit_load_imm(r_A, k, ctx); 780 break; 781 case BPF_LD | BPF_W | BPF_LEN: 782 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); 783 /* A <- len ==> lw r_A, offset(skb) */ 784 ctx->flags |= SEEN_SKB | SEEN_A; 785 off = offsetof(struct sk_buff, len); 786 emit_load(r_A, r_skb, off, ctx); 787 break; 788 case BPF_LD | BPF_MEM: 789 /* A <- M[k] ==> lw r_A, offset(M) */ 790 ctx->flags |= SEEN_MEM | SEEN_A; 791 emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); 792 break; 793 case BPF_LD | BPF_W | BPF_ABS: 794 /* A <- P[k:4] */ 795 load_order = 2; 796 goto load; 797 case BPF_LD | BPF_H | BPF_ABS: 798 /* A <- P[k:2] */ 799 load_order = 1; 800 goto load; 801 case BPF_LD | BPF_B | BPF_ABS: 802 /* A <- P[k:1] */ 803 load_order = 0; 804load: 805 /* the interpreter will deal with the negative K */ 806 if ((int)k < 0) 807 return -ENOTSUPP; 808 809 emit_load_imm(r_off, k, ctx); 810load_common: 811 /* 812 * We may got here from the indirect loads so 813 * return if offset is negative. 814 */ 815 emit_slt(r_s0, r_off, r_zero, ctx); 816 emit_bcond(MIPS_COND_NE, r_s0, r_zero, 817 b_imm(prog->len, ctx), ctx); 818 emit_reg_move(r_ret, r_zero, ctx); 819 820 ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 | 821 SEEN_SKB | SEEN_A; 822 823 emit_load_func(r_s0, (ptr)load_func[load_order], 824 ctx); 825 emit_reg_move(MIPS_R_A0, r_skb, ctx); 826 emit_jalr(MIPS_R_RA, r_s0, ctx); 827 /* Load second argument to delay slot */ 828 emit_reg_move(MIPS_R_A1, r_off, ctx); 829 /* Check the error value */ 830 if (config_enabled(CONFIG_64BIT)) { 831 /* Get error code from the top 32-bits */ 832 emit_dsrl32(r_s0, r_val, 0, ctx); 833 /* Branch to 3 instructions ahead */ 834 emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2, 835 ctx); 836 } else { 837 /* Branch to 3 instructions ahead */ 838 emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2, 839 ctx); 840 } 841 emit_nop(ctx); 842 /* We are good */ 843 emit_b(b_imm(i + 1, ctx), ctx); 844 emit_jit_reg_move(r_A, r_val, ctx); 845 /* Return with error */ 846 emit_b(b_imm(prog->len, ctx), ctx); 847 emit_reg_move(r_ret, r_zero, ctx); 848 break; 849 case BPF_LD | BPF_W | BPF_IND: 850 /* A <- P[X + k:4] */ 851 load_order = 2; 852 goto load_ind; 853 case BPF_LD | BPF_H | BPF_IND: 854 /* A <- P[X + k:2] */ 855 load_order = 1; 856 goto load_ind; 857 case BPF_LD | BPF_B | BPF_IND: 858 /* A <- P[X + k:1] */ 859 load_order = 0; 860load_ind: 861 ctx->flags |= SEEN_OFF | SEEN_X; 862 emit_addiu(r_off, r_X, k, ctx); 863 goto load_common; 864 case BPF_LDX | BPF_IMM: 865 /* X <- k */ 866 ctx->flags |= SEEN_X; 867 emit_load_imm(r_X, k, ctx); 868 break; 869 case BPF_LDX | BPF_MEM: 870 /* X <- M[k] */ 871 ctx->flags |= SEEN_X | SEEN_MEM; 872 emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); 873 break; 874 case BPF_LDX | BPF_W | BPF_LEN: 875 /* X <- len */ 876 ctx->flags |= SEEN_X | SEEN_SKB; 877 off = offsetof(struct sk_buff, len); 878 emit_load(r_X, r_skb, off, ctx); 879 break; 880 case BPF_LDX | BPF_B | BPF_MSH: 881 /* the interpreter will deal with the negative K */ 882 if ((int)k < 0) 883 return -ENOTSUPP; 884 885 /* X <- 4 * (P[k:1] & 0xf) */ 886 ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB; 887 /* Load offset to a1 */ 888 emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx); 889 /* 890 * This may emit two instructions so it may not fit 891 * in the delay slot. So use a0 in the delay slot. 892 */ 893 emit_load_imm(MIPS_R_A1, k, ctx); 894 emit_jalr(MIPS_R_RA, r_s0, ctx); 895 emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ 896 /* Check the error value */ 897 if (config_enabled(CONFIG_64BIT)) { 898 /* Top 32-bits of $v0 on 64-bit */ 899 emit_dsrl32(r_s0, r_val, 0, ctx); 900 emit_bcond(MIPS_COND_NE, r_s0, r_zero, 901 3 << 2, ctx); 902 } else { 903 emit_bcond(MIPS_COND_NE, r_err, r_zero, 904 3 << 2, ctx); 905 } 906 /* No need for delay slot */ 907 /* We are good */ 908 /* X <- P[1:K] & 0xf */ 909 emit_andi(r_X, r_val, 0xf, ctx); 910 /* X << 2 */ 911 emit_b(b_imm(i + 1, ctx), ctx); 912 emit_sll(r_X, r_X, 2, ctx); /* delay slot */ 913 /* Return with error */ 914 emit_b(b_imm(prog->len, ctx), ctx); 915 emit_load_imm(r_ret, 0, ctx); /* delay slot */ 916 break; 917 case BPF_ST: 918 /* M[k] <- A */ 919 ctx->flags |= SEEN_MEM | SEEN_A; 920 emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); 921 break; 922 case BPF_STX: 923 /* M[k] <- X */ 924 ctx->flags |= SEEN_MEM | SEEN_X; 925 emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); 926 break; 927 case BPF_ALU | BPF_ADD | BPF_K: 928 /* A += K */ 929 ctx->flags |= SEEN_A; 930 emit_addiu(r_A, r_A, k, ctx); 931 break; 932 case BPF_ALU | BPF_ADD | BPF_X: 933 /* A += X */ 934 ctx->flags |= SEEN_A | SEEN_X; 935 emit_addu(r_A, r_A, r_X, ctx); 936 break; 937 case BPF_ALU | BPF_SUB | BPF_K: 938 /* A -= K */ 939 ctx->flags |= SEEN_A; 940 emit_addiu(r_A, r_A, -k, ctx); 941 break; 942 case BPF_ALU | BPF_SUB | BPF_X: 943 /* A -= X */ 944 ctx->flags |= SEEN_A | SEEN_X; 945 emit_subu(r_A, r_A, r_X, ctx); 946 break; 947 case BPF_ALU | BPF_MUL | BPF_K: 948 /* A *= K */ 949 /* Load K to scratch register before MUL */ 950 ctx->flags |= SEEN_A | SEEN_S0; 951 emit_load_imm(r_s0, k, ctx); 952 emit_mul(r_A, r_A, r_s0, ctx); 953 break; 954 case BPF_ALU | BPF_MUL | BPF_X: 955 /* A *= X */ 956 ctx->flags |= SEEN_A | SEEN_X; 957 emit_mul(r_A, r_A, r_X, ctx); 958 break; 959 case BPF_ALU | BPF_DIV | BPF_K: 960 /* A /= k */ 961 if (k == 1) 962 break; 963 if (optimize_div(&k)) { 964 ctx->flags |= SEEN_A; 965 emit_srl(r_A, r_A, k, ctx); 966 break; 967 } 968 ctx->flags |= SEEN_A | SEEN_S0; 969 emit_load_imm(r_s0, k, ctx); 970 emit_div(r_A, r_s0, ctx); 971 break; 972 case BPF_ALU | BPF_MOD | BPF_K: 973 /* A %= k */ 974 if (k == 1 || optimize_div(&k)) { 975 ctx->flags |= SEEN_A; 976 emit_jit_reg_move(r_A, r_zero, ctx); 977 } else { 978 ctx->flags |= SEEN_A | SEEN_S0; 979 emit_load_imm(r_s0, k, ctx); 980 emit_mod(r_A, r_s0, ctx); 981 } 982 break; 983 case BPF_ALU | BPF_DIV | BPF_X: 984 /* A /= X */ 985 ctx->flags |= SEEN_X | SEEN_A; 986 /* Check if r_X is zero */ 987 emit_bcond(MIPS_COND_EQ, r_X, r_zero, 988 b_imm(prog->len, ctx), ctx); 989 emit_load_imm(r_val, 0, ctx); /* delay slot */ 990 emit_div(r_A, r_X, ctx); 991 break; 992 case BPF_ALU | BPF_MOD | BPF_X: 993 /* A %= X */ 994 ctx->flags |= SEEN_X | SEEN_A; 995 /* Check if r_X is zero */ 996 emit_bcond(MIPS_COND_EQ, r_X, r_zero, 997 b_imm(prog->len, ctx), ctx); 998 emit_load_imm(r_val, 0, ctx); /* delay slot */ 999 emit_mod(r_A, r_X, ctx); 1000 break; 1001 case BPF_ALU | BPF_OR | BPF_K: 1002 /* A |= K */ 1003 ctx->flags |= SEEN_A; 1004 emit_ori(r_A, r_A, k, ctx); 1005 break; 1006 case BPF_ALU | BPF_OR | BPF_X: 1007 /* A |= X */ 1008 ctx->flags |= SEEN_A; 1009 emit_ori(r_A, r_A, r_X, ctx); 1010 break; 1011 case BPF_ALU | BPF_XOR | BPF_K: 1012 /* A ^= k */ 1013 ctx->flags |= SEEN_A; 1014 emit_xori(r_A, r_A, k, ctx); 1015 break; 1016 case BPF_ANC | SKF_AD_ALU_XOR_X: 1017 case BPF_ALU | BPF_XOR | BPF_X: 1018 /* A ^= X */ 1019 ctx->flags |= SEEN_A; 1020 emit_xor(r_A, r_A, r_X, ctx); 1021 break; 1022 case BPF_ALU | BPF_AND | BPF_K: 1023 /* A &= K */ 1024 ctx->flags |= SEEN_A; 1025 emit_andi(r_A, r_A, k, ctx); 1026 break; 1027 case BPF_ALU | BPF_AND | BPF_X: 1028 /* A &= X */ 1029 ctx->flags |= SEEN_A | SEEN_X; 1030 emit_and(r_A, r_A, r_X, ctx); 1031 break; 1032 case BPF_ALU | BPF_LSH | BPF_K: 1033 /* A <<= K */ 1034 ctx->flags |= SEEN_A; 1035 emit_sll(r_A, r_A, k, ctx); 1036 break; 1037 case BPF_ALU | BPF_LSH | BPF_X: 1038 /* A <<= X */ 1039 ctx->flags |= SEEN_A | SEEN_X; 1040 emit_sllv(r_A, r_A, r_X, ctx); 1041 break; 1042 case BPF_ALU | BPF_RSH | BPF_K: 1043 /* A >>= K */ 1044 ctx->flags |= SEEN_A; 1045 emit_srl(r_A, r_A, k, ctx); 1046 break; 1047 case BPF_ALU | BPF_RSH | BPF_X: 1048 ctx->flags |= SEEN_A | SEEN_X; 1049 emit_srlv(r_A, r_A, r_X, ctx); 1050 break; 1051 case BPF_ALU | BPF_NEG: 1052 /* A = -A */ 1053 ctx->flags |= SEEN_A; 1054 emit_neg(r_A, ctx); 1055 break; 1056 case BPF_JMP | BPF_JA: 1057 /* pc += K */ 1058 emit_b(b_imm(i + k + 1, ctx), ctx); 1059 emit_nop(ctx); 1060 break; 1061 case BPF_JMP | BPF_JEQ | BPF_K: 1062 /* pc += ( A == K ) ? pc->jt : pc->jf */ 1063 condt = MIPS_COND_EQ | MIPS_COND_K; 1064 goto jmp_cmp; 1065 case BPF_JMP | BPF_JEQ | BPF_X: 1066 ctx->flags |= SEEN_X; 1067 /* pc += ( A == X ) ? pc->jt : pc->jf */ 1068 condt = MIPS_COND_EQ | MIPS_COND_X; 1069 goto jmp_cmp; 1070 case BPF_JMP | BPF_JGE | BPF_K: 1071 /* pc += ( A >= K ) ? pc->jt : pc->jf */ 1072 condt = MIPS_COND_GE | MIPS_COND_K; 1073 goto jmp_cmp; 1074 case BPF_JMP | BPF_JGE | BPF_X: 1075 ctx->flags |= SEEN_X; 1076 /* pc += ( A >= X ) ? pc->jt : pc->jf */ 1077 condt = MIPS_COND_GE | MIPS_COND_X; 1078 goto jmp_cmp; 1079 case BPF_JMP | BPF_JGT | BPF_K: 1080 /* pc += ( A > K ) ? pc->jt : pc->jf */ 1081 condt = MIPS_COND_GT | MIPS_COND_K; 1082 goto jmp_cmp; 1083 case BPF_JMP | BPF_JGT | BPF_X: 1084 ctx->flags |= SEEN_X; 1085 /* pc += ( A > X ) ? pc->jt : pc->jf */ 1086 condt = MIPS_COND_GT | MIPS_COND_X; 1087jmp_cmp: 1088 /* Greater or Equal */ 1089 if ((condt & MIPS_COND_GE) || 1090 (condt & MIPS_COND_GT)) { 1091 if (condt & MIPS_COND_K) { /* K */ 1092 ctx->flags |= SEEN_S0 | SEEN_A; 1093 emit_sltiu(r_s0, r_A, k, ctx); 1094 } else { /* X */ 1095 ctx->flags |= SEEN_S0 | SEEN_A | 1096 SEEN_X; 1097 emit_sltu(r_s0, r_A, r_X, ctx); 1098 } 1099 /* A < (K|X) ? r_scrach = 1 */ 1100 b_off = b_imm(i + inst->jf + 1, ctx); 1101 emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, 1102 ctx); 1103 emit_nop(ctx); 1104 /* A > (K|X) ? scratch = 0 */ 1105 if (condt & MIPS_COND_GT) { 1106 /* Checking for equality */ 1107 ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X; 1108 if (condt & MIPS_COND_K) 1109 emit_load_imm(r_s0, k, ctx); 1110 else 1111 emit_jit_reg_move(r_s0, r_X, 1112 ctx); 1113 b_off = b_imm(i + inst->jf + 1, ctx); 1114 emit_bcond(MIPS_COND_EQ, r_A, r_s0, 1115 b_off, ctx); 1116 emit_nop(ctx); 1117 /* Finally, A > K|X */ 1118 b_off = b_imm(i + inst->jt + 1, ctx); 1119 emit_b(b_off, ctx); 1120 emit_nop(ctx); 1121 } else { 1122 /* A >= (K|X) so jump */ 1123 b_off = b_imm(i + inst->jt + 1, ctx); 1124 emit_b(b_off, ctx); 1125 emit_nop(ctx); 1126 } 1127 } else { 1128 /* A == K|X */ 1129 if (condt & MIPS_COND_K) { /* K */ 1130 ctx->flags |= SEEN_S0 | SEEN_A; 1131 emit_load_imm(r_s0, k, ctx); 1132 /* jump true */ 1133 b_off = b_imm(i + inst->jt + 1, ctx); 1134 emit_bcond(MIPS_COND_EQ, r_A, r_s0, 1135 b_off, ctx); 1136 emit_nop(ctx); 1137 /* jump false */ 1138 b_off = b_imm(i + inst->jf + 1, 1139 ctx); 1140 emit_bcond(MIPS_COND_NE, r_A, r_s0, 1141 b_off, ctx); 1142 emit_nop(ctx); 1143 } else { /* X */ 1144 /* jump true */ 1145 ctx->flags |= SEEN_A | SEEN_X; 1146 b_off = b_imm(i + inst->jt + 1, 1147 ctx); 1148 emit_bcond(MIPS_COND_EQ, r_A, r_X, 1149 b_off, ctx); 1150 emit_nop(ctx); 1151 /* jump false */ 1152 b_off = b_imm(i + inst->jf + 1, ctx); 1153 emit_bcond(MIPS_COND_NE, r_A, r_X, 1154 b_off, ctx); 1155 emit_nop(ctx); 1156 } 1157 } 1158 break; 1159 case BPF_JMP | BPF_JSET | BPF_K: 1160 ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A; 1161 /* pc += (A & K) ? pc -> jt : pc -> jf */ 1162 emit_load_imm(r_s1, k, ctx); 1163 emit_and(r_s0, r_A, r_s1, ctx); 1164 /* jump true */ 1165 b_off = b_imm(i + inst->jt + 1, ctx); 1166 emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); 1167 emit_nop(ctx); 1168 /* jump false */ 1169 b_off = b_imm(i + inst->jf + 1, ctx); 1170 emit_b(b_off, ctx); 1171 emit_nop(ctx); 1172 break; 1173 case BPF_JMP | BPF_JSET | BPF_X: 1174 ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A; 1175 /* pc += (A & X) ? pc -> jt : pc -> jf */ 1176 emit_and(r_s0, r_A, r_X, ctx); 1177 /* jump true */ 1178 b_off = b_imm(i + inst->jt + 1, ctx); 1179 emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); 1180 emit_nop(ctx); 1181 /* jump false */ 1182 b_off = b_imm(i + inst->jf + 1, ctx); 1183 emit_b(b_off, ctx); 1184 emit_nop(ctx); 1185 break; 1186 case BPF_RET | BPF_A: 1187 ctx->flags |= SEEN_A; 1188 if (i != prog->len - 1) 1189 /* 1190 * If this is not the last instruction 1191 * then jump to the epilogue 1192 */ 1193 emit_b(b_imm(prog->len, ctx), ctx); 1194 emit_reg_move(r_ret, r_A, ctx); /* delay slot */ 1195 break; 1196 case BPF_RET | BPF_K: 1197 /* 1198 * It can emit two instructions so it does not fit on 1199 * the delay slot. 1200 */ 1201 emit_load_imm(r_ret, k, ctx); 1202 if (i != prog->len - 1) { 1203 /* 1204 * If this is not the last instruction 1205 * then jump to the epilogue 1206 */ 1207 emit_b(b_imm(prog->len, ctx), ctx); 1208 emit_nop(ctx); 1209 } 1210 break; 1211 case BPF_MISC | BPF_TAX: 1212 /* X = A */ 1213 ctx->flags |= SEEN_X | SEEN_A; 1214 emit_jit_reg_move(r_X, r_A, ctx); 1215 break; 1216 case BPF_MISC | BPF_TXA: 1217 /* A = X */ 1218 ctx->flags |= SEEN_A | SEEN_X; 1219 emit_jit_reg_move(r_A, r_X, ctx); 1220 break; 1221 /* AUX */ 1222 case BPF_ANC | SKF_AD_PROTOCOL: 1223 /* A = ntohs(skb->protocol */ 1224 ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; 1225 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 1226 protocol) != 2); 1227 off = offsetof(struct sk_buff, protocol); 1228 emit_half_load(r_A, r_skb, off, ctx); 1229#ifdef CONFIG_CPU_LITTLE_ENDIAN 1230 /* This needs little endian fixup */ 1231 if (cpu_has_wsbh) { 1232 /* R2 and later have the wsbh instruction */ 1233 emit_wsbh(r_A, r_A, ctx); 1234 } else { 1235 /* Get first byte */ 1236 emit_andi(r_tmp_imm, r_A, 0xff, ctx); 1237 /* Shift it */ 1238 emit_sll(r_tmp, r_tmp_imm, 8, ctx); 1239 /* Get second byte */ 1240 emit_srl(r_tmp_imm, r_A, 8, ctx); 1241 emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); 1242 /* Put everyting together in r_A */ 1243 emit_or(r_A, r_tmp, r_tmp_imm, ctx); 1244 } 1245#endif 1246 break; 1247 case BPF_ANC | SKF_AD_CPU: 1248 ctx->flags |= SEEN_A | SEEN_OFF; 1249 /* A = current_thread_info()->cpu */ 1250 BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, 1251 cpu) != 4); 1252 off = offsetof(struct thread_info, cpu); 1253 /* $28/gp points to the thread_info struct */ 1254 emit_load(r_A, 28, off, ctx); 1255 break; 1256 case BPF_ANC | SKF_AD_IFINDEX: 1257 /* A = skb->dev->ifindex */ 1258 ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0; 1259 off = offsetof(struct sk_buff, dev); 1260 /* Load *dev pointer */ 1261 emit_load_ptr(r_s0, r_skb, off, ctx); 1262 /* error (0) in the delay slot */ 1263 emit_bcond(MIPS_COND_EQ, r_s0, r_zero, 1264 b_imm(prog->len, ctx), ctx); 1265 emit_reg_move(r_ret, r_zero, ctx); 1266 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, 1267 ifindex) != 4); 1268 off = offsetof(struct net_device, ifindex); 1269 emit_load(r_A, r_s0, off, ctx); 1270 break; 1271 case BPF_ANC | SKF_AD_MARK: 1272 ctx->flags |= SEEN_SKB | SEEN_A; 1273 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 1274 off = offsetof(struct sk_buff, mark); 1275 emit_load(r_A, r_skb, off, ctx); 1276 break; 1277 case BPF_ANC | SKF_AD_RXHASH: 1278 ctx->flags |= SEEN_SKB | SEEN_A; 1279 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); 1280 off = offsetof(struct sk_buff, hash); 1281 emit_load(r_A, r_skb, off, ctx); 1282 break; 1283 case BPF_ANC | SKF_AD_VLAN_TAG: 1284 case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: 1285 ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A; 1286 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 1287 vlan_tci) != 2); 1288 off = offsetof(struct sk_buff, vlan_tci); 1289 emit_half_load(r_s0, r_skb, off, ctx); 1290 if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { 1291 emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx); 1292 } else { 1293 emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx); 1294 /* return 1 if present */ 1295 emit_sltu(r_A, r_zero, r_A, ctx); 1296 } 1297 break; 1298 case BPF_ANC | SKF_AD_PKTTYPE: 1299 ctx->flags |= SEEN_SKB; 1300 1301 emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); 1302 /* Keep only the last 3 bits */ 1303 emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); 1304#ifdef __BIG_ENDIAN_BITFIELD 1305 /* Get the actual packet type to the lower 3 bits */ 1306 emit_srl(r_A, r_A, 5, ctx); 1307#endif 1308 break; 1309 case BPF_ANC | SKF_AD_QUEUE: 1310 ctx->flags |= SEEN_SKB | SEEN_A; 1311 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, 1312 queue_mapping) != 2); 1313 BUILD_BUG_ON(offsetof(struct sk_buff, 1314 queue_mapping) > 0xff); 1315 off = offsetof(struct sk_buff, queue_mapping); 1316 emit_half_load(r_A, r_skb, off, ctx); 1317 break; 1318 default: 1319 pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, 1320 inst->code); 1321 return -1; 1322 } 1323 } 1324 1325 /* compute offsets only during the first pass */ 1326 if (ctx->target == NULL) 1327 ctx->offsets[i] = ctx->idx * 4; 1328 1329 return 0; 1330} 1331 1332int bpf_jit_enable __read_mostly; 1333 1334void bpf_jit_compile(struct bpf_prog *fp) 1335{ 1336 struct jit_ctx ctx; 1337 unsigned int alloc_size, tmp_idx; 1338 1339 if (!bpf_jit_enable) 1340 return; 1341 1342 memset(&ctx, 0, sizeof(ctx)); 1343 1344 ctx.offsets = kcalloc(fp->len, sizeof(*ctx.offsets), GFP_KERNEL); 1345 if (ctx.offsets == NULL) 1346 return; 1347 1348 ctx.skf = fp; 1349 1350 if (build_body(&ctx)) 1351 goto out; 1352 1353 tmp_idx = ctx.idx; 1354 build_prologue(&ctx); 1355 ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; 1356 /* just to complete the ctx.idx count */ 1357 build_epilogue(&ctx); 1358 1359 alloc_size = 4 * ctx.idx; 1360 ctx.target = module_alloc(alloc_size); 1361 if (ctx.target == NULL) 1362 goto out; 1363 1364 /* Clean it */ 1365 memset(ctx.target, 0, alloc_size); 1366 1367 ctx.idx = 0; 1368 1369 /* Generate the actual JIT code */ 1370 build_prologue(&ctx); 1371 build_body(&ctx); 1372 build_epilogue(&ctx); 1373 1374 /* Update the icache */ 1375 flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); 1376 1377 if (bpf_jit_enable > 1) 1378 /* Dump JIT code */ 1379 bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); 1380 1381 fp->bpf_func = (void *)ctx.target; 1382 fp->jited = true; 1383 1384out: 1385 kfree(ctx.offsets); 1386} 1387 1388void bpf_jit_free(struct bpf_prog *fp) 1389{ 1390 if (fp->jited) 1391 module_free(NULL, fp->bpf_func); 1392 1393 bpf_prog_unlock_free(fp); 1394} 1395