1/*
2 * Just-In-Time compiler for BPF filters on MIPS
3 *
4 * Copyright (c) 2014 Imagination Technologies Ltd.
5 * Author: Markos Chandras <markos.chandras@imgtec.com>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the
9 * Free Software Foundation; version 2 of the License.
10 */
11
12#include <linux/bitops.h>
13#include <linux/compiler.h>
14#include <linux/errno.h>
15#include <linux/filter.h>
16#include <linux/if_vlan.h>
17#include <linux/kconfig.h>
18#include <linux/moduleloader.h>
19#include <linux/netdevice.h>
20#include <linux/string.h>
21#include <linux/slab.h>
22#include <linux/types.h>
23#include <asm/bitops.h>
24#include <asm/cacheflush.h>
25#include <asm/cpu-features.h>
26#include <asm/uasm.h>
27
28#include "bpf_jit.h"
29
30/* ABI
31 *
32 * s0	1st scratch register
33 * s1	2nd scratch register
34 * s2	offset register
35 * s3	BPF register A
36 * s4	BPF register X
37 * s5	*skb
38 * s6	*scratch memory
39 *
40 * On entry (*bpf_func)(*skb, *filter)
41 * a0 = MIPS_R_A0 = skb;
42 * a1 = MIPS_R_A1 = filter;
43 *
44 * Stack
45 * ...
46 * M[15]
47 * M[14]
48 * M[13]
49 * ...
50 * M[0] <-- r_M
51 * saved reg k-1
52 * saved reg k-2
53 * ...
54 * saved reg 0 <-- r_sp
55 * <no argument area>
56 *
57 *                     Packet layout
58 *
59 * <--------------------- len ------------------------>
60 * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
61 * ----------------------------------------------------
62 * |                  skb->data                       |
63 * ----------------------------------------------------
64 */
65
66#define RSIZE	(sizeof(unsigned long))
67#define ptr typeof(unsigned long)
68
69/* ABI specific return values */
70#ifdef CONFIG_32BIT /* O32 */
71#ifdef CONFIG_CPU_LITTLE_ENDIAN
72#define r_err	MIPS_R_V1
73#define r_val	MIPS_R_V0
74#else /* CONFIG_CPU_LITTLE_ENDIAN */
75#define r_err	MIPS_R_V0
76#define r_val	MIPS_R_V1
77#endif
78#else /* N64 */
79#define r_err	MIPS_R_V0
80#define r_val	MIPS_R_V0
81#endif
82
83#define r_ret	MIPS_R_V0
84
/*
 * Use 2 scratch registers to avoid pipeline interlocks.
 * There is no prologue/epilogue overhead since the $s0-$s6
 * registers are only preserved if they are actually used.
 */
91#define r_s0		MIPS_R_S0 /* scratch reg 1 */
92#define r_s1		MIPS_R_S1 /* scratch reg 2 */
93#define r_off		MIPS_R_S2
94#define r_A		MIPS_R_S3
95#define r_X		MIPS_R_S4
96#define r_skb		MIPS_R_S5
97#define r_M		MIPS_R_S6
98#define r_tmp_imm	MIPS_R_T6 /* No need to preserve this */
99#define r_tmp		MIPS_R_T7 /* No need to preserve this */
100#define r_zero		MIPS_R_ZERO
101#define r_sp		MIPS_R_SP
102#define r_ra		MIPS_R_RA
103
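/*
 * Each BPF scratch word M[k] is 32 bits wide; SCRATCH_OFF(k) gives its byte
 * offset from r_M, the scratch-area base set up by the prologue.
 */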
104#define SCRATCH_OFF(k)		(4 * (k))
105
106/* JIT flags */
107#define SEEN_CALL		(1 << BPF_MEMWORDS)
108#define SEEN_SREG_SFT		(BPF_MEMWORDS + 1)
109#define SEEN_SREG_BASE		(1 << SEEN_SREG_SFT)
110#define SEEN_SREG(x)		(SEEN_SREG_BASE << (x))
111#define SEEN_S0			SEEN_SREG(0)
112#define SEEN_S1			SEEN_SREG(1)
113#define SEEN_OFF		SEEN_SREG(2)
114#define SEEN_A			SEEN_SREG(3)
115#define SEEN_X			SEEN_SREG(4)
116#define SEEN_SKB		SEEN_SREG(5)
117#define SEEN_MEM		SEEN_SREG(6)
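
/*
 * The SEEN_SREG(x) bits map one-to-one onto the callee-saved registers
 * $s0..$s6 ($s0 + x); save_bpf_jit_regs()/restore_bpf_jit_regs() walk this
 * bitmap (ctx->flags >> SEEN_SREG_SFT) to decide which registers to spill.
 */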
118
119/* Arguments used by JIT */
120#define ARGS_USED_BY_JIT	2 /* only applicable to 64-bit */
121
122#define SBIT(x)			(1 << (x)) /* Signed version of BIT() */
123
124/**
125 * struct jit_ctx - JIT context
126 * @skf:		The sk_filter
127 * @prologue_bytes:	Number of bytes for prologue
128 * @idx:		Instruction index
129 * @flags:		JIT flags
130 * @offsets:		Instruction offsets
131 * @target:		Memory location for the compiled filter
132 */
133struct jit_ctx {
134	const struct bpf_prog *skf;
135	unsigned int prologue_bytes;
136	u32 idx;
137	u32 flags;
138	u32 *offsets;
139	u32 *target;
140};
141
142
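/*
 * If the divisor is a power of two, convert it to a shift amount so the
 * caller can emit a single srl instead of a divu/mflo pair (e.g. A /= 8
 * becomes A >>= 3).
 */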
143static inline int optimize_div(u32 *k)
144{
145	/* power of 2 divides can be implemented with right shift */
146	if (!(*k & (*k-1))) {
147		*k = ilog2(*k);
148		return 1;
149	}
150
151	return 0;
152}
153
154static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);
155
156/* Simply emit the instruction if the JIT memory space has been allocated */
157#define emit_instr(ctx, func, ...)			\
158do {							\
159	if ((ctx)->target != NULL) {			\
160		u32 *p = &(ctx)->target[ctx->idx];	\
161		uasm_i_##func(&p, ##__VA_ARGS__);	\
162	}						\
163	(ctx)->idx++;					\
164} while (0)
165
166/*
167 * Similar to emit_instr but it must be used when we need to emit
168 * 32-bit or 64-bit instructions
169 */
170#define emit_long_instr(ctx, func, ...)			\
171do {							\
172	if ((ctx)->target != NULL) {			\
173		u32 *p = &(ctx)->target[ctx->idx];	\
174		UASM_i_##func(&p, ##__VA_ARGS__);	\
175	}						\
176	(ctx)->idx++;					\
177} while (0)
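
/*
 * Both macros above advance ctx->idx even when ctx->target is NULL; the
 * first JIT pass runs without a target buffer purely to count instructions
 * and record per-instruction offsets.
 */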
178
179/* Determine if immediate is within the 16-bit signed range */
180static inline bool is_range16(s32 imm)
181{
182	return !(imm >= SBIT(15) || imm < -SBIT(15));
183}
184
185static inline void emit_addu(unsigned int dst, unsigned int src1,
186			     unsigned int src2, struct jit_ctx *ctx)
187{
188	emit_instr(ctx, addu, dst, src1, src2);
189}
190
191static inline void emit_nop(struct jit_ctx *ctx)
192{
193	emit_instr(ctx, nop);
194}
195
196/* Load a u32 immediate to a register */
197static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
198{
199	if (ctx->target != NULL) {
200		/* addiu can only handle s16 */
201		if (!is_range16(imm)) {
202			u32 *p = &ctx->target[ctx->idx];
203			uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
204			p = &ctx->target[ctx->idx + 1];
205			uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
206		} else {
207			u32 *p = &ctx->target[ctx->idx];
208			uasm_i_addiu(&p, dst, r_zero, imm);
209		}
210	}
211	ctx->idx++;
212
213	if (!is_range16(imm))
214		ctx->idx++;
215}
216
217static inline void emit_or(unsigned int dst, unsigned int src1,
218			   unsigned int src2, struct jit_ctx *ctx)
219{
220	emit_instr(ctx, or, dst, src1, src2);
221}
222
223static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
224			    struct jit_ctx *ctx)
225{
226	if (imm >= BIT(16)) {
227		emit_load_imm(r_tmp, imm, ctx);
228		emit_or(dst, src, r_tmp, ctx);
229	} else {
230		emit_instr(ctx, ori, dst, src, imm);
231	}
232}
233
234static inline void emit_daddiu(unsigned int dst, unsigned int src,
235			       int imm, struct jit_ctx *ctx)
236{
237	/*
238	 * Only used for stack, so the imm is relatively small
239	 * and it fits in 15-bits
240	 */
241	emit_instr(ctx, daddiu, dst, src, imm);
242}
243
244static inline void emit_addiu(unsigned int dst, unsigned int src,
245			      u32 imm, struct jit_ctx *ctx)
246{
247	if (!is_range16(imm)) {
248		emit_load_imm(r_tmp, imm, ctx);
249		emit_addu(dst, r_tmp, src, ctx);
250	} else {
251		emit_instr(ctx, addiu, dst, src, imm);
252	}
253}
254
255static inline void emit_and(unsigned int dst, unsigned int src1,
256			    unsigned int src2, struct jit_ctx *ctx)
257{
258	emit_instr(ctx, and, dst, src1, src2);
259}
260
261static inline void emit_andi(unsigned int dst, unsigned int src,
262			     u32 imm, struct jit_ctx *ctx)
263{
264	/* If imm does not fit in u16 then load it to register */
265	if (imm >= BIT(16)) {
266		emit_load_imm(r_tmp, imm, ctx);
267		emit_and(dst, src, r_tmp, ctx);
268	} else {
269		emit_instr(ctx, andi, dst, src, imm);
270	}
271}
272
273static inline void emit_xor(unsigned int dst, unsigned int src1,
274			    unsigned int src2, struct jit_ctx *ctx)
275{
276	emit_instr(ctx, xor, dst, src1, src2);
277}
278
279static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
280{
281	/* If imm does not fit in u16 then load it to register */
282	if (imm >= BIT(16)) {
283		emit_load_imm(r_tmp, imm, ctx);
284		emit_xor(dst, src, r_tmp, ctx);
285	} else {
286		emit_instr(ctx, xori, dst, src, imm);
287	}
288}
289
290static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
291{
292	emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
293}
294
295static inline void emit_subu(unsigned int dst, unsigned int src1,
296			     unsigned int src2, struct jit_ctx *ctx)
297{
298	emit_instr(ctx, subu, dst, src1, src2);
299}
300
301static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
302{
303	emit_subu(reg, r_zero, reg, ctx);
304}
305
306static inline void emit_sllv(unsigned int dst, unsigned int src,
307			     unsigned int sa, struct jit_ctx *ctx)
308{
309	emit_instr(ctx, sllv, dst, src, sa);
310}
311
312static inline void emit_sll(unsigned int dst, unsigned int src,
313			    unsigned int sa, struct jit_ctx *ctx)
314{
315	/* sa is 5-bits long */
316	if (sa >= BIT(5))
317		/* Shifting >= 32 results in zero */
318		emit_jit_reg_move(dst, r_zero, ctx);
319	else
320		emit_instr(ctx, sll, dst, src, sa);
321}
322
323static inline void emit_srlv(unsigned int dst, unsigned int src,
324			     unsigned int sa, struct jit_ctx *ctx)
325{
326	emit_instr(ctx, srlv, dst, src, sa);
327}
328
329static inline void emit_srl(unsigned int dst, unsigned int src,
330			    unsigned int sa, struct jit_ctx *ctx)
331{
332	/* sa is 5-bits long */
333	if (sa >= BIT(5))
334		/* Shifting >= 32 results in zero */
335		emit_jit_reg_move(dst, r_zero, ctx);
336	else
337		emit_instr(ctx, srl, dst, src, sa);
338}
339
340static inline void emit_slt(unsigned int dst, unsigned int src1,
341			    unsigned int src2, struct jit_ctx *ctx)
342{
343	emit_instr(ctx, slt, dst, src1, src2);
344}
345
346static inline void emit_sltu(unsigned int dst, unsigned int src1,
347			     unsigned int src2, struct jit_ctx *ctx)
348{
349	emit_instr(ctx, sltu, dst, src1, src2);
350}
351
352static inline void emit_sltiu(unsigned dst, unsigned int src,
353			      unsigned int imm, struct jit_ctx *ctx)
354{
355	/* 16 bit immediate */
356	if (!is_range16((s32)imm)) {
357		emit_load_imm(r_tmp, imm, ctx);
358		emit_sltu(dst, src, r_tmp, ctx);
359	} else {
360		emit_instr(ctx, sltiu, dst, src, imm);
361	}
362
363}
364
365/* Store register on the stack */
366static inline void emit_store_stack_reg(ptr reg, ptr base,
367					unsigned int offset,
368					struct jit_ctx *ctx)
369{
370	emit_long_instr(ctx, SW, reg, offset, base);
371}
372
373static inline void emit_store(ptr reg, ptr base, unsigned int offset,
374			      struct jit_ctx *ctx)
375{
376	emit_instr(ctx, sw, reg, offset, base);
377}
378
379static inline void emit_load_stack_reg(ptr reg, ptr base,
380				       unsigned int offset,
381				       struct jit_ctx *ctx)
382{
383	emit_long_instr(ctx, LW, reg, offset, base);
384}
385
386static inline void emit_load(unsigned int reg, unsigned int base,
387			     unsigned int offset, struct jit_ctx *ctx)
388{
389	emit_instr(ctx, lw, reg, offset, base);
390}
391
392static inline void emit_load_byte(unsigned int reg, unsigned int base,
393				  unsigned int offset, struct jit_ctx *ctx)
394{
395	emit_instr(ctx, lb, reg, offset, base);
396}
397
398static inline void emit_half_load(unsigned int reg, unsigned int base,
399				  unsigned int offset, struct jit_ctx *ctx)
400{
401	emit_instr(ctx, lh, reg, offset, base);
402}
403
404static inline void emit_mul(unsigned int dst, unsigned int src1,
405			    unsigned int src2, struct jit_ctx *ctx)
406{
407	emit_instr(ctx, mul, dst, src1, src2);
408}
409
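/*
 * MIPS divu leaves the quotient in LO and the remainder in HI, so emit_div()
 * below reads LO with mflo while emit_mod() reads HI with mfhi.
 */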
410static inline void emit_div(unsigned int dst, unsigned int src,
411			    struct jit_ctx *ctx)
412{
413	if (ctx->target != NULL) {
414		u32 *p = &ctx->target[ctx->idx];
415		uasm_i_divu(&p, dst, src);
416		p = &ctx->target[ctx->idx + 1];
417		uasm_i_mflo(&p, dst);
418	}
419	ctx->idx += 2; /* 2 insts */
420}
421
422static inline void emit_mod(unsigned int dst, unsigned int src,
423			    struct jit_ctx *ctx)
424{
425	if (ctx->target != NULL) {
426		u32 *p = &ctx->target[ctx->idx];
427		uasm_i_divu(&p, dst, src);
428		p = &ctx->target[ctx->idx + 1];
		uasm_i_mfhi(&p, dst);
430	}
431	ctx->idx += 2; /* 2 insts */
432}
433
434static inline void emit_dsll(unsigned int dst, unsigned int src,
435			     unsigned int sa, struct jit_ctx *ctx)
436{
437	emit_instr(ctx, dsll, dst, src, sa);
438}
439
440static inline void emit_dsrl32(unsigned int dst, unsigned int src,
441			       unsigned int sa, struct jit_ctx *ctx)
442{
443	emit_instr(ctx, dsrl32, dst, src, sa);
444}
445
446static inline void emit_wsbh(unsigned int dst, unsigned int src,
447			     struct jit_ctx *ctx)
448{
449	emit_instr(ctx, wsbh, dst, src);
450}
451
452/* load pointer to register */
453static inline void emit_load_ptr(unsigned int dst, unsigned int src,
454				     int imm, struct jit_ctx *ctx)
455{
	/* src contains the base address of the 32/64-bit pointer */
457	emit_long_instr(ctx, LW, dst, imm, src);
458}
459
460/* load a function pointer to register */
461static inline void emit_load_func(unsigned int reg, ptr imm,
462				  struct jit_ctx *ctx)
463{
464	if (config_enabled(CONFIG_64BIT)) {
465		/* At this point imm is always 64-bit */
466		emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
467		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
468		emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
469		emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
470		emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
471	} else {
472		emit_load_imm(reg, imm, ctx);
473	}
474}
475
476/* Move to real MIPS register */
477static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
478{
479	emit_long_instr(ctx, ADDU, dst, src, r_zero);
480}
481
482/* Move to JIT (32-bit) register */
483static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
484{
485	emit_addu(dst, src, r_zero, ctx);
486}
487
488/* Compute the immediate value for PC-relative branches. */
489static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
490{
491	if (ctx->target == NULL)
492		return 0;
493
494	/*
495	 * We want a pc-relative branch. We only do forward branches
496	 * so tgt is always after pc. tgt is the instruction offset
497	 * we want to jump to.
	 *
499	 * Branch on MIPS:
500	 * I: target_offset <- sign_extend(offset)
501	 * I+1: PC += target_offset (delay slot)
502	 *
503	 * ctx->idx currently points to the branch instruction
504	 * but the offset is added to the delay slot so we need
505	 * to subtract 4.
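	 *
	 * For example, if the branch instruction sits at byte 8 of the JITed
	 * body and the target instruction starts at byte 24, the offset passed
	 * to the branch emitter is 24 - 8 - 4 = 12 bytes.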
506	 */
507	return ctx->offsets[tgt] -
508		(ctx->idx * 4 - ctx->prologue_bytes) - 4;
509}
510
511static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
512			     unsigned int imm, struct jit_ctx *ctx)
513{
514	if (ctx->target != NULL) {
515		u32 *p = &ctx->target[ctx->idx];
516
517		switch (cond) {
518		case MIPS_COND_EQ:
519			uasm_i_beq(&p, reg1, reg2, imm);
520			break;
521		case MIPS_COND_NE:
522			uasm_i_bne(&p, reg1, reg2, imm);
523			break;
524		case MIPS_COND_ALL:
525			uasm_i_b(&p, imm);
526			break;
527		default:
528			pr_warn("%s: Unhandled branch conditional: %d\n",
529				__func__, cond);
530		}
531	}
532	ctx->idx++;
533}
534
535static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
536{
537	emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
538}
539
540static inline void emit_jalr(unsigned int link, unsigned int reg,
541			     struct jit_ctx *ctx)
542{
543	emit_instr(ctx, jalr, link, reg);
544}
545
546static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
547{
548	emit_instr(ctx, jr, reg);
549}
550
551static inline u16 align_sp(unsigned int num)
552{
553	/* Double word alignment for 32-bit, quadword for 64-bit */
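	/* e.g. align_sp(20) rounds up to 24 on 32-bit and to 32 on 64-bit */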
554	unsigned int align = config_enabled(CONFIG_64BIT) ? 16 : 8;
555	num = (num + (align - 1)) & -align;
556	return num;
557}
558
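/*
 * First instructions after which A is overwritten anyway; build_prologue()
 * uses this to skip the "clear r_A" step that otherwise prevents leaking
 * kernel data to userspace.
 */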
559static bool is_load_to_a(u16 inst)
560{
561	switch (inst) {
562	case BPF_LD | BPF_W | BPF_LEN:
563	case BPF_LD | BPF_W | BPF_ABS:
564	case BPF_LD | BPF_H | BPF_ABS:
565	case BPF_LD | BPF_B | BPF_ABS:
566		return true;
567	default:
568		return false;
569	}
570}
571
572static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
573{
574	int i = 0, real_off = 0;
575	u32 sflags, tmp_flags;
576
577	/* Adjust the stack pointer */
578	emit_stack_offset(-align_sp(offset), ctx);
579
580	if (ctx->flags & SEEN_CALL) {
581		/* Argument save area */
582		if (config_enabled(CONFIG_64BIT))
583			/* Bottom of current frame */
584			real_off = align_sp(offset) - RSIZE;
585		else
586			/* Top of previous frame */
587			real_off = align_sp(offset) + RSIZE;
588		emit_store_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
589		emit_store_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);
590
591		real_off = 0;
592	}
593
594	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
595	/* sflags is essentially a bitmap */
596	while (tmp_flags) {
597		if ((sflags >> i) & 0x1) {
598			emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
599					     ctx);
600			real_off += RSIZE;
601		}
602		i++;
603		tmp_flags >>= 1;
604	}
605
606	/* save return address */
607	if (ctx->flags & SEEN_CALL) {
608		emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
609		real_off += RSIZE;
610	}
611
612	/* Setup r_M leaving the alignment gap if necessary */
613	if (ctx->flags & SEEN_MEM) {
614		if (real_off % (RSIZE * 2))
615			real_off += RSIZE;
616		emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
617	}
618}
619
620static void restore_bpf_jit_regs(struct jit_ctx *ctx,
621				 unsigned int offset)
622{
623	int i, real_off = 0;
624	u32 sflags, tmp_flags;
625
626	if (ctx->flags & SEEN_CALL) {
627		if (config_enabled(CONFIG_64BIT))
628			/* Bottom of current frame */
629			real_off = align_sp(offset) - RSIZE;
630		else
631			/* Top of previous frame */
632			real_off = align_sp(offset) + RSIZE;
633		emit_load_stack_reg(MIPS_R_A0, r_sp, real_off, ctx);
634		emit_load_stack_reg(MIPS_R_A1, r_sp, real_off + RSIZE, ctx);
635
636		real_off = 0;
637	}
638
639	tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
640	/* sflags is a bitmap */
641	i = 0;
642	while (tmp_flags) {
643		if ((sflags >> i) & 0x1) {
644			emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
645					    ctx);
646			real_off += RSIZE;
647		}
648		i++;
649		tmp_flags >>= 1;
650	}
651
652	/* restore return address */
653	if (ctx->flags & SEEN_CALL)
654		emit_load_stack_reg(r_ra, r_sp, real_off, ctx);
655
	/* Restore the sp and discard the scratch memory */
657	emit_stack_offset(align_sp(offset), ctx);
658}
659
660static unsigned int get_stack_depth(struct jit_ctx *ctx)
661{
662	int sp_off = 0;
663
664
	/* How many s* regs do we need to preserve? */
666	sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * RSIZE;
667
668	if (ctx->flags & SEEN_MEM)
669		sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */
670
671	if (ctx->flags & SEEN_CALL)
		/*
		 * The JIT code makes calls to external functions using 2
		 * arguments. Therefore, for o32 we don't need to allocate
		 * space because we don't care if the arguments are lost
		 * across calls. We do, however, need to preserve incoming
		 * arguments, but that space is already allocated for us by
		 * the caller. On the other hand, for n64, we need to allocate
		 * this space ourselves. We need to preserve $ra as well.
		 */
681		sp_off += config_enabled(CONFIG_64BIT) ?
682			(ARGS_USED_BY_JIT + 1) * RSIZE : RSIZE;
683
684	/*
685	 * Subtract the bytes for the last registers since we only care about
686	 * the location on the stack pointer.
687	 */
688	return sp_off - RSIZE;
689}
690
691static void build_prologue(struct jit_ctx *ctx)
692{
693	u16 first_inst = ctx->skf->insns[0].code;
694	int sp_off;
695
696	/* Calculate the total offset for the stack pointer */
697	sp_off = get_stack_depth(ctx);
698	save_bpf_jit_regs(ctx, sp_off);
699
700	if (ctx->flags & SEEN_SKB)
701		emit_reg_move(r_skb, MIPS_R_A0, ctx);
702
703	if (ctx->flags & SEEN_X)
704		emit_jit_reg_move(r_X, r_zero, ctx);
705
706	/* Do not leak kernel data to userspace */
707	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
708		emit_jit_reg_move(r_A, r_zero, ctx);
709}
710
711static void build_epilogue(struct jit_ctx *ctx)
712{
713	unsigned int sp_off;
714
715	/* Calculate the total offset for the stack pointer */
716
717	sp_off = get_stack_depth(ctx);
718	restore_bpf_jit_regs(ctx, sp_off);
719
720	/* Return */
721	emit_jr(r_ra, ctx);
722	emit_nop(ctx);
723}
724
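/*
 * Helpers called from the generated code to fetch packet data via
 * skb_copy_bits().  They pack the error code into the upper 32 bits and the
 * value (converted to host order for the 2- and 4-byte variants) into the
 * lower 32 bits, which is why r_err/r_val are defined per ABI and endianness
 * at the top of this file.
 */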
725static u64 jit_get_skb_b(struct sk_buff *skb, unsigned offset)
726{
727	u8 ret;
728	int err;
729
730	err = skb_copy_bits(skb, offset, &ret, 1);
731
732	return (u64)err << 32 | ret;
733}
734
735static u64 jit_get_skb_h(struct sk_buff *skb, unsigned offset)
736{
737	u16 ret;
738	int err;
739
740	err = skb_copy_bits(skb, offset, &ret, 2);
741
742	return (u64)err << 32 | ntohs(ret);
743}
744
745static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset)
746{
747	u32 ret;
748	int err;
749
750	err = skb_copy_bits(skb, offset, &ret, 4);
751
752	return (u64)err << 32 | ntohl(ret);
753}
754
755static int build_body(struct jit_ctx *ctx)
756{
757	void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w};
758	const struct bpf_prog *prog = ctx->skf;
759	const struct sock_filter *inst;
760	unsigned int i, off, load_order, condt;
761	u32 k, b_off __maybe_unused;
762
763	for (i = 0; i < prog->len; i++) {
764		u16 code;
765
766		inst = &(prog->insns[i]);
767		pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
768			 __func__, inst->code, inst->jt, inst->jf, inst->k);
769		k = inst->k;
770		code = bpf_anc_helper(inst);
771
772		if (ctx->target == NULL)
773			ctx->offsets[i] = ctx->idx * 4;
774
775		switch (code) {
776		case BPF_LD | BPF_IMM:
777			/* A <- k ==> li r_A, k */
778			ctx->flags |= SEEN_A;
779			emit_load_imm(r_A, k, ctx);
780			break;
781		case BPF_LD | BPF_W | BPF_LEN:
782			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
783			/* A <- len ==> lw r_A, offset(skb) */
784			ctx->flags |= SEEN_SKB | SEEN_A;
785			off = offsetof(struct sk_buff, len);
786			emit_load(r_A, r_skb, off, ctx);
787			break;
788		case BPF_LD | BPF_MEM:
789			/* A <- M[k] ==> lw r_A, offset(M) */
790			ctx->flags |= SEEN_MEM | SEEN_A;
791			emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
792			break;
793		case BPF_LD | BPF_W | BPF_ABS:
794			/* A <- P[k:4] */
795			load_order = 2;
796			goto load;
797		case BPF_LD | BPF_H | BPF_ABS:
798			/* A <- P[k:2] */
799			load_order = 1;
800			goto load;
801		case BPF_LD | BPF_B | BPF_ABS:
802			/* A <- P[k:1] */
803			load_order = 0;
804load:
805			/* the interpreter will deal with the negative K */
806			if ((int)k < 0)
807				return -ENOTSUPP;
808
809			emit_load_imm(r_off, k, ctx);
810load_common:
			/*
			 * We may get here from the indirect loads, so
			 * bail out if the offset is negative.
			 */
815			emit_slt(r_s0, r_off, r_zero, ctx);
816			emit_bcond(MIPS_COND_NE, r_s0, r_zero,
817				   b_imm(prog->len, ctx), ctx);
818			emit_reg_move(r_ret, r_zero, ctx);
819
820			ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 |
821				SEEN_SKB | SEEN_A;
822
823			emit_load_func(r_s0, (ptr)load_func[load_order],
824				      ctx);
825			emit_reg_move(MIPS_R_A0, r_skb, ctx);
826			emit_jalr(MIPS_R_RA, r_s0, ctx);
827			/* Load second argument to delay slot */
828			emit_reg_move(MIPS_R_A1, r_off, ctx);
829			/* Check the error value */
830			if (config_enabled(CONFIG_64BIT)) {
831				/* Get error code from the top 32-bits */
832				emit_dsrl32(r_s0, r_val, 0, ctx);
833				/* Branch to 3 instructions ahead */
834				emit_bcond(MIPS_COND_NE, r_s0, r_zero, 3 << 2,
835					   ctx);
836			} else {
837				/* Branch to 3 instructions ahead */
838				emit_bcond(MIPS_COND_NE, r_err, r_zero, 3 << 2,
839					   ctx);
840			}
841			emit_nop(ctx);
842			/* We are good */
843			emit_b(b_imm(i + 1, ctx), ctx);
844			emit_jit_reg_move(r_A, r_val, ctx);
845			/* Return with error */
846			emit_b(b_imm(prog->len, ctx), ctx);
847			emit_reg_move(r_ret, r_zero, ctx);
848			break;
849		case BPF_LD | BPF_W | BPF_IND:
850			/* A <- P[X + k:4] */
851			load_order = 2;
852			goto load_ind;
853		case BPF_LD | BPF_H | BPF_IND:
854			/* A <- P[X + k:2] */
855			load_order = 1;
856			goto load_ind;
857		case BPF_LD | BPF_B | BPF_IND:
858			/* A <- P[X + k:1] */
859			load_order = 0;
860load_ind:
861			ctx->flags |= SEEN_OFF | SEEN_X;
862			emit_addiu(r_off, r_X, k, ctx);
863			goto load_common;
864		case BPF_LDX | BPF_IMM:
865			/* X <- k */
866			ctx->flags |= SEEN_X;
867			emit_load_imm(r_X, k, ctx);
868			break;
869		case BPF_LDX | BPF_MEM:
870			/* X <- M[k] */
871			ctx->flags |= SEEN_X | SEEN_MEM;
872			emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
873			break;
874		case BPF_LDX | BPF_W | BPF_LEN:
875			/* X <- len */
876			ctx->flags |= SEEN_X | SEEN_SKB;
877			off = offsetof(struct sk_buff, len);
878			emit_load(r_X, r_skb, off, ctx);
879			break;
880		case BPF_LDX | BPF_B | BPF_MSH:
881			/* the interpreter will deal with the negative K */
882			if ((int)k < 0)
883				return -ENOTSUPP;
884
885			/* X <- 4 * (P[k:1] & 0xf) */
886			ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB;
887			/* Load offset to a1 */
888			emit_load_func(r_s0, (ptr)jit_get_skb_b, ctx);
			/*
			 * Loading the immediate may take two instructions, so
			 * it cannot go in the delay slot; put the single-insn
			 * a0 move there instead.
			 */
893			emit_load_imm(MIPS_R_A1, k, ctx);
894			emit_jalr(MIPS_R_RA, r_s0, ctx);
895			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
896			/* Check the error value */
897			if (config_enabled(CONFIG_64BIT)) {
898				/* Top 32-bits of $v0 on 64-bit */
899				emit_dsrl32(r_s0, r_val, 0, ctx);
900				emit_bcond(MIPS_COND_NE, r_s0, r_zero,
901					   3 << 2, ctx);
902			} else {
903				emit_bcond(MIPS_COND_NE, r_err, r_zero,
904					   3 << 2, ctx);
905			}
906			/* No need for delay slot */
907			/* We are good */
			/* X <- P[k:1] & 0xf */
909			emit_andi(r_X, r_val, 0xf, ctx);
910			/* X << 2 */
911			emit_b(b_imm(i + 1, ctx), ctx);
912			emit_sll(r_X, r_X, 2, ctx); /* delay slot */
913			/* Return with error */
914			emit_b(b_imm(prog->len, ctx), ctx);
915			emit_load_imm(r_ret, 0, ctx); /* delay slot */
916			break;
917		case BPF_ST:
918			/* M[k] <- A */
919			ctx->flags |= SEEN_MEM | SEEN_A;
920			emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
921			break;
922		case BPF_STX:
923			/* M[k] <- X */
924			ctx->flags |= SEEN_MEM | SEEN_X;
925			emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
926			break;
927		case BPF_ALU | BPF_ADD | BPF_K:
928			/* A += K */
929			ctx->flags |= SEEN_A;
930			emit_addiu(r_A, r_A, k, ctx);
931			break;
932		case BPF_ALU | BPF_ADD | BPF_X:
933			/* A += X */
934			ctx->flags |= SEEN_A | SEEN_X;
935			emit_addu(r_A, r_A, r_X, ctx);
936			break;
937		case BPF_ALU | BPF_SUB | BPF_K:
938			/* A -= K */
939			ctx->flags |= SEEN_A;
940			emit_addiu(r_A, r_A, -k, ctx);
941			break;
942		case BPF_ALU | BPF_SUB | BPF_X:
943			/* A -= X */
944			ctx->flags |= SEEN_A | SEEN_X;
945			emit_subu(r_A, r_A, r_X, ctx);
946			break;
947		case BPF_ALU | BPF_MUL | BPF_K:
948			/* A *= K */
949			/* Load K to scratch register before MUL */
950			ctx->flags |= SEEN_A | SEEN_S0;
951			emit_load_imm(r_s0, k, ctx);
952			emit_mul(r_A, r_A, r_s0, ctx);
953			break;
954		case BPF_ALU | BPF_MUL | BPF_X:
955			/* A *= X */
956			ctx->flags |= SEEN_A | SEEN_X;
957			emit_mul(r_A, r_A, r_X, ctx);
958			break;
959		case BPF_ALU | BPF_DIV | BPF_K:
960			/* A /= k */
961			if (k == 1)
962				break;
963			if (optimize_div(&k)) {
964				ctx->flags |= SEEN_A;
965				emit_srl(r_A, r_A, k, ctx);
966				break;
967			}
968			ctx->flags |= SEEN_A | SEEN_S0;
969			emit_load_imm(r_s0, k, ctx);
970			emit_div(r_A, r_s0, ctx);
971			break;
972		case BPF_ALU | BPF_MOD | BPF_K:
973			/* A %= k */
974			if (k == 1 || optimize_div(&k)) {
975				ctx->flags |= SEEN_A;
976				emit_jit_reg_move(r_A, r_zero, ctx);
977			} else {
978				ctx->flags |= SEEN_A | SEEN_S0;
979				emit_load_imm(r_s0, k, ctx);
980				emit_mod(r_A, r_s0, ctx);
981			}
982			break;
983		case BPF_ALU | BPF_DIV | BPF_X:
984			/* A /= X */
985			ctx->flags |= SEEN_X | SEEN_A;
986			/* Check if r_X is zero */
987			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
988				   b_imm(prog->len, ctx), ctx);
989			emit_load_imm(r_val, 0, ctx); /* delay slot */
990			emit_div(r_A, r_X, ctx);
991			break;
992		case BPF_ALU | BPF_MOD | BPF_X:
993			/* A %= X */
994			ctx->flags |= SEEN_X | SEEN_A;
995			/* Check if r_X is zero */
996			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
997				   b_imm(prog->len, ctx), ctx);
998			emit_load_imm(r_val, 0, ctx); /* delay slot */
999			emit_mod(r_A, r_X, ctx);
1000			break;
1001		case BPF_ALU | BPF_OR | BPF_K:
1002			/* A |= K */
1003			ctx->flags |= SEEN_A;
1004			emit_ori(r_A, r_A, k, ctx);
1005			break;
1006		case BPF_ALU | BPF_OR | BPF_X:
			/* A |= X */
			ctx->flags |= SEEN_A | SEEN_X;
			emit_or(r_A, r_A, r_X, ctx);
1010			break;
1011		case BPF_ALU | BPF_XOR | BPF_K:
1012			/* A ^= k */
1013			ctx->flags |= SEEN_A;
1014			emit_xori(r_A, r_A, k, ctx);
1015			break;
1016		case BPF_ANC | SKF_AD_ALU_XOR_X:
1017		case BPF_ALU | BPF_XOR | BPF_X:
1018			/* A ^= X */
			ctx->flags |= SEEN_A | SEEN_X;
1020			emit_xor(r_A, r_A, r_X, ctx);
1021			break;
1022		case BPF_ALU | BPF_AND | BPF_K:
1023			/* A &= K */
1024			ctx->flags |= SEEN_A;
1025			emit_andi(r_A, r_A, k, ctx);
1026			break;
1027		case BPF_ALU | BPF_AND | BPF_X:
1028			/* A &= X */
1029			ctx->flags |= SEEN_A | SEEN_X;
1030			emit_and(r_A, r_A, r_X, ctx);
1031			break;
1032		case BPF_ALU | BPF_LSH | BPF_K:
1033			/* A <<= K */
1034			ctx->flags |= SEEN_A;
1035			emit_sll(r_A, r_A, k, ctx);
1036			break;
1037		case BPF_ALU | BPF_LSH | BPF_X:
1038			/* A <<= X */
1039			ctx->flags |= SEEN_A | SEEN_X;
1040			emit_sllv(r_A, r_A, r_X, ctx);
1041			break;
1042		case BPF_ALU | BPF_RSH | BPF_K:
1043			/* A >>= K */
1044			ctx->flags |= SEEN_A;
1045			emit_srl(r_A, r_A, k, ctx);
1046			break;
1047		case BPF_ALU | BPF_RSH | BPF_X:
1048			ctx->flags |= SEEN_A | SEEN_X;
1049			emit_srlv(r_A, r_A, r_X, ctx);
1050			break;
1051		case BPF_ALU | BPF_NEG:
1052			/* A = -A */
1053			ctx->flags |= SEEN_A;
1054			emit_neg(r_A, ctx);
1055			break;
1056		case BPF_JMP | BPF_JA:
1057			/* pc += K */
1058			emit_b(b_imm(i + k + 1, ctx), ctx);
1059			emit_nop(ctx);
1060			break;
1061		case BPF_JMP | BPF_JEQ | BPF_K:
1062			/* pc += ( A == K ) ? pc->jt : pc->jf */
1063			condt = MIPS_COND_EQ | MIPS_COND_K;
1064			goto jmp_cmp;
1065		case BPF_JMP | BPF_JEQ | BPF_X:
1066			ctx->flags |= SEEN_X;
1067			/* pc += ( A == X ) ? pc->jt : pc->jf */
1068			condt = MIPS_COND_EQ | MIPS_COND_X;
1069			goto jmp_cmp;
1070		case BPF_JMP | BPF_JGE | BPF_K:
1071			/* pc += ( A >= K ) ? pc->jt : pc->jf */
1072			condt = MIPS_COND_GE | MIPS_COND_K;
1073			goto jmp_cmp;
1074		case BPF_JMP | BPF_JGE | BPF_X:
1075			ctx->flags |= SEEN_X;
1076			/* pc += ( A >= X ) ? pc->jt : pc->jf */
1077			condt = MIPS_COND_GE | MIPS_COND_X;
1078			goto jmp_cmp;
1079		case BPF_JMP | BPF_JGT | BPF_K:
1080			/* pc += ( A > K ) ? pc->jt : pc->jf */
1081			condt = MIPS_COND_GT | MIPS_COND_K;
1082			goto jmp_cmp;
1083		case BPF_JMP | BPF_JGT | BPF_X:
1084			ctx->flags |= SEEN_X;
1085			/* pc += ( A > X ) ? pc->jt : pc->jf */
1086			condt = MIPS_COND_GT | MIPS_COND_X;
1087jmp_cmp:
1088			/* Greater or Equal */
1089			if ((condt & MIPS_COND_GE) ||
1090			    (condt & MIPS_COND_GT)) {
1091				if (condt & MIPS_COND_K) { /* K */
1092					ctx->flags |= SEEN_S0 | SEEN_A;
1093					emit_sltiu(r_s0, r_A, k, ctx);
1094				} else { /* X */
1095					ctx->flags |= SEEN_S0 | SEEN_A |
1096						SEEN_X;
1097					emit_sltu(r_s0, r_A, r_X, ctx);
1098				}
				/* A < (K|X) ? r_scratch = 1 */
1100				b_off = b_imm(i + inst->jf + 1, ctx);
1101				emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
1102					   ctx);
1103				emit_nop(ctx);
1104				/* A > (K|X) ? scratch = 0 */
1105				if (condt & MIPS_COND_GT) {
1106					/* Checking for equality */
1107					ctx->flags |= SEEN_S0 | SEEN_A | SEEN_X;
1108					if (condt & MIPS_COND_K)
1109						emit_load_imm(r_s0, k, ctx);
1110					else
1111						emit_jit_reg_move(r_s0, r_X,
1112								  ctx);
1113					b_off = b_imm(i + inst->jf + 1, ctx);
1114					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
1115						   b_off, ctx);
1116					emit_nop(ctx);
1117					/* Finally, A > K|X */
1118					b_off = b_imm(i + inst->jt + 1, ctx);
1119					emit_b(b_off, ctx);
1120					emit_nop(ctx);
1121				} else {
1122					/* A >= (K|X) so jump */
1123					b_off = b_imm(i + inst->jt + 1, ctx);
1124					emit_b(b_off, ctx);
1125					emit_nop(ctx);
1126				}
1127			} else {
1128				/* A == K|X */
1129				if (condt & MIPS_COND_K) { /* K */
1130					ctx->flags |= SEEN_S0 | SEEN_A;
1131					emit_load_imm(r_s0, k, ctx);
1132					/* jump true */
1133					b_off = b_imm(i + inst->jt + 1, ctx);
1134					emit_bcond(MIPS_COND_EQ, r_A, r_s0,
1135						   b_off, ctx);
1136					emit_nop(ctx);
1137					/* jump false */
1138					b_off = b_imm(i + inst->jf + 1,
1139						      ctx);
1140					emit_bcond(MIPS_COND_NE, r_A, r_s0,
1141						   b_off, ctx);
1142					emit_nop(ctx);
1143				} else { /* X */
1144					/* jump true */
1145					ctx->flags |= SEEN_A | SEEN_X;
1146					b_off = b_imm(i + inst->jt + 1,
1147						      ctx);
1148					emit_bcond(MIPS_COND_EQ, r_A, r_X,
1149						   b_off, ctx);
1150					emit_nop(ctx);
1151					/* jump false */
1152					b_off = b_imm(i + inst->jf + 1, ctx);
1153					emit_bcond(MIPS_COND_NE, r_A, r_X,
1154						   b_off, ctx);
1155					emit_nop(ctx);
1156				}
1157			}
1158			break;
1159		case BPF_JMP | BPF_JSET | BPF_K:
1160			ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A;
1161			/* pc += (A & K) ? pc -> jt : pc -> jf */
1162			emit_load_imm(r_s1, k, ctx);
1163			emit_and(r_s0, r_A, r_s1, ctx);
1164			/* jump true */
1165			b_off = b_imm(i + inst->jt + 1, ctx);
1166			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
1167			emit_nop(ctx);
1168			/* jump false */
1169			b_off = b_imm(i + inst->jf + 1, ctx);
1170			emit_b(b_off, ctx);
1171			emit_nop(ctx);
1172			break;
1173		case BPF_JMP | BPF_JSET | BPF_X:
1174			ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A;
1175			/* pc += (A & X) ? pc -> jt : pc -> jf */
1176			emit_and(r_s0, r_A, r_X, ctx);
1177			/* jump true */
1178			b_off = b_imm(i + inst->jt + 1, ctx);
1179			emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
1180			emit_nop(ctx);
1181			/* jump false */
1182			b_off = b_imm(i + inst->jf + 1, ctx);
1183			emit_b(b_off, ctx);
1184			emit_nop(ctx);
1185			break;
1186		case BPF_RET | BPF_A:
1187			ctx->flags |= SEEN_A;
1188			if (i != prog->len - 1)
1189				/*
1190				 * If this is not the last instruction
1191				 * then jump to the epilogue
1192				 */
1193				emit_b(b_imm(prog->len, ctx), ctx);
1194			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
1195			break;
1196		case BPF_RET | BPF_K:
			/*
			 * Loading the immediate can emit two instructions, so
			 * it does not fit in the delay slot.
			 */
1201			emit_load_imm(r_ret, k, ctx);
1202			if (i != prog->len - 1) {
1203				/*
1204				 * If this is not the last instruction
1205				 * then jump to the epilogue
1206				 */
1207				emit_b(b_imm(prog->len, ctx), ctx);
1208				emit_nop(ctx);
1209			}
1210			break;
1211		case BPF_MISC | BPF_TAX:
1212			/* X = A */
1213			ctx->flags |= SEEN_X | SEEN_A;
1214			emit_jit_reg_move(r_X, r_A, ctx);
1215			break;
1216		case BPF_MISC | BPF_TXA:
1217			/* A = X */
1218			ctx->flags |= SEEN_A | SEEN_X;
1219			emit_jit_reg_move(r_A, r_X, ctx);
1220			break;
1221		/* AUX */
1222		case BPF_ANC | SKF_AD_PROTOCOL:
			/* A = ntohs(skb->protocol) */
1224			ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
1225			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
1226						  protocol) != 2);
1227			off = offsetof(struct sk_buff, protocol);
1228			emit_half_load(r_A, r_skb, off, ctx);
1229#ifdef CONFIG_CPU_LITTLE_ENDIAN
1230			/* This needs little endian fixup */
1231			if (cpu_has_wsbh) {
1232				/* R2 and later have the wsbh instruction */
1233				emit_wsbh(r_A, r_A, ctx);
1234			} else {
1235				/* Get first byte */
1236				emit_andi(r_tmp_imm, r_A, 0xff, ctx);
1237				/* Shift it */
1238				emit_sll(r_tmp, r_tmp_imm, 8, ctx);
1239				/* Get second byte */
1240				emit_srl(r_tmp_imm, r_A, 8, ctx);
1241				emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
				/* Put everything together in r_A */
1243				emit_or(r_A, r_tmp, r_tmp_imm, ctx);
1244			}
1245#endif
1246			break;
1247		case BPF_ANC | SKF_AD_CPU:
1248			ctx->flags |= SEEN_A | SEEN_OFF;
1249			/* A = current_thread_info()->cpu */
1250			BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
1251						  cpu) != 4);
1252			off = offsetof(struct thread_info, cpu);
1253			/* $28/gp points to the thread_info struct */
1254			emit_load(r_A, 28, off, ctx);
1255			break;
1256		case BPF_ANC | SKF_AD_IFINDEX:
1257			/* A = skb->dev->ifindex */
1258			ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0;
1259			off = offsetof(struct sk_buff, dev);
1260			/* Load *dev pointer */
1261			emit_load_ptr(r_s0, r_skb, off, ctx);
1262			/* error (0) in the delay slot */
1263			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
1264				   b_imm(prog->len, ctx), ctx);
1265			emit_reg_move(r_ret, r_zero, ctx);
1266			BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
1267						  ifindex) != 4);
1268			off = offsetof(struct net_device, ifindex);
1269			emit_load(r_A, r_s0, off, ctx);
1270			break;
1271		case BPF_ANC | SKF_AD_MARK:
1272			ctx->flags |= SEEN_SKB | SEEN_A;
1273			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
1274			off = offsetof(struct sk_buff, mark);
1275			emit_load(r_A, r_skb, off, ctx);
1276			break;
1277		case BPF_ANC | SKF_AD_RXHASH:
1278			ctx->flags |= SEEN_SKB | SEEN_A;
1279			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
1280			off = offsetof(struct sk_buff, hash);
1281			emit_load(r_A, r_skb, off, ctx);
1282			break;
1283		case BPF_ANC | SKF_AD_VLAN_TAG:
1284		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
1285			ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A;
1286			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
1287						  vlan_tci) != 2);
1288			off = offsetof(struct sk_buff, vlan_tci);
1289			emit_half_load(r_s0, r_skb, off, ctx);
1290			if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
1291				emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx);
1292			} else {
1293				emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx);
1294				/* return 1 if present */
1295				emit_sltu(r_A, r_zero, r_A, ctx);
1296			}
1297			break;
1298		case BPF_ANC | SKF_AD_PKTTYPE:
1299			ctx->flags |= SEEN_SKB;
1300
1301			emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
1302			/* Keep only the last 3 bits */
1303			emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
1304#ifdef __BIG_ENDIAN_BITFIELD
1305			/* Get the actual packet type to the lower 3 bits */
1306			emit_srl(r_A, r_A, 5, ctx);
1307#endif
1308			break;
1309		case BPF_ANC | SKF_AD_QUEUE:
1310			ctx->flags |= SEEN_SKB | SEEN_A;
1311			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
1312						  queue_mapping) != 2);
1313			BUILD_BUG_ON(offsetof(struct sk_buff,
1314					      queue_mapping) > 0xff);
1315			off = offsetof(struct sk_buff, queue_mapping);
1316			emit_half_load(r_A, r_skb, off, ctx);
1317			break;
1318		default:
1319			pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
1320				 inst->code);
1321			return -1;
1322		}
1323	}
1324
1325	/* compute offsets only during the first pass */
1326	if (ctx->target == NULL)
1327		ctx->offsets[i] = ctx->idx * 4;
1328
1329	return 0;
1330}
1331
1332int bpf_jit_enable __read_mostly;
1333
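/*
 * Two-pass JIT: the first pass runs with ctx.target == NULL and only records
 * per-instruction offsets and the total instruction count; once the prologue
 * and epilogue have been sized, memory is allocated and the second pass emits
 * the actual code.
 */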
1334void bpf_jit_compile(struct bpf_prog *fp)
1335{
1336	struct jit_ctx ctx;
1337	unsigned int alloc_size, tmp_idx;
1338
1339	if (!bpf_jit_enable)
1340		return;
1341
1342	memset(&ctx, 0, sizeof(ctx));
1343
1344	ctx.offsets = kcalloc(fp->len, sizeof(*ctx.offsets), GFP_KERNEL);
1345	if (ctx.offsets == NULL)
1346		return;
1347
1348	ctx.skf = fp;
1349
1350	if (build_body(&ctx))
1351		goto out;
1352
1353	tmp_idx = ctx.idx;
1354	build_prologue(&ctx);
1355	ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
1356	/* just to complete the ctx.idx count */
1357	build_epilogue(&ctx);
1358
1359	alloc_size = 4 * ctx.idx;
1360	ctx.target = module_alloc(alloc_size);
1361	if (ctx.target == NULL)
1362		goto out;
1363
1364	/* Clean it */
1365	memset(ctx.target, 0, alloc_size);
1366
1367	ctx.idx = 0;
1368
1369	/* Generate the actual JIT code */
1370	build_prologue(&ctx);
1371	build_body(&ctx);
1372	build_epilogue(&ctx);
1373
1374	/* Update the icache */
1375	flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));
1376
1377	if (bpf_jit_enable > 1)
1378		/* Dump JIT code */
1379		bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
1380
1381	fp->bpf_func = (void *)ctx.target;
1382	fp->jited = true;
1383
1384out:
1385	kfree(ctx.offsets);
1386}
1387
1388void bpf_jit_free(struct bpf_prog *fp)
1389{
1390	if (fp->jited)
1391		module_free(NULL, fp->bpf_func);
1392
1393	bpf_prog_unlock_free(fp);
1394}
1395