1cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza/*
2cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *    Stack-less Just-In-Time compiler
3cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *
4cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *
6cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * Redistribution and use in source and binary forms, with or without modification, are
7cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * permitted provided that the following conditions are met:
8cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *
9cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *   1. Redistributions of source code must retain the above copyright notice, this list of
10cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *      conditions and the following disclaimer.
11cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *
12cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *      of conditions and the following disclaimer in the documentation and/or other materials
14cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *      provided with the distribution.
15cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza *
16cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza */
26cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
27cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan StozaSLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza{
29cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza	return "ARM-64" SLJIT_CPUINFO;
30cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza}
31cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
32cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza/* Length of an instruction word */
33cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stozatypedef sljit_ui sljit_ins;
34cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
35cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_ZERO	0
36cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
37cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
38cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
39cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
40cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_REG4	(SLJIT_NUMBER_OF_REGISTERS + 5)
41cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_LR		(SLJIT_NUMBER_OF_REGISTERS + 6)
42cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_SP		(SLJIT_NUMBER_OF_REGISTERS + 7)
43cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
44cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_FREG1	(0)
45cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
46cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
47cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stozastatic SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
48cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza  31, 0, 1, 2, 3, 4, 5, 6, 7, 13, 14, 15, 16, 17, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 11, 12, 30, 31
49cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza};
50cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
/* Bit 31 of an instruction word. Opcodes are XOR-ed with it to flip that
   bit (for example MOVN ^ W_OP when loading a value that fits in 32 bits).
   Written with an unsigned literal: shifting a signed 1 into the sign bit
   of an int is undefined behaviour in C. */
#define W_OP (1u << 31)
/* General purpose register operand fields: the argument is an index into
   reg_map, the result is the mapped register number already shifted to
   the bit position of the field (0, 0, 5, 10 and 16 respectively). */
#define RD(rd) (reg_map[rd])
#define RT(rt) (reg_map[rt])
#define RN(rn) (reg_map[rn] << 5)
#define RT2(rt2) (reg_map[rt2] << 10)
#define RM(rm) (reg_map[rm] << 16)
/* Same field positions for operands that are used directly, without
   going through reg_map. */
#define VD(vd) (vd)
#define VT(vt) (vt)
#define VN(vn) ((vn) << 5)
#define VM(vm) ((vm) << 16)
61cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
62cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
64cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza/* --------------------------------------------------------------------- */
65cb1fcdedaaf95acabeac6a2d5bff423d6ca62296Dan Stoza
/* Base instruction words, listed alphabetically. When an instruction is
   emitted the base word is OR-ed with its operand fields (register numbers
   and immediates); some call sites first XOR the base word with individual
   bits (such as bit 31) to select a different form of the instruction. */
#define ADC      0x9a000000
#define ADD      0x8b000000
#define ADDI     0x91000000
#define AND      0x8a000000
#define ANDI     0x92000000
#define ASRV     0x9ac02800
#define B        0x14000000
#define B_CC     0x54000000
#define BL       0x94000000
#define BLR      0xd63f0000
#define BR       0xd61f0000
#define BRK      0xd4200000
#define CBZ      0xb4000000
#define CLZ      0xdac01000
#define CSINC    0x9a800400
#define EOR      0xca000000
#define EORI     0xd2000000
#define FABS     0x1e60c000
#define FADD     0x1e602800
#define FCMP     0x1e602000
#define FCVT     0x1e224000
#define FCVTZS   0x9e780000
#define FDIV     0x1e601800
#define FMOV     0x1e604000
#define FMUL     0x1e600800
#define FNEG     0x1e614000
#define FSUB     0x1e603800
#define LDRI     0xf9400000
#define LDP      0xa9400000
#define LDP_PST  0xa8c00000
#define LSLV     0x9ac02000
#define LSRV     0x9ac02400
#define MADD     0x9b000000
#define MOVK     0xf2800000
#define MOVN     0x92800000
#define MOVZ     0xd2800000
#define NOP      0xd503201f
#define ORN      0xaa200000
#define ORR      0xaa000000
#define ORRI     0xb2000000
#define RET      0xd65f0000
#define SBC      0xda000000
#define SBFM     0x93000000
#define SCVTF    0x9e620000
#define SDIV     0x9ac00c00
#define SMADDL   0x9b200000
#define SMULH    0x9b403c00
#define STP      0xa9000000
#define STP_PRE  0xa9800000
#define STRI     0xf9000000
#define STR_FI   0x3d000000
#define STR_FR   0x3c206800
#define STUR_FI  0x3c000000
#define SUB      0xcb000000
#define SUBI     0xd1000000
#define SUBS     0xeb000000
#define UBFM     0xd3000000
#define UDIV     0x9ac00800
#define UMULH    0x9bc03c00
125
126/* dest_reg is the absolute name of the register
127   Useful for reordering instructions in the delay slot. */
128static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
129{
130	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
131	FAIL_IF(!ptr);
132	*ptr = ins;
133	compiler->size++;
134	return SLJIT_SUCCESS;
135}
136
137static SLJIT_INLINE sljit_si emit_imm64_const(struct sljit_compiler *compiler, sljit_si dst, sljit_uw imm)
138{
139	FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
140	FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 16) & 0xffff) << 5) | (1 << 21)));
141	FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((imm >> 32) & 0xffff) << 5) | (2 << 21)));
142	return push_inst(compiler, MOVK | RD(dst) | ((imm >> 48) << 5) | (3 << 21));
143}
144
145static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm)
146{
147	sljit_si dst = inst[0] & 0x1f;
148	SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
149	inst[0] = MOVZ | dst | ((new_imm & 0xffff) << 5);
150	inst[1] = MOVK | dst | (((new_imm >> 16) & 0xffff) << 5) | (1 << 21);
151	inst[2] = MOVK | dst | (((new_imm >> 32) & 0xffff) << 5) | (2 << 21);
152	inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21);
153}
154
155static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
156{
157	sljit_sw diff;
158	sljit_uw target_addr;
159
160	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
161		jump->flags |= PATCH_ABS64;
162		return 0;
163	}
164
165	if (jump->flags & JUMP_ADDR)
166		target_addr = jump->u.target;
167	else {
168		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
169		target_addr = (sljit_uw)(code + jump->u.label->size);
170	}
171	diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4);
172
173	if (jump->flags & IS_COND) {
174		diff += sizeof(sljit_ins);
175		if (diff <= 0xfffff && diff >= -0x100000) {
176			code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
177			jump->addr -= sizeof(sljit_ins);
178			jump->flags |= PATCH_COND;
179			return 5;
180		}
181		diff -= sizeof(sljit_ins);
182	}
183
184	if (diff <= 0x7ffffff && diff >= -0x8000000) {
185		jump->flags |= PATCH_B;
186		return 4;
187	}
188
189	if (target_addr <= 0xffffffffl) {
190		if (jump->flags & IS_COND)
191			code_ptr[-5] -= (2 << 5);
192		code_ptr[-2] = code_ptr[0];
193		return 2;
194	}
195	if (target_addr <= 0xffffffffffffl) {
196		if (jump->flags & IS_COND)
197			code_ptr[-5] -= (1 << 5);
198		jump->flags |= PATCH_ABS48;
199		code_ptr[-1] = code_ptr[0];
200		return 1;
201	}
202
203	jump->flags |= PATCH_ABS64;
204	return 0;
205}
206
207SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
208{
209	struct sljit_memory_fragment *buf;
210	sljit_ins *code;
211	sljit_ins *code_ptr;
212	sljit_ins *buf_ptr;
213	sljit_ins *buf_end;
214	sljit_uw word_count;
215	sljit_uw addr;
216	sljit_si dst;
217
218	struct sljit_label *label;
219	struct sljit_jump *jump;
220	struct sljit_const *const_;
221
222	CHECK_ERROR_PTR();
223	check_sljit_generate_code(compiler);
224	reverse_buf(compiler);
225
226	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
227	PTR_FAIL_WITH_EXEC_IF(code);
228	buf = compiler->buf;
229
230	code_ptr = code;
231	word_count = 0;
232	label = compiler->labels;
233	jump = compiler->jumps;
234	const_ = compiler->consts;
235
236	do {
237		buf_ptr = (sljit_ins*)buf->memory;
238		buf_end = buf_ptr + (buf->used_size >> 2);
239		do {
240			*code_ptr = *buf_ptr++;
241			/* These structures are ordered by their address. */
242			SLJIT_ASSERT(!label || label->size >= word_count);
243			SLJIT_ASSERT(!jump || jump->addr >= word_count);
244			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
245			if (label && label->size == word_count) {
246				label->addr = (sljit_uw)code_ptr;
247				label->size = code_ptr - code;
248				label = label->next;
249			}
250			if (jump && jump->addr == word_count) {
251					jump->addr = (sljit_uw)(code_ptr - 4);
252					code_ptr -= detect_jump_type(jump, code_ptr, code);
253					jump = jump->next;
254			}
255			if (const_ && const_->addr == word_count) {
256				const_->addr = (sljit_uw)code_ptr;
257				const_ = const_->next;
258			}
259			code_ptr ++;
260			word_count ++;
261		} while (buf_ptr < buf_end);
262
263		buf = buf->next;
264	} while (buf);
265
266	if (label && label->size == word_count) {
267		label->addr = (sljit_uw)code_ptr;
268		label->size = code_ptr - code;
269		label = label->next;
270	}
271
272	SLJIT_ASSERT(!label);
273	SLJIT_ASSERT(!jump);
274	SLJIT_ASSERT(!const_);
275	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
276
277	jump = compiler->jumps;
278	while (jump) {
279		do {
280			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
281			buf_ptr = (sljit_ins*)jump->addr;
282			if (jump->flags & PATCH_B) {
283				addr = (sljit_sw)(addr - jump->addr) >> 2;
284				SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000);
285				buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff);
286				if (jump->flags & IS_COND)
287					buf_ptr[-1] -= (4 << 5);
288				break;
289			}
290			if (jump->flags & PATCH_COND) {
291				addr = (sljit_sw)(addr - jump->addr) >> 2;
292				SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000);
293				buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5);
294				break;
295			}
296
297			SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || addr <= 0xffffffffl);
298			SLJIT_ASSERT((jump->flags & PATCH_ABS64) || addr <= 0xffffffffffffl);
299
300			dst = buf_ptr[0] & 0x1f;
301			buf_ptr[0] = MOVZ | dst | ((addr & 0xffff) << 5);
302			buf_ptr[1] = MOVK | dst | (((addr >> 16) & 0xffff) << 5) | (1 << 21);
303			if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
304				buf_ptr[2] = MOVK | dst | (((addr >> 32) & 0xffff) << 5) | (2 << 21);
305			if (jump->flags & PATCH_ABS64)
306				buf_ptr[3] = MOVK | dst | (((addr >> 48) & 0xffff) << 5) | (3 << 21);
307		} while (0);
308		jump = jump->next;
309	}
310
311	compiler->error = SLJIT_ERR_COMPILED;
312	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
313	SLJIT_CACHE_FLUSH(code, code_ptr);
314	return code;
315}
316
317/* --------------------------------------------------------------------- */
318/*  Core code generator functions.                                       */
319/* --------------------------------------------------------------------- */
320
321#define COUNT_TRAILING_ZERO(value, result) \
322	result = 0; \
323	if (!(value & 0xffffffff)) { \
324		result += 32; \
325		value >>= 32; \
326	} \
327	if (!(value & 0xffff)) { \
328		result += 16; \
329		value >>= 16; \
330	} \
331	if (!(value & 0xff)) { \
332		result += 8; \
333		value >>= 8; \
334	} \
335	if (!(value & 0xf)) { \
336		result += 4; \
337		value >>= 4; \
338	} \
339	if (!(value & 0x3)) { \
340		result += 2; \
341		value >>= 2; \
342	} \
343	if (!(value & 0x1)) { \
344		result += 1; \
345		value >>= 1; \
346	}
347
348#define LOGICAL_IMM_CHECK 0x100
349
350static sljit_ins logical_imm(sljit_sw imm, sljit_si len)
351{
352	sljit_si negated, ones, right;
353	sljit_uw mask, uimm;
354	sljit_ins ins;
355
356	if (len & LOGICAL_IMM_CHECK) {
357		len &= ~LOGICAL_IMM_CHECK;
358		if (len == 32 && (imm == 0 || imm == -1))
359			return 0;
360		if (len == 16 && ((sljit_si)imm == 0 || (sljit_si)imm == -1))
361			return 0;
362	}
363
364	SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
365		|| (len == 16 && (sljit_si)imm != 0 && (sljit_si)imm != -1));
366	uimm = (sljit_uw)imm;
367	while (1) {
368		if (len <= 0) {
369			SLJIT_ASSERT_STOP();
370			return 0;
371		}
372		mask = ((sljit_uw)1 << len) - 1;
373		if ((uimm & mask) != ((uimm >> len) & mask))
374			break;
375		len >>= 1;
376	}
377
378	len <<= 1;
379
380	negated = 0;
381	if (uimm & 0x1) {
382		negated = 1;
383		uimm = ~uimm;
384	}
385
386	if (len < 64)
387		uimm &= ((sljit_uw)1 << len) - 1;
388
389	/* Unsigned right shift. */
390	COUNT_TRAILING_ZERO(uimm, right);
391
392	/* Signed shift. We also know that the highest bit is set. */
393	imm = (sljit_sw)~uimm;
394	SLJIT_ASSERT(imm < 0);
395
396	COUNT_TRAILING_ZERO(imm, ones);
397
398	if (~imm)
399		return 0;
400
401	if (len == 64)
402		ins = 1 << 22;
403	else
404		ins = (0x3f - ((len << 1) - 1)) << 10;
405
406	if (negated)
407		return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
408
409	return ins | ((ones - 1) << 10) | ((len - right) << 16);
410}
411
412#undef COUNT_TRAILING_ZERO
413
414static sljit_si load_immediate(struct sljit_compiler *compiler, sljit_si dst, sljit_sw simm)
415{
416	sljit_uw imm = (sljit_uw)simm;
417	sljit_si i, zeros, ones, first;
418	sljit_ins bitmask;
419
420	if (imm <= 0xffff)
421		return push_inst(compiler, MOVZ | RD(dst) | (imm << 5));
422
423	if (simm >= -0x10000 && simm < 0)
424		return push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5));
425
426	if (imm <= 0xffffffffl) {
427		if ((imm & 0xffff0000l) == 0xffff0000)
428			return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff) << 5));
429		if ((imm & 0xffff) == 0xffff)
430			return push_inst(compiler, (MOVN ^ W_OP) | RD(dst) | ((~imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
431		bitmask = logical_imm(simm, 16);
432		if (bitmask != 0)
433			return push_inst(compiler, (ORRI ^ W_OP) | RD(dst) | RN(TMP_ZERO) | bitmask);
434	}
435	else {
436		bitmask = logical_imm(simm, 32);
437		if (bitmask != 0)
438			return push_inst(compiler, ORRI | RD(dst) | RN(TMP_ZERO) | bitmask);
439	}
440
441	if (imm <= 0xffffffffl) {
442		FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((imm & 0xffff) << 5)));
443		return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
444	}
445
446	if (simm >= -0x100000000l && simm < 0) {
447		FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((~imm & 0xffff) << 5)));
448		return push_inst(compiler, MOVK | RD(dst) | ((imm & 0xffff0000l) >> (16 - 5)) | (1 << 21));
449	}
450
451	/* A large amount of number can be constructed from ORR and MOVx,
452	but computing them is costly. We don't  */
453
454	zeros = 0;
455	ones = 0;
456	for (i = 4; i > 0; i--) {
457		if ((simm & 0xffff) == 0)
458			zeros++;
459		if ((simm & 0xffff) == 0xffff)
460			ones++;
461		simm >>= 16;
462	}
463
464	simm = (sljit_sw)imm;
465	first = 1;
466	if (ones > zeros) {
467		simm = ~simm;
468		for (i = 0; i < 4; i++) {
469			if (!(simm & 0xffff)) {
470				simm >>= 16;
471				continue;
472			}
473			if (first) {
474				first = 0;
475				FAIL_IF(push_inst(compiler, MOVN | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
476			}
477			else
478				FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((~simm & 0xffff) << 5) | (i << 21)));
479			simm >>= 16;
480		}
481		return SLJIT_SUCCESS;
482	}
483
484	for (i = 0; i < 4; i++) {
485		if (!(simm & 0xffff)) {
486			simm >>= 16;
487			continue;
488		}
489		if (first) {
490			first = 0;
491			FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
492		}
493		else
494			FAIL_IF(push_inst(compiler, MOVK | RD(dst) | ((simm & 0xffff) << 5) | (i << 21)));
495		simm >>= 16;
496	}
497	return SLJIT_SUCCESS;
498}
499
/* Modifier bits of the 'flags' argument of emit_op_imm. They sit above the
   low 16 bits, which hold the operation code. */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
#define INT_OP		0x0040000
#define SET_FLAGS	0x0080000
#define UNUSED_RETURN	0x0100000
#define SLOW_DEST	0x0200000
#define SLOW_SRC1	0x0400000
#define SLOW_SRC2	0x0800000

/* Helper for emit_op_imm; relies on its locals 'flags', 'inv_bits' and
   'dst'. When SET_FLAGS is requested, flag_bits is added to inv_bits, and
   if the result itself is unused (UNUSED_RETURN) the destination is
   redirected to TMP_ZERO.
   Wrapped in do/while(0) so that the macro is a single statement and can
   be used safely as the body of an if/else. */
#define CHECK_FLAGS(flag_bits) \
	do { \
		if (flags & SET_FLAGS) { \
			inv_bits |= (flag_bits); \
			if (flags & UNUSED_RETURN) \
				dst = TMP_ZERO; \
		} \
	} while (0)
515
516static sljit_si emit_op_imm(struct sljit_compiler *compiler, sljit_si flags, sljit_si dst, sljit_sw arg1, sljit_sw arg2)
517{
518	/* dst must be register, TMP_REG1
519	   arg1 must be register, TMP_REG1, imm
520	   arg2 must be register, TMP_REG2, imm */
521	sljit_ins inv_bits = (flags & INT_OP) ? (1 << 31) : 0;
522	sljit_ins inst_bits;
523	sljit_si op = (flags & 0xffff);
524	sljit_si reg;
525	sljit_sw imm, nimm;
526
527	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
528		/* Both are immediates. */
529		flags &= ~ARG1_IMM;
530		if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
531			arg1 = TMP_ZERO;
532		else {
533			FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
534			arg1 = TMP_REG1;
535		}
536	}
537
538	if (flags & (ARG1_IMM | ARG2_IMM)) {
539		reg = (flags & ARG2_IMM) ? arg1 : arg2;
540		imm = (flags & ARG2_IMM) ? arg2 : arg1;
541
542		switch (op) {
543		case SLJIT_MUL:
544		case SLJIT_NEG:
545		case SLJIT_CLZ:
546		case SLJIT_ADDC:
547		case SLJIT_SUBC:
548			/* No form with immediate operand (except imm 0, which
549			is represented by a ZERO register). */
550			break;
551		case SLJIT_MOV:
552			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
553			return load_immediate(compiler, dst, imm);
554		case SLJIT_NOT:
555			SLJIT_ASSERT(flags & ARG2_IMM);
556			FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
557			goto set_flags;
558		case SLJIT_SUB:
559			if (flags & ARG1_IMM)
560				break;
561			imm = -imm;
562			/* Fall through. */
563		case SLJIT_ADD:
564			if (imm == 0) {
565				CHECK_FLAGS(1 << 29);
566				return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
567			}
568			if (imm > 0 && imm <= 0xfff) {
569				CHECK_FLAGS(1 << 29);
570				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (imm << 10));
571			}
572			nimm = -imm;
573			if (nimm > 0 && nimm <= 0xfff) {
574				CHECK_FLAGS(1 << 29);
575				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (nimm << 10));
576			}
577			if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
578				CHECK_FLAGS(1 << 29);
579				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22));
580			}
581			if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
582				CHECK_FLAGS(1 << 29);
583				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22));
584			}
585			if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
586				FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((imm >> 12) << 10) | (1 << 22)));
587				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | ((imm & 0xfff) << 10));
588			}
589			if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
590				FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((nimm >> 12) << 10) | (1 << 22)));
591				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | ((nimm & 0xfff) << 10));
592			}
593			break;
594		case SLJIT_AND:
595			inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
596			if (!inst_bits)
597				break;
598			CHECK_FLAGS(3 << 29);
599			return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
600		case SLJIT_OR:
601		case SLJIT_XOR:
602			inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
603			if (!inst_bits)
604				break;
605			if (op == SLJIT_OR)
606				inst_bits |= ORRI;
607			else
608				inst_bits |= EORI;
609			FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
610			goto set_flags;
611		case SLJIT_SHL:
612			if (flags & ARG1_IMM)
613				break;
614			if (flags & INT_OP) {
615				imm &= 0x1f;
616				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | ((-imm & 0x1f) << 16) | ((31 - imm) << 10)));
617			}
618			else {
619				imm &= 0x3f;
620				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | ((-imm & 0x3f) << 16) | ((63 - imm) << 10)));
621			}
622			goto set_flags;
623		case SLJIT_LSHR:
624		case SLJIT_ASHR:
625			if (flags & ARG1_IMM)
626				break;
627			if (op == SLJIT_ASHR)
628				inv_bits |= 1 << 30;
629			if (flags & INT_OP) {
630				imm &= 0x1f;
631				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (imm << 16) | (31 << 10)));
632			}
633			else {
634				imm &= 0x3f;
635				FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) | (imm << 16) | (63 << 10)));
636			}
637			goto set_flags;
638		default:
639			SLJIT_ASSERT_STOP();
640			break;
641		}
642
643		if (flags & ARG2_IMM) {
644			if (arg2 == 0)
645				arg2 = TMP_ZERO;
646			else {
647				FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
648				arg2 = TMP_REG2;
649			}
650		}
651		else {
652			if (arg1 == 0)
653				arg1 = TMP_ZERO;
654			else {
655				FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
656				arg1 = TMP_REG1;
657			}
658		}
659	}
660
661	/* Both arguments are registers. */
662	switch (op) {
663	case SLJIT_MOV:
664	case SLJIT_MOV_P:
665	case SLJIT_MOVU:
666	case SLJIT_MOVU_P:
667		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
668		if (dst == arg2)
669			return SLJIT_SUCCESS;
670		return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
671	case SLJIT_MOV_UB:
672	case SLJIT_MOVU_UB:
673		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
674		return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (7 << 10));
675	case SLJIT_MOV_SB:
676	case SLJIT_MOVU_SB:
677		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
678		if (!(flags & INT_OP))
679			inv_bits |= 1 << 22;
680		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
681	case SLJIT_MOV_UH:
682	case SLJIT_MOVU_UH:
683		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
684		return push_inst(compiler, (UBFM ^ (1 << 31)) | RD(dst) | RN(arg2) | (15 << 10));
685	case SLJIT_MOV_SH:
686	case SLJIT_MOVU_SH:
687		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
688		if (!(flags & INT_OP))
689			inv_bits |= 1 << 22;
690		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
691	case SLJIT_MOV_UI:
692	case SLJIT_MOVU_UI:
693		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
694		if ((flags & INT_OP) && dst == arg2)
695			return SLJIT_SUCCESS;
696		return push_inst(compiler, (ORR ^ (1 << 31)) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
697	case SLJIT_MOV_SI:
698	case SLJIT_MOVU_SI:
699		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
700		if ((flags & INT_OP) && dst == arg2)
701			return SLJIT_SUCCESS;
702		return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
703	case SLJIT_NOT:
704		SLJIT_ASSERT(arg1 == TMP_REG1);
705		FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
706		goto set_flags;
707	case SLJIT_NEG:
708		SLJIT_ASSERT(arg1 == TMP_REG1);
709		if (flags & SET_FLAGS)
710			inv_bits |= 1 << 29;
711		return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
712	case SLJIT_CLZ:
713		SLJIT_ASSERT(arg1 == TMP_REG1);
714		FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)));
715		goto set_flags;
716	case SLJIT_ADD:
717		CHECK_FLAGS(1 << 29);
718		return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
719	case SLJIT_ADDC:
720		CHECK_FLAGS(1 << 29);
721		return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
722	case SLJIT_SUB:
723		CHECK_FLAGS(1 << 29);
724		return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
725	case SLJIT_SUBC:
726		CHECK_FLAGS(1 << 29);
727		return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
728	case SLJIT_MUL:
729		if (!(flags & SET_FLAGS))
730			return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
731		if (flags & INT_OP) {
732			FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
733			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG4) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
734			return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_REG4) | RM(dst) | (2 << 22) | (63 << 10));
735		}
736		FAIL_IF(push_inst(compiler, SMULH | RD(TMP_REG4) | RN(arg1) | RM(arg2)));
737		FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
738		return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_REG4) | RM(dst) | (2 << 22) | (63 << 10));
739	case SLJIT_AND:
740		CHECK_FLAGS(3 << 29);
741		return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
742	case SLJIT_OR:
743		FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
744		goto set_flags;
745	case SLJIT_XOR:
746		FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
747		goto set_flags;
748	case SLJIT_SHL:
749		FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
750		goto set_flags;
751	case SLJIT_LSHR:
752		FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
753		goto set_flags;
754	case SLJIT_ASHR:
755		FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
756		goto set_flags;
757	}
758
759	SLJIT_ASSERT_STOP();
760	return SLJIT_SUCCESS;
761
762set_flags:
763	if (flags & SET_FLAGS)
764		return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
765	return SLJIT_SUCCESS;
766}
767
/* Memory access flags. STORE and SIGNED form the two low bits that are
   used as index into the sljit_mem_* opcode tables. */
#define STORE		(1 << 0)
#define SIGNED		(1 << 1)

/* UPDATE and ARG_TEST are examined by getput_arg_fast. */
#define UPDATE		(1 << 2)
#define ARG_TEST	(1 << 3)

/* Access size selector, stored from bit 8 upwards. */
#define BYTE_SIZE	(0 << 8)
#define HALF_SIZE	(1 << 8)
#define INT_SIZE	(2 << 8)
#define WORD_SIZE	(3 << 8)

/* Extracts the size selector (0 .. 3 for the sizes above) from flags. */
#define MEM_SIZE_SHIFT(flags) ((flags) >> 8)
780
/* Byte sized load / store opcode templates. Every table is indexed by
   (flags & 0x3): bit 0 is STORE and bit 1 is SIGNED, which gives the
   "u l", "u s", "s l", "s s" (unsigned/signed, load/store) order below.
   Users OR in (shift << 30) to select the real access size. */

/* Base register + non-negative immediate; users place the offset, divided
   by the access size, at bit 10 and limit it to 0xfff. */
static SLJIT_CONST sljit_ins sljit_mem_imm[4] = {
/* u l */ 0x39400000 /* ldrb [reg,imm] */,
/* u s */ 0x39000000 /* strb [reg,imm] */,
/* s l */ 0x39800000 /* ldrsb [reg,imm] */,
/* s s */ 0x39000000 /* strb [reg,imm] */,
};

/* Base register + signed immediate in the -256..255 range; users place
   (offset & 0x1ff) at bit 12. */
static SLJIT_CONST sljit_ins sljit_mem_simm[4] = {
/* u l */ 0x38400000 /* ldurb [reg,imm] */,
/* u s */ 0x38000000 /* sturb [reg,imm] */,
/* s l */ 0x38800000 /* ldursb [reg,imm] */,
/* s s */ 0x38000000 /* sturb [reg,imm] */,
};

/* Same immediate layout as sljit_mem_simm, but the pre-indexed ("!") form;
   used by the UPDATE path of getput_arg_fast. */
static SLJIT_CONST sljit_ins sljit_mem_pre_simm[4] = {
/* u l */ 0x38400c00 /* ldrb [reg,imm]! */,
/* u s */ 0x38000c00 /* strb [reg,imm]! */,
/* s l */ 0x38800c00 /* ldrsb [reg,imm]! */,
/* s s */ 0x38000c00 /* strb [reg,imm]! */,
};

/* Base register + offset register; users set bit 12 when the offset
   register has to be shifted by the access size. */
static SLJIT_CONST sljit_ins sljit_mem_reg[4] = {
/* u l */ 0x38606800 /* ldrb [reg,reg] */,
/* u s */ 0x38206800 /* strb [reg,reg] */,
/* s l */ 0x38a06800 /* ldrsb [reg,reg] */,
/* s s */ 0x38206800 /* strb [reg,reg] */,
};
808
809/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
810static sljit_si emit_set_delta(struct sljit_compiler *compiler, sljit_si dst, sljit_si reg, sljit_sw value)
811{
812	if (value >= 0) {
813		if (value <= 0xfff)
814			return push_inst(compiler, ADDI | RD(dst) | RN(reg) | (value << 10));
815		if (value <= 0xffffff && !(value & 0xfff))
816			return push_inst(compiler, ADDI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
817	}
818	else {
819		value = -value;
820		if (value <= 0xfff)
821			return push_inst(compiler, SUBI | RD(dst) | RN(reg) | (value << 10));
822		if (value <= 0xffffff && !(value & 0xfff))
823			return push_inst(compiler, SUBI | (1 << 22) | RD(dst) | RN(reg) | (value >> 2));
824	}
825	return SLJIT_ERR_UNSUPPORTED;
826}
827
828/* Can perform an operation using at most 1 instruction. */
829static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
830{
831	sljit_ui shift = MEM_SIZE_SHIFT(flags);
832
833	SLJIT_ASSERT(arg & SLJIT_MEM);
834
835	if (SLJIT_UNLIKELY(flags & UPDATE)) {
836		if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 255 && argw >= -256) {
837			if (SLJIT_UNLIKELY(flags & ARG_TEST))
838				return 1;
839
840			arg &= REG_MASK;
841			argw &= 0x1ff;
842			FAIL_IF(push_inst(compiler, sljit_mem_pre_simm[flags & 0x3]
843				| (shift << 30) | RT(reg) | RN(arg) | (argw << 12)));
844			return -1;
845		}
846		return 0;
847	}
848
849	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
850		argw &= 0x3;
851		if (argw && argw != shift)
852			return 0;
853
854		if (SLJIT_UNLIKELY(flags & ARG_TEST))
855			return 1;
856
857		FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg)
858			| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0)));
859		return -1;
860	}
861
862	arg &= REG_MASK;
863	if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) {
864		if (SLJIT_UNLIKELY(flags & ARG_TEST))
865			return 1;
866
867		FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
868			| RT(reg) | RN(arg) | (argw << (10 - shift))));
869		return -1;
870	}
871
872	if (argw > 255 || argw < -256)
873		return 0;
874
875	if (SLJIT_UNLIKELY(flags & ARG_TEST))
876		return 1;
877
878	FAIL_IF(push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
879		| RT(reg) | RN(arg) | ((argw & 0x1ff) << 12)));
880	return -1;
881}
882
883/* see getput_arg below.
884   Note: can_cache is called only for binary operators. Those
885   operators always uses word arguments without write back. */
886static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
887{
888	sljit_sw diff;
889	if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM))
890		return 0;
891
892	if (!(arg & REG_MASK)) {
893		diff = argw - next_argw;
894		if (diff <= 0xfff && diff >= -0xfff)
895			return 1;
896		return 0;
897	}
898
899	if (argw == next_argw)
900		return 1;
901
902	diff = argw - next_argw;
903	if (arg == next_arg && diff <= 0xfff && diff >= -0xfff)
904		return 1;
905
906	return 0;
907}
908
909/* Emit the necessary instructions. See can_cache above. */
910static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg,
911	sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
912{
913	sljit_ui shift = MEM_SIZE_SHIFT(flags);
914	sljit_si tmp_r, other_r;
915	sljit_sw diff;
916
917	SLJIT_ASSERT(arg & SLJIT_MEM);
918	if (!(next_arg & SLJIT_MEM)) {
919		next_arg = 0;
920		next_argw = 0;
921	}
922
923	tmp_r = (flags & STORE) ? TMP_REG3 : reg;
924
925	if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) {
926		/* Update only applies if a base register exists. */
927		other_r = OFFS_REG(arg);
928		if (!other_r) {
929			other_r = arg & REG_MASK;
930			if (other_r != reg && argw >= 0 && argw <= 0xffffff) {
931				if ((argw & 0xfff) != 0)
932					FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
933				if (argw >> 12)
934					FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
935				return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
936			}
937			else if (other_r != reg && argw < 0 && argw >= -0xffffff) {
938				argw = -argw;
939				if ((argw & 0xfff) != 0)
940					FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10)));
941				if (argw >> 12)
942					FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10)));
943				return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r));
944			}
945
946			if (compiler->cache_arg == SLJIT_MEM) {
947				if (argw == compiler->cache_argw) {
948					other_r = TMP_REG3;
949					argw = 0;
950				}
951				else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
952					FAIL_IF(compiler->error);
953					compiler->cache_argw = argw;
954					other_r = TMP_REG3;
955					argw = 0;
956				}
957			}
958
959			if (argw) {
960				FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
961				compiler->cache_arg = SLJIT_MEM;
962				compiler->cache_argw = argw;
963				other_r = TMP_REG3;
964				argw = 0;
965			}
966		}
967
968		/* No caching here. */
969		arg &= REG_MASK;
970		argw &= 0x3;
971		if (!argw || argw == shift) {
972			FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0)));
973			return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10));
974		}
975		if (arg != reg) {
976			FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)));
977			return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
978		}
979		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG4) | RN(arg) | RM(other_r) | (argw << 10)));
980		FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG4)));
981		return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_REG4));
982	}
983
984	if (arg & OFFS_REG_MASK) {
985		other_r = OFFS_REG(arg);
986		arg &= REG_MASK;
987		FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RN(arg) | RM(other_r) | ((argw & 0x3) << 10)));
988		return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(tmp_r));
989	}
990
991	if (compiler->cache_arg == arg) {
992		diff = argw - compiler->cache_argw;
993		if (diff <= 255 && diff >= -256)
994			return push_inst(compiler, sljit_mem_simm[flags & 0x3] | (shift << 30)
995				| RT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
996		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) {
997			FAIL_IF(compiler->error);
998			return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg));
999		}
1000	}
1001
1002	if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) {
1003		FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10)));
1004		return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30)
1005			| RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift)));
1006	}
1007
1008	diff = argw - next_argw;
1009	next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0;
1010	arg &= REG_MASK;
1011
1012	if (arg && compiler->cache_arg == SLJIT_MEM) {
1013		if (compiler->cache_argw == argw)
1014			return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1015		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1016			FAIL_IF(compiler->error);
1017			compiler->cache_argw = argw;
1018			return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1019		}
1020	}
1021
1022	compiler->cache_argw = argw;
1023	if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
1024		FAIL_IF(compiler->error);
1025		compiler->cache_arg = SLJIT_MEM | arg;
1026		arg = 0;
1027	}
1028	else {
1029		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1030		compiler->cache_arg = SLJIT_MEM;
1031
1032		if (next_arg) {
1033			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RN(TMP_REG3) | RM(arg)));
1034			compiler->cache_arg = SLJIT_MEM | arg;
1035			arg = 0;
1036		}
1037	}
1038
1039	if (arg)
1040		return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3));
1041	return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_REG3));
1042}
1043
1044static SLJIT_INLINE sljit_si emit_op_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
1045{
1046	if (getput_arg_fast(compiler, flags, reg, arg, argw))
1047		return compiler->error;
1048	compiler->cache_arg = 0;
1049	compiler->cache_argw = 0;
1050	return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
1051}
1052
1053static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
1054{
1055	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1056		return compiler->error;
1057	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1058}
1059
1060/* --------------------------------------------------------------------- */
1061/*  Entry, exit                                                          */
1062/* --------------------------------------------------------------------- */
1063
/* Emits the function prologue.
   Records options and the register counts in the compiler, computes the
   16 byte aligned frame size, allocates the frame while storing register 29
   and TMP_LR, stores the registers selected by saveds / scratches, and
   finally copies up to three arguments from SLJIT_R0-R2 to SLJIT_S0-S2.
   sljit_emit_return walks the saved registers in the same order.
   Returns SLJIT_SUCCESS or the compiler error code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
{
	sljit_si i, tmp, offs, prev;

	CHECK_ERROR();
	check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->options = options;
	compiler->scratches = scratches;
	compiler->saveds = saveds;
	compiler->fscratches = fscratches;
	compiler->fsaveds = fsaveds;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->logical_local_size = local_size;
#endif
	/* The locals start after the register save area; the whole frame is rounded up to 16 bytes. */
	compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
	local_size = (compiler->locals_offset + local_size + 15) & ~15;
	compiler->local_size = local_size;

	/* The offset of STP_PRE is a 7 bit field at bit 15, given in 8 byte units
	   (hence local_size >> 3 and the 0x7f mask). Frames up to 64 << 3 bytes
	   are allocated by the store itself. */
	if (local_size <= (64 << 3))
		FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
			| RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
	else {
		/* Larger frames: TMP_SP is decreased by the part above 64 << 3 first
		   (upper bits with bit 22 set, then the low 12 bits), and the final
		   STP_PRE (0x40 is -64 in the 7 bit field) allocates the rest. */
		local_size -= (64 << 3);
		if (local_size > 0xfff) {
			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
			local_size &= 0xfff;
		}
		if (local_size)
			FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
		FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x40 << 15)));
	}

	/* SLJIT_SP = TMP_SP + 0. */
	FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP)));

	/* Registers are stored in pairs: prev remembers the first register of
	   a pair until its partner is found. offs is the pair offset already
	   positioned at bit 15; it starts at 2 and advances by 2 per pair. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	offs = 2 << 15;
	prev = -1;
	for (i = SLJIT_S0; i >= tmp; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	/* Scratch registers numbered from SLJIT_FIRST_SAVED_REG upwards are stored as well. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	/* A register left without a partner is stored alone; offs >> 5 moves the offset from bit 15 to bit 10. */
	if (prev != -1)
		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));

	/* Copy the incoming arguments (ORR with TMP_ZERO is a register move). */
	if (args >= 1)
		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0) | RN(TMP_ZERO) | RM(SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S1) | RN(TMP_ZERO) | RM(SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));

	return SLJIT_SUCCESS;
}
1136
1137SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
1138	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
1139	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
1140{
1141	CHECK_ERROR_VOID();
1142	check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
1143
1144	compiler->options = options;
1145	compiler->scratches = scratches;
1146	compiler->saveds = saveds;
1147	compiler->fscratches = fscratches;
1148	compiler->fsaveds = fsaveds;
1149#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
1150	compiler->logical_local_size = local_size;
1151#endif
1152	compiler->locals_offset = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
1153	compiler->local_size = (compiler->locals_offset + local_size + 15) & ~15;
1154}
1155
/* Emits the function epilogue.
   After emit_mov_before_return has handled the return value, the registers
   are reloaded in the order used by sljit_emit_enter, register 29 and TMP_LR
   are reloaded while the frame is released, and a RET through TMP_LR is
   emitted. Returns SLJIT_SUCCESS or the compiler error code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
{
	sljit_si local_size;
	sljit_si i, tmp, offs, prev;

	CHECK_ERROR();
	check_sljit_emit_return(compiler, op, src, srcw);

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* Same pairing scheme as in sljit_emit_enter: prev is the pending first
	   register of a pair, offs is the pair offset positioned at bit 15. */
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	offs = 2 << 15;
	prev = -1;
	for (i = SLJIT_S0; i >= tmp; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
		offs += 2 << 15;
		prev = -1;
	}

	/* Register without a partner; offs >> 5 moves the offset from bit 15 to bit 10. */
	if (prev != -1)
		FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5)));

	local_size = compiler->local_size;

	/* The post-indexed LDP releases up to 62 << 3 bytes by itself (7 bit
	   field at bit 15, in 8 byte units); anything above that is added to
	   TMP_SP afterwards in at most two steps. */
	if (local_size <= (62 << 3))
		FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
			| RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
	else {
		FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) | RN(TMP_SP) | (0x3e << 15)));
		local_size -= (62 << 3);
		if (local_size > 0xfff) {
			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
			local_size &= 0xfff;
		}
		if (local_size)
			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
	}

	FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
	return SLJIT_SUCCESS;
}
1211
1212/* --------------------------------------------------------------------- */
1213/*  Operators                                                            */
1214/* --------------------------------------------------------------------- */
1215
1216SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
1217{
1218	sljit_ins inv_bits = (op & SLJIT_INT_OP) ? (1 << 31) : 0;
1219
1220	CHECK_ERROR();
1221	check_sljit_emit_op0(compiler, op);
1222
1223	op = GET_OPCODE(op);
1224	switch (op) {
1225	case SLJIT_BREAKPOINT:
1226		return push_inst(compiler, BRK);
1227	case SLJIT_NOP:
1228		return push_inst(compiler, NOP);
1229	case SLJIT_UMUL:
1230	case SLJIT_SMUL:
1231		FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1232		FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1233		return push_inst(compiler, (op == SLJIT_SMUL ? SMULH : UMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1234	case SLJIT_UDIV:
1235	case SLJIT_SDIV:
1236		FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
1237		FAIL_IF(push_inst(compiler, ((op == SLJIT_SDIV ? SDIV : UDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
1238		FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1239		return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1240	}
1241
1242	return SLJIT_SUCCESS;
1243}
1244
1245SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1246	sljit_si dst, sljit_sw dstw,
1247	sljit_si src, sljit_sw srcw)
1248{
1249	sljit_si dst_r, flags, mem_flags;
1250	sljit_si op_flags = GET_ALL_FLAGS(op);
1251
1252	CHECK_ERROR();
1253	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1254	ADJUST_LOCAL_OFFSET(dst, dstw);
1255	ADJUST_LOCAL_OFFSET(src, srcw);
1256
1257	compiler->cache_arg = 0;
1258	compiler->cache_argw = 0;
1259
1260	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1261
1262	op = GET_OPCODE(op);
1263	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1264		switch (op) {
1265		case SLJIT_MOV:
1266		case SLJIT_MOV_P:
1267			flags = WORD_SIZE;
1268			break;
1269		case SLJIT_MOV_UB:
1270			flags = BYTE_SIZE;
1271			if (src & SLJIT_IMM)
1272				srcw = (sljit_ub)srcw;
1273			break;
1274		case SLJIT_MOV_SB:
1275			flags = BYTE_SIZE | SIGNED;
1276			if (src & SLJIT_IMM)
1277				srcw = (sljit_sb)srcw;
1278			break;
1279		case SLJIT_MOV_UH:
1280			flags = HALF_SIZE;
1281			if (src & SLJIT_IMM)
1282				srcw = (sljit_uh)srcw;
1283			break;
1284		case SLJIT_MOV_SH:
1285			flags = HALF_SIZE | SIGNED;
1286			if (src & SLJIT_IMM)
1287				srcw = (sljit_sh)srcw;
1288			break;
1289		case SLJIT_MOV_UI:
1290			flags = INT_SIZE;
1291			if (src & SLJIT_IMM)
1292				srcw = (sljit_ui)srcw;
1293			break;
1294		case SLJIT_MOV_SI:
1295			flags = INT_SIZE | SIGNED;
1296			if (src & SLJIT_IMM)
1297				srcw = (sljit_si)srcw;
1298			break;
1299		case SLJIT_MOVU:
1300		case SLJIT_MOVU_P:
1301			flags = WORD_SIZE | UPDATE;
1302			break;
1303		case SLJIT_MOVU_UB:
1304			flags = BYTE_SIZE | UPDATE;
1305			if (src & SLJIT_IMM)
1306				srcw = (sljit_ub)srcw;
1307			break;
1308		case SLJIT_MOVU_SB:
1309			flags = BYTE_SIZE | SIGNED | UPDATE;
1310			if (src & SLJIT_IMM)
1311				srcw = (sljit_sb)srcw;
1312			break;
1313		case SLJIT_MOVU_UH:
1314			flags = HALF_SIZE | UPDATE;
1315			if (src & SLJIT_IMM)
1316				srcw = (sljit_uh)srcw;
1317			break;
1318		case SLJIT_MOVU_SH:
1319			flags = HALF_SIZE | SIGNED | UPDATE;
1320			if (src & SLJIT_IMM)
1321				srcw = (sljit_sh)srcw;
1322			break;
1323		case SLJIT_MOVU_UI:
1324			flags = INT_SIZE | UPDATE;
1325			if (src & SLJIT_IMM)
1326				srcw = (sljit_ui)srcw;
1327			break;
1328		case SLJIT_MOVU_SI:
1329			flags = INT_SIZE | SIGNED | UPDATE;
1330			if (src & SLJIT_IMM)
1331				srcw = (sljit_si)srcw;
1332			break;
1333		default:
1334			SLJIT_ASSERT_STOP();
1335			flags = 0;
1336			break;
1337		}
1338
1339		if (src & SLJIT_IMM)
1340			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
1341		else if (src & SLJIT_MEM) {
1342			if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
1343				FAIL_IF(compiler->error);
1344			else
1345				FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
1346		} else {
1347			if (dst_r != TMP_REG1)
1348				return emit_op_imm(compiler, op | ((op_flags & SLJIT_INT_OP) ? INT_OP : 0), dst_r, TMP_REG1, src);
1349			dst_r = src;
1350		}
1351
1352		if (dst & SLJIT_MEM) {
1353			if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
1354				return compiler->error;
1355			else
1356				return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
1357		}
1358		return SLJIT_SUCCESS;
1359	}
1360
1361	flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0;
1362	mem_flags = WORD_SIZE;
1363	if (op_flags & SLJIT_INT_OP) {
1364		flags |= INT_OP;
1365		mem_flags = INT_SIZE;
1366	}
1367
1368	if (dst == SLJIT_UNUSED)
1369		flags |= UNUSED_RETURN;
1370
1371	if (src & SLJIT_MEM) {
1372		if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src, srcw))
1373			FAIL_IF(compiler->error);
1374		else
1375			FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src, srcw, dst, dstw));
1376		src = TMP_REG2;
1377	}
1378
1379	if (src & SLJIT_IMM) {
1380		flags |= ARG2_IMM;
1381		if (op_flags & SLJIT_INT_OP)
1382			srcw = (sljit_si)srcw;
1383	} else
1384		srcw = src;
1385
1386	emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw);
1387
1388	if (dst & SLJIT_MEM) {
1389		if (getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw))
1390			return compiler->error;
1391		else
1392			return getput_arg(compiler, mem_flags | STORE, dst_r, dst, dstw, 0, 0);
1393	}
1394	return SLJIT_SUCCESS;
1395}
1396
1397SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
1398	sljit_si dst, sljit_sw dstw,
1399	sljit_si src1, sljit_sw src1w,
1400	sljit_si src2, sljit_sw src2w)
1401{
1402	sljit_si dst_r, flags, mem_flags;
1403
1404	CHECK_ERROR();
1405	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1406	ADJUST_LOCAL_OFFSET(dst, dstw);
1407	ADJUST_LOCAL_OFFSET(src1, src1w);
1408	ADJUST_LOCAL_OFFSET(src2, src2w);
1409
1410	compiler->cache_arg = 0;
1411	compiler->cache_argw = 0;
1412
1413	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1414	flags = GET_FLAGS(op) ? SET_FLAGS : 0;
1415	mem_flags = WORD_SIZE;
1416	if (op & SLJIT_INT_OP) {
1417		flags |= INT_OP;
1418		mem_flags = INT_SIZE;
1419	}
1420
1421	if (dst == SLJIT_UNUSED)
1422		flags |= UNUSED_RETURN;
1423
1424	if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, mem_flags | STORE | ARG_TEST, TMP_REG1, dst, dstw))
1425		flags |= SLOW_DEST;
1426
1427	if (src1 & SLJIT_MEM) {
1428		if (getput_arg_fast(compiler, mem_flags, TMP_REG1, src1, src1w))
1429			FAIL_IF(compiler->error);
1430		else
1431			flags |= SLOW_SRC1;
1432	}
1433	if (src2 & SLJIT_MEM) {
1434		if (getput_arg_fast(compiler, mem_flags, TMP_REG2, src2, src2w))
1435			FAIL_IF(compiler->error);
1436		else
1437			flags |= SLOW_SRC2;
1438	}
1439
1440	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1441		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1442			FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, src1, src1w));
1443			FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
1444		}
1445		else {
1446			FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, src2, src2w));
1447			FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
1448		}
1449	}
1450	else if (flags & SLOW_SRC1)
1451		FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG1, src1, src1w, dst, dstw));
1452	else if (flags & SLOW_SRC2)
1453		FAIL_IF(getput_arg(compiler, mem_flags, TMP_REG2, src2, src2w, dst, dstw));
1454
1455	if (src1 & SLJIT_MEM)
1456		src1 = TMP_REG1;
1457	if (src2 & SLJIT_MEM)
1458		src2 = TMP_REG2;
1459
1460	if (src1 & SLJIT_IMM)
1461		flags |= ARG1_IMM;
1462	else
1463		src1w = src1;
1464	if (src2 & SLJIT_IMM)
1465		flags |= ARG2_IMM;
1466	else
1467		src2w = src2;
1468
1469	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
1470
1471	if (dst & SLJIT_MEM) {
1472		if (!(flags & SLOW_DEST)) {
1473			getput_arg_fast(compiler, mem_flags | STORE, dst_r, dst, dstw);
1474			return compiler->error;
1475		}
1476		return getput_arg(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
1477	}
1478
1479	return SLJIT_SUCCESS;
1480}
1481
1482SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
1483{
1484	check_sljit_get_register_index(reg);
1485	return reg_map[reg];
1486}
1487
1488SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
1489{
1490	check_sljit_get_float_register_index(reg);
1491	return reg;
1492}
1493
1494SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
1495	void *instruction, sljit_si size)
1496{
1497	CHECK_ERROR();
1498	check_sljit_emit_op_custom(compiler, instruction, size);
1499	SLJIT_ASSERT(size == 4);
1500
1501	return push_inst(compiler, *(sljit_ins*)instruction);
1502}
1503
1504/* --------------------------------------------------------------------- */
1505/*  Floating point operators                                             */
1506/* --------------------------------------------------------------------- */
1507
1508SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
1509{
1510#ifdef SLJIT_IS_FPU_AVAILABLE
1511	return SLJIT_IS_FPU_AVAILABLE;
1512#else
1513	/* Available by default. */
1514	return 1;
1515#endif
1516}
1517
1518static sljit_si emit_fop_mem(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg, sljit_sw argw)
1519{
1520	sljit_ui shift = MEM_SIZE_SHIFT(flags);
1521	sljit_ins ins_bits = (shift << 30);
1522	sljit_si other_r;
1523	sljit_sw diff;
1524
1525	SLJIT_ASSERT(arg & SLJIT_MEM);
1526
1527	if (!(flags & STORE))
1528		ins_bits |= 1 << 22;
1529
1530	if (arg & OFFS_REG_MASK) {
1531		argw &= 3;
1532		if (!argw || argw == shift)
1533			return push_inst(compiler, STR_FR | ins_bits | VT(reg)
1534				| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
1535		other_r = OFFS_REG(arg);
1536		arg &= REG_MASK;
1537		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | RM(other_r) | (argw << 10)));
1538		arg = TMP_REG1;
1539		argw = 0;
1540	}
1541
1542	arg &= REG_MASK;
1543	if (arg && argw >= 0 && ((argw >> shift) <= 0xfff) && (argw & ((1 << shift) - 1)) == 0)
1544		return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(arg) | (argw << (10 - shift)));
1545
1546	if (arg && argw <= 255 && argw >= -256)
1547		return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(arg) | ((argw & 0x1ff) << 12));
1548
1549	/* Slow cases */
1550	if (compiler->cache_arg == SLJIT_MEM && argw != compiler->cache_argw) {
1551		diff = argw - compiler->cache_argw;
1552		if (!arg && diff <= 255 && diff >= -256)
1553			return push_inst(compiler, STUR_FI | ins_bits | VT(reg) | RN(TMP_REG3) | ((diff & 0x1ff) << 12));
1554		if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1555			FAIL_IF(compiler->error);
1556			compiler->cache_argw = argw;
1557		}
1558	}
1559
1560	if (compiler->cache_arg != SLJIT_MEM || argw != compiler->cache_argw) {
1561		compiler->cache_arg = SLJIT_MEM;
1562		compiler->cache_argw = argw;
1563		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1564	}
1565
1566	if (arg & REG_MASK)
1567		return push_inst(compiler, STR_FR | ins_bits | VT(reg) | RN(arg) | RM(TMP_REG3));
1568	return push_inst(compiler, STR_FI | ins_bits | VT(reg) | RN(TMP_REG3));
1569}
1570
1571static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
1572	sljit_si dst, sljit_sw dstw,
1573	sljit_si src, sljit_sw srcw)
1574{
1575	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1576	sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
1577
1578	if (GET_OPCODE(op) == SLJIT_CONVI_FROMD)
1579		inv_bits |= (1 << 31);
1580
1581	if (src & SLJIT_MEM) {
1582		emit_fop_mem(compiler, (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
1583		src = TMP_FREG1;
1584	}
1585
1586	FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
1587
1588	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
1589		return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVI_FROMD) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw);
1590	return SLJIT_SUCCESS;
1591}
1592
1593static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
1594	sljit_si dst, sljit_sw dstw,
1595	sljit_si src, sljit_sw srcw)
1596{
1597	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1598	sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
1599
1600	if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
1601		inv_bits |= (1 << 31);
1602
1603	if (src & SLJIT_MEM) {
1604		emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONVD_FROMI) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw);
1605		src = TMP_REG1;
1606	} else if (src & SLJIT_IMM) {
1607#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1608		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
1609			srcw = (sljit_si)srcw;
1610#endif
1611		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1612		src = TMP_REG1;
1613	}
1614
1615	FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
1616
1617	if (dst & SLJIT_MEM)
1618		return emit_fop_mem(compiler, ((op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
1619	return SLJIT_SUCCESS;
1620}
1621
1622static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
1623	sljit_si src1, sljit_sw src1w,
1624	sljit_si src2, sljit_sw src2w)
1625{
1626	sljit_si mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
1627	sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
1628
1629	if (src1 & SLJIT_MEM) {
1630		emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
1631		src1 = TMP_FREG1;
1632	}
1633
1634	if (src2 & SLJIT_MEM) {
1635		emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
1636		src2 = TMP_FREG2;
1637	}
1638
1639	return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
1640}
1641
1642SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
1643	sljit_si dst, sljit_sw dstw,
1644	sljit_si src, sljit_sw srcw)
1645{
1646	sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
1647	sljit_ins inv_bits;
1648
1649	CHECK_ERROR();
1650	compiler->cache_arg = 0;
1651	compiler->cache_argw = 0;
1652
1653	SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x100) == WORD_SIZE, must_be_one_bit_difference);
1654	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1655
1656	inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
1657	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1658
1659	if (src & SLJIT_MEM) {
1660		emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONVD_FROMS) ? (mem_flags ^ 0x100) : mem_flags, dst_r, src, srcw);
1661		src = dst_r;
1662	}
1663
1664	switch (GET_OPCODE(op)) {
1665	case SLJIT_MOVD:
1666		if (src != dst_r) {
1667			if (dst_r != TMP_FREG1)
1668				FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
1669			else
1670				dst_r = src;
1671		}
1672		break;
1673	case SLJIT_NEGD:
1674		FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
1675		break;
1676	case SLJIT_ABSD:
1677		FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
1678		break;
1679	case SLJIT_CONVD_FROMS:
1680		FAIL_IF(push_inst(compiler, FCVT | ((op & SLJIT_SINGLE_OP) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
1681		break;
1682	}
1683
1684	if (dst & SLJIT_MEM)
1685		return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
1686	return SLJIT_SUCCESS;
1687}
1688
1689SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
1690	sljit_si dst, sljit_sw dstw,
1691	sljit_si src1, sljit_sw src1w,
1692	sljit_si src2, sljit_sw src2w)
1693{
1694	sljit_si dst_r, mem_flags = (op & SLJIT_SINGLE_OP) ? INT_SIZE : WORD_SIZE;
1695	sljit_ins inv_bits = (op & SLJIT_SINGLE_OP) ? (1 << 22) : 0;
1696
1697	CHECK_ERROR();
1698	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1699	ADJUST_LOCAL_OFFSET(dst, dstw);
1700	ADJUST_LOCAL_OFFSET(src1, src1w);
1701	ADJUST_LOCAL_OFFSET(src2, src2w);
1702
1703	compiler->cache_arg = 0;
1704	compiler->cache_argw = 0;
1705
1706	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1707	if (src1 & SLJIT_MEM) {
1708		emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
1709		src1 = TMP_FREG1;
1710	}
1711	if (src2 & SLJIT_MEM) {
1712		emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
1713		src2 = TMP_FREG2;
1714	}
1715
1716	switch (GET_OPCODE(op)) {
1717	case SLJIT_ADDD:
1718		FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1719		break;
1720	case SLJIT_SUBD:
1721		FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1722		break;
1723	case SLJIT_MULD:
1724		FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1725		break;
1726	case SLJIT_DIVD:
1727		FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1728		break;
1729	}
1730
1731	if (!(dst & SLJIT_MEM))
1732		return SLJIT_SUCCESS;
1733	return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
1734}
1735
1736/* --------------------------------------------------------------------- */
1737/*  Other instructions                                                   */
1738/* --------------------------------------------------------------------- */
1739
1740SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
1741{
1742	CHECK_ERROR();
1743	check_sljit_emit_fast_enter(compiler, dst, dstw);
1744	ADJUST_LOCAL_OFFSET(dst, dstw);
1745
1746	/* For UNUSED dst. Uncommon, but possible. */
1747	if (dst == SLJIT_UNUSED)
1748		return SLJIT_SUCCESS;
1749
1750	if (FAST_IS_REG(dst))
1751		return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
1752
1753	/* Memory. */
1754	return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw);
1755}
1756
1757SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
1758{
1759	CHECK_ERROR();
1760	check_sljit_emit_fast_return(compiler, src, srcw);
1761	ADJUST_LOCAL_OFFSET(src, srcw);
1762
1763	if (FAST_IS_REG(src))
1764		FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
1765	else if (src & SLJIT_MEM)
1766		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw));
1767	else if (src & SLJIT_IMM)
1768		FAIL_IF(load_immediate(compiler, TMP_LR, srcw));
1769
1770	return push_inst(compiler, RET | RN(TMP_LR));
1771}
1772
1773/* --------------------------------------------------------------------- */
1774/*  Conditional instructions                                             */
1775/* --------------------------------------------------------------------- */
1776
1777static sljit_uw get_cc(sljit_si type)
1778{
1779	switch (type) {
1780	case SLJIT_C_EQUAL:
1781	case SLJIT_C_MUL_NOT_OVERFLOW:
1782	case SLJIT_C_FLOAT_EQUAL:
1783		return 0x1;
1784
1785	case SLJIT_C_NOT_EQUAL:
1786	case SLJIT_C_MUL_OVERFLOW:
1787	case SLJIT_C_FLOAT_NOT_EQUAL:
1788		return 0x0;
1789
1790	case SLJIT_C_LESS:
1791	case SLJIT_C_FLOAT_LESS:
1792		return 0x2;
1793
1794	case SLJIT_C_GREATER_EQUAL:
1795	case SLJIT_C_FLOAT_GREATER_EQUAL:
1796		return 0x3;
1797
1798	case SLJIT_C_GREATER:
1799	case SLJIT_C_FLOAT_GREATER:
1800		return 0x9;
1801
1802	case SLJIT_C_LESS_EQUAL:
1803	case SLJIT_C_FLOAT_LESS_EQUAL:
1804		return 0x8;
1805
1806	case SLJIT_C_SIG_LESS:
1807		return 0xa;
1808
1809	case SLJIT_C_SIG_GREATER_EQUAL:
1810		return 0xb;
1811
1812	case SLJIT_C_SIG_GREATER:
1813		return 0xd;
1814
1815	case SLJIT_C_SIG_LESS_EQUAL:
1816		return 0xc;
1817
1818	case SLJIT_C_OVERFLOW:
1819	case SLJIT_C_FLOAT_UNORDERED:
1820		return 0x7;
1821
1822	case SLJIT_C_NOT_OVERFLOW:
1823	case SLJIT_C_FLOAT_ORDERED:
1824		return 0x6;
1825
1826	default:
1827		SLJIT_ASSERT_STOP();
1828		return 0xe;
1829	}
1830}
1831
1832SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1833{
1834	struct sljit_label *label;
1835
1836	CHECK_ERROR_PTR();
1837	check_sljit_emit_label(compiler);
1838
1839	if (compiler->last_label && compiler->last_label->size == compiler->size)
1840		return compiler->last_label;
1841
1842	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1843	PTR_FAIL_IF(!label);
1844	set_label(label, compiler);
1845	return label;
1846}
1847
1848SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
1849{
1850	struct sljit_jump *jump;
1851
1852	CHECK_ERROR_PTR();
1853	check_sljit_emit_jump(compiler, type);
1854
1855	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1856	PTR_FAIL_IF(!jump);
1857	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1858	type &= 0xff;
1859
1860	if (type < SLJIT_JUMP) {
1861		jump->flags |= IS_COND;
1862		PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(type)));
1863	}
1864	else if (type >= SLJIT_FAST_CALL)
1865		jump->flags |= IS_BL;
1866
1867	PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1868	jump->addr = compiler->size;
1869	PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
1870
1871	return jump;
1872}
1873
1874static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_si type,
1875	sljit_si src, sljit_sw srcw)
1876{
1877	struct sljit_jump *jump;
1878	sljit_ins inv_bits = (type & SLJIT_INT_OP) ? (1 << 31) : 0;
1879
1880	SLJIT_ASSERT((type & 0xff) == SLJIT_C_EQUAL || (type & 0xff) == SLJIT_C_NOT_EQUAL);
1881	ADJUST_LOCAL_OFFSET(src, srcw);
1882
1883	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1884	PTR_FAIL_IF(!jump);
1885	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1886	jump->flags |= IS_CBZ | IS_COND;
1887
1888	if (src & SLJIT_MEM) {
1889		PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw));
1890		src = TMP_REG1;
1891	}
1892	else if (src & SLJIT_IMM) {
1893		PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1894		src = TMP_REG1;
1895	}
1896	SLJIT_ASSERT(FAST_IS_REG(src));
1897
1898	if ((type & 0xff) == SLJIT_C_EQUAL)
1899		inv_bits |= 1 << 24;
1900
1901	PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
1902	PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1903	jump->addr = compiler->size;
1904	PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
1905	return jump;
1906}
1907
1908SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
1909{
1910	struct sljit_jump *jump;
1911
1912	CHECK_ERROR();
1913	check_sljit_emit_ijump(compiler, type, src, srcw);
1914	ADJUST_LOCAL_OFFSET(src, srcw);
1915
1916	/* In ARM, we don't need to touch the arguments. */
1917	if (!(src & SLJIT_IMM)) {
1918		if (src & SLJIT_MEM) {
1919			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw));
1920			src = TMP_REG1;
1921		}
1922		return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
1923	}
1924
1925	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1926	FAIL_IF(!jump);
1927	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
1928	jump->u.target = srcw;
1929
1930	FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
1931	jump->addr = compiler->size;
1932	return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
1933}
1934
1935SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
1936	sljit_si dst, sljit_sw dstw,
1937	sljit_si src, sljit_sw srcw,
1938	sljit_si type)
1939{
1940	sljit_si dst_r, flags, mem_flags;
1941	sljit_ins cc;
1942
1943	CHECK_ERROR();
1944	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
1945	ADJUST_LOCAL_OFFSET(dst, dstw);
1946	ADJUST_LOCAL_OFFSET(src, srcw);
1947
1948	if (dst == SLJIT_UNUSED)
1949		return SLJIT_SUCCESS;
1950
1951	cc = get_cc(type);
1952	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1953
1954	if (GET_OPCODE(op) < SLJIT_ADD) {
1955		FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
1956		if (dst_r != TMP_REG1)
1957			return SLJIT_SUCCESS;
1958		return emit_op_mem(compiler, (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE, TMP_REG1, dst, dstw);
1959	}
1960
1961	compiler->cache_arg = 0;
1962	compiler->cache_argw = 0;
1963	flags = GET_FLAGS(op) ? SET_FLAGS : 0;
1964	mem_flags = WORD_SIZE;
1965	if (op & SLJIT_INT_OP) {
1966		flags |= INT_OP;
1967		mem_flags = INT_SIZE;
1968	}
1969
1970	if (src & SLJIT_MEM) {
1971		FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw));
1972		src = TMP_REG1;
1973		srcw = 0;
1974	} else if (src & SLJIT_IMM)
1975		flags |= ARG1_IMM;
1976
1977	FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
1978	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2);
1979
1980	if (dst_r != TMP_REG1)
1981		return SLJIT_SUCCESS;
1982	return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0);
1983}
1984
1985SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
1986{
1987	struct sljit_const *const_;
1988	sljit_si dst_r;
1989
1990	CHECK_ERROR_PTR();
1991	check_sljit_emit_const(compiler, dst, dstw, init_value);
1992	ADJUST_LOCAL_OFFSET(dst, dstw);
1993
1994	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
1995	PTR_FAIL_IF(!const_);
1996	set_const(const_, compiler);
1997
1998	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1999	PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value));
2000
2001	if (dst & SLJIT_MEM)
2002		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
2003	return const_;
2004}
2005
2006SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2007{
2008	sljit_ins* inst = (sljit_ins*)addr;
2009	modify_imm64_const(inst, new_addr);
2010	SLJIT_CACHE_FLUSH(inst, inst + 4);
2011}
2012
2013SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2014{
2015	sljit_ins* inst = (sljit_ins*)addr;
2016	modify_imm64_const(inst, new_constant);
2017	SLJIT_CACHE_FLUSH(inst, inst + 4);
2018}
2019