1/*
2 *    Stack-less Just-In-Time compiler
3 *
4 *    Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
5 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without modification, are
8 * permitted provided that the following conditions are met:
9 *
10 *   1. Redistributions of source code must retain the above copyright notice, this list of
11 *      conditions and the following disclaimer.
12 *
13 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
14 *      of conditions and the following disclaimer in the documentation and/or other materials
15 *      provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* TileGX architecture. */
29/* Contributed by Tilera Corporation. */
30#include "sljitNativeTILEGX-encoder.c"
31
32#define SIMM_8BIT_MAX (0x7f)
33#define SIMM_8BIT_MIN (-0x80)
34#define SIMM_16BIT_MAX (0x7fff)
35#define SIMM_16BIT_MIN (-0x8000)
36#define SIMM_17BIT_MAX (0xffff)
37#define SIMM_17BIT_MIN (-0x10000)
38#define SIMM_32BIT_MAX (0x7fffffff)
39#define SIMM_32BIT_MIN (-0x7fffffff - 1)
40#define SIMM_48BIT_MAX (0x7fffffff0000L)
41#define SIMM_48BIT_MIN (-0x800000000000L)
42#define IMM16(imm) ((imm) & 0xffff)
43
44#define UIMM_16BIT_MAX (0xffff)
45
46#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
47#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
48#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
49#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5)
50#define PIC_ADDR_REG TMP_REG2
51
/* Lookup table from a register index used by this file to a TileGX register
   number. Entry 0 is 63, the same value as the ZERO macro. The last five
   initializers (54, 5, 16, 6, 7) equal SLJIT_LOCALS_REG_mapped,
   TMP_REG1_mapped, TMP_REG2_mapped, TMP_REG3_mapped and ADDR_TMP_mapped;
   they sit at indices TMP_REG1 - 1 .. ADDR_TMP only if
   SLJIT_NUMBER_OF_REGISTERS is 10 -- TODO confirm against the SLJIT config. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
	63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
};
55
56#define SLJIT_LOCALS_REG_mapped 54
57#define TMP_REG1_mapped 5
58#define TMP_REG2_mapped 16
59#define TMP_REG3_mapped 6
60#define ADDR_TMP_mapped 7
61
/* Flags are kept in volatile registers. */
63#define EQUAL_FLAG 8
64/* And carry flag as well. */
65#define ULESS_FLAG 9
66#define UGREATER_FLAG 10
67#define LESS_FLAG 11
68#define GREATER_FLAG 12
69#define OVERFLOW_FLAG 13
70
71#define ZERO 63
72#define RA 55
73#define TMP_EREG1 14
74#define TMP_EREG2 15
75
76#define LOAD_DATA 0x01
77#define WORD_DATA 0x00
78#define BYTE_DATA 0x02
79#define HALF_DATA 0x04
80#define INT_DATA 0x06
81#define SIGNED_DATA 0x08
82#define DOUBLE_DATA 0x10
83
84/* Separates integer and floating point registers */
85#define GPR_REG 0xf
86
87#define MEM_MASK 0x1f
88
89#define WRITE_BACK 0x00020
90#define ARG_TEST 0x00040
91#define ALT_KEEP_CACHE 0x00080
92#define CUMULATIVE_OP 0x00100
93#define LOGICAL_OP 0x00200
94#define IMM_OP 0x00400
95#define SRC2_IMM 0x00800
96
97#define UNUSED_DEST 0x01000
98#define REG_DEST 0x02000
99#define REG1_SOURCE 0x04000
100#define REG2_SOURCE 0x08000
101#define SLOW_SRC1 0x10000
102#define SLOW_SRC2 0x20000
103#define SLOW_DEST 0x40000
104
105/* Only these flags are set. UNUSED_DEST is not set when no flags should be set.
106 */
107#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))
108
109SLJIT_API_FUNC_ATTRIBUTE const char *sljit_get_platform_name(void)
110{
111	return "TileGX" SLJIT_CPUINFO;
112}
113
/* Type of one encoded instruction word. push_inst() appends exactly one
   sljit_ins to the compiler buffer per call and bumps compiler->size by one. */
typedef sljit_uw sljit_ins;
116
/* One not-yet-encoded instruction waiting in inst_buf to be packed into a
   bundle. */
struct jit_instr {
	/* Entry of tilegx_opcodes describing the instruction. */
	const struct tilegx_opcode* opcode;
	/* Pipe the instruction is encoded for; set by get_any_valid_pipe() when
	   queued and overwritten by assign_pipes(). */
	tilegx_pipeline pipe;
	/* Bit masks built as (1 << register number) by the push_*_buffer helpers;
	   assign_pipes() uses them to detect dependencies inside a bundle. */
	unsigned long input_registers;
	unsigned long output_registers;
	/* Raw operand values, passed to the operand insert() callbacks in order. */
	int operand_value[4];
	/* Source line (__LINE__) of the emitting macro; printed in debug builds. */
	int line;
};
125
126/* Opcode Helper Macros */
127#define TILEGX_X_MODE 0
128
129#define X_MODE create_Mode(TILEGX_X_MODE)
130
131#define FNOP_X0 \
132	create_Opcode_X0(RRR_0_OPCODE_X0) | \
133	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
134	create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)
135
136#define FNOP_X1 \
137	create_Opcode_X1(RRR_0_OPCODE_X1) | \
138	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
139	create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)
140
141#define NOP \
142	create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1
143
144#define ANOP_X0 \
145	create_Opcode_X0(RRR_0_OPCODE_X0) | \
146	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
147	create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)
148
149#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
150	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
151	create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
152	create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0
153
154#define ADD_X1 \
155	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
156	create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0
157
158#define ADDI_X1 \
159	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
160	create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0
161
162#define SUB_X1 \
163	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
164	create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0
165
166#define NOR_X1 \
167	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
168	create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0
169
170#define OR_X1 \
171	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
172	create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0
173
174#define AND_X1 \
175	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
176	create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0
177
178#define XOR_X1 \
179	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
180	create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0
181
182#define CMOVNEZ_X0 \
183	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
184	create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1
185
186#define CMOVEQZ_X0 \
187	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
188	create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1
189
190#define ADDLI_X1 \
191	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0
192
193#define V4INT_L_X1 \
194	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
195	create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0
196
197#define BFEXTU_X0 \
198	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
199	create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1
200
201#define BFEXTS_X0 \
202	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
203	create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1
204
205#define SHL16INSLI_X1 \
206	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0
207
208#define ST_X1 \
209	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
210	create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0
211
212#define LD_X1 \
213	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
214	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
215	create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0
216
217#define JR_X1 \
218	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
219	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
220	create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0
221
222#define JALR_X1 \
223	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
224	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
225	create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0
226
227#define CLZ_X0 \
228	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
229	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
230	create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1
231
232#define CMPLTUI_X1 \
233	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
234	create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0
235
236#define CMPLTU_X1 \
237	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
238	create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0
239
240#define CMPLTS_X1 \
241	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
242	create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0
243
244#define XORI_X1 \
245	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
246	create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0
247
248#define ORI_X1 \
249	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
250	create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0
251
252#define ANDI_X1 \
253	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
254	create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0
255
256#define SHLI_X1 \
257	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
258	create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0
259
260#define SHL_X1 \
261	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
262	create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0
263
264#define SHRSI_X1 \
265	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
266	create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0
267
268#define SHRS_X1 \
269	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
270	create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0
271
272#define SHRUI_X1 \
273	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
274	create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0
275
276#define SHRU_X1 \
277	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
278	create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0
279
280#define BEQZ_X1 \
281	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
282	create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0
283
284#define BNEZ_X1 \
285	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
286	create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0
287
288#define J_X1 \
289	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
290	create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0
291
292#define JAL_X1 \
293	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
294	create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0
295
296#define DEST_X0(x) create_Dest_X0(x)
297#define SRCA_X0(x) create_SrcA_X0(x)
298#define SRCB_X0(x) create_SrcB_X0(x)
299#define DEST_X1(x) create_Dest_X1(x)
300#define SRCA_X1(x) create_SrcA_X1(x)
301#define SRCB_X1(x) create_SrcB_X1(x)
302#define IMM16_X1(x) create_Imm16_X1(x)
303#define IMM8_X1(x) create_Imm8_X1(x)
304#define BFSTART_X0(x) create_BFStart_X0(x)
305#define BFEND_X0(x) create_BFEnd_X0(x)
306#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
307#define JOFF_X1(x) create_JumpOff_X1(x)
308#define BOFF_X1(x) create_BrOff_X1(x)
309
/* Load/store mnemonic table. The per-row tags read "signedness width
   direction" (u/s, w/b/h/i, s = store / l = load), and the row order matches
   an index built from the data flags defined earlier in this file:
   LOAD_DATA (0x01) selects load vs. store, BYTE_DATA / HALF_DATA / INT_DATA
   (0x02 / 0x04 / 0x06) select the width, and SIGNED_DATA (0x08) selects the
   second half of the table. Stores are the same in both halves.
   NOTE(review): the lookup site is outside this view -- confirm the index is
   masked to 0..15 before use. */
static const tilegx_mnemonic data_transfer_insts[16] = {
	/* u w s */ TILEGX_OPC_ST   /* st */,
	/* u w l */ TILEGX_OPC_LD   /* ld */,
	/* u b s */ TILEGX_OPC_ST1  /* st1 */,
	/* u b l */ TILEGX_OPC_LD1U /* ld1u */,
	/* u h s */ TILEGX_OPC_ST2  /* st2 */,
	/* u h l */ TILEGX_OPC_LD2U /* ld2u */,
	/* u i s */ TILEGX_OPC_ST4  /* st4 */,
	/* u i l */ TILEGX_OPC_LD4U /* ld4u */,
	/* s w s */ TILEGX_OPC_ST   /* st */,
	/* s w l */ TILEGX_OPC_LD   /* ld */,
	/* s b s */ TILEGX_OPC_ST1  /* st1 */,
	/* s b l */ TILEGX_OPC_LD1S /* ld1s */,
	/* s h s */ TILEGX_OPC_ST2  /* st2 */,
	/* s h l */ TILEGX_OPC_LD2S /* ld2s */,
	/* s i s */ TILEGX_OPC_ST4  /* st4 */,
	/* s i l */ TILEGX_OPC_LD4S /* ld4s */,
};
328
329#ifdef TILEGX_JIT_DEBUG
330static sljit_s32 push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
331{
332	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
333	FAIL_IF(!ptr);
334	*ptr = ins;
335	compiler->size++;
336	printf("|%04d|S0|:\t\t", line);
337	print_insn_tilegx(ptr);
338	return SLJIT_SUCCESS;
339}
340
341static sljit_s32 push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
342{
343	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
344	FAIL_IF(!ptr);
345	*ptr = ins;
346	compiler->size++;
347	return SLJIT_SUCCESS;
348}
349
350#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
351#else
352static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
353{
354	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
355	FAIL_IF(!ptr);
356	*ptr = ins;
357	compiler->size++;
358	return SLJIT_SUCCESS;
359}
360#endif
361
/* Packs three per-slot pipe bit sets into one word: slot 0 in the low byte,
   slot 1 shifted left by 8, slot 2 shifted left by 16. */
#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
	((p0) | ((p1) << 8) | ((p2) << 16))

/* Initializer for one struct Format: the pipe assigned to each of the three
   issue slots, followed by the matching mask with exactly one bit
   (1 << pipe) set per slot. */
#define BUNDLE_FORMAT(p0, p1, p2) \
	{ \
		{ \
			(tilegx_pipeline)(p0), \
			(tilegx_pipeline)(p1), \
			(tilegx_pipeline)(p2) \
		}, \
		BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
	}

/* Marker for an unused issue slot; one past the real pipeline encodings. */
#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS

/* True when p compares <= TILEGX_PIPELINE_X1. Presumably X0 and X1 are the
   lowest-valued pipeline enumerators -- TODO confirm in the encoder header. */
#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)
378
379#define PI(encoding) \
380	push_inst(compiler, encoding)
381
382#define PB3(opcode, dst, srca, srcb) \
383	push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)
384
385#define PB2(opcode, dst, src) \
386	push_2_buffer(compiler, opcode, dst, src, __LINE__)
387
388#define JR(reg) \
389	push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)
390
391#define ADD(dst, srca, srcb) \
392	push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)
393
394#define SUB(dst, srca, srcb) \
395	push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)
396
397#define MUL(dst, srca, srcb) \
398	push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__)
399
400#define NOR(dst, srca, srcb) \
401	push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)
402
403#define OR(dst, srca, srcb) \
404	push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)
405
406#define XOR(dst, srca, srcb) \
407	push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)
408
409#define AND(dst, srca, srcb) \
410	push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)
411
412#define CLZ(dst, src) \
413	push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)
414
415#define SHLI(dst, srca, srcb) \
416	push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)
417
418#define SHRUI(dst, srca, imm) \
419	push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)
420
421#define XORI(dst, srca, imm) \
422	push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)
423
424#define ORI(dst, srca, imm) \
425	push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)
426
427#define CMPLTU(dst, srca, srcb) \
428	push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)
429
430#define CMPLTS(dst, srca, srcb) \
431	push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)
432
433#define CMPLTUI(dst, srca, imm) \
434	push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)
435
436#define CMOVNEZ(dst, srca, srcb) \
437	push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)
438
439#define CMOVEQZ(dst, srca, srcb) \
440	push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)
441
442#define ADDLI(dst, srca, srcb) \
443	push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)
444
445#define SHL16INSLI(dst, srca, srcb) \
446	push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)
447
448#define LD_ADD(dst, addr, adjust) \
449	push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)
450
451#define ST_ADD(src, addr, adjust) \
452	push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__)
453
454#define LD(dst, addr) \
455	push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)
456
457#define BFEXTU(dst, src, start, end) \
458	push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)
459
460#define BFEXTS(dst, src, start, end) \
461	push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)
462
463#define ADD_SOLO(dest, srca, srcb) \
464	push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))
465
466#define ADDI_SOLO(dest, srca, imm) \
467	push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))
468
469#define ADDLI_SOLO(dest, srca, imm) \
470	push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))
471
472#define SHL16INSLI_SOLO(dest, srca, imm) \
473	push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))
474
475#define JALR_SOLO(reg) \
476	push_inst(compiler, JALR_X1 | SRCA_X1(reg))
477
478#define JR_SOLO(reg) \
479	push_inst(compiler, JR_X1 | SRCA_X1(reg))
480
/* Describes one legal way of placing the buffered instructions into the
   pipes of a single bundle. Entries are created with BUNDLE_FORMAT(). */
struct Format {
	/* Mapping of bundle issue slot to assigned pipe. */
	tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];

	/* Mask of pipes used by this bundle, laid out by BUNDLE_FORMAT_MASK():
	   one (1 << pipe) bit per slot, each slot in its own byte. */
	unsigned int pipe_mask;
};
488
/* Table of bundle layouts. compute_format() scans it front to back and
   returns the first entry whose pipe_mask is fully covered by the pipes the
   buffered instructions can use, so earlier entries take priority. */
const struct Format formats[] =
{
	/* In Y format we must always have something in Y2, since it has
	* no fnop, so this conveys that Y2 must always be used. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),

	/* Y format has three instructions. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),

	/* X format has only two instructions. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
};
510
511
/* Pending instructions that have not been encoded into a bundle yet, and the
   number of valid entries at the front of the array. Both are file-scope
   objects shared by every function below, not stored per compiler.
   NOTE(review): this looks unsafe if two compilers are used concurrently --
   confirm whether callers serialize code generation. */
struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
unsigned long inst_buf_index;
514
515tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
516{
517	/* FIXME: tile: we could pregenerate this. */
518	int pipe;
519	for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++)
520		;
521	return (tilegx_pipeline)(pipe);
522}
523
524void insert_nop(tilegx_mnemonic opc, int line)
525{
526	const struct tilegx_opcode* opcode = NULL;
527
528	memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);
529
530	opcode = &tilegx_opcodes[opc];
531	inst_buf[0].opcode = opcode;
532	inst_buf[0].pipe = get_any_valid_pipe(opcode);
533	inst_buf[0].input_registers = 0;
534	inst_buf[0].output_registers = 0;
535	inst_buf[0].line = line;
536	++inst_buf_index;
537}
538
539const struct Format* compute_format()
540{
541	unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
542		inst_buf[0].opcode->pipes,
543		inst_buf[1].opcode->pipes,
544		(inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));
545
546	const struct Format* match = NULL;
547	const struct Format *b = NULL;
548	unsigned int i;
549	for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
550		b = &formats[i];
551		if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
552			match = b;
553			break;
554		}
555	}
556
557	return match;
558}
559
560sljit_s32 assign_pipes()
561{
562	unsigned long output_registers = 0;
563	unsigned int i = 0;
564
565	if (inst_buf_index == 1) {
566		tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
567					? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
568		insert_nop(opc, __LINE__);
569	}
570
571	const struct Format* match = compute_format();
572
573	if (match == NULL)
574		return -1;
575
576	for (i = 0; i < inst_buf_index; i++) {
577
578		if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
579			return -1;
580
581		if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
582			return -1;
583
584		/* Don't include Rzero in the match set, to avoid triggering
585		   needlessly on 'prefetch' instrs. */
586
587		output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;
588
589		inst_buf[i].pipe = match->pipe[i];
590	}
591
592	/* If only 2 instrs, and in Y-mode, insert a nop. */
593	if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
594		insert_nop(TILEGX_OPC_FNOP, __LINE__);
595
596		/* Select the yet unassigned pipe. */
597		tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
598					+ TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
599					- (inst_buf[1].pipe + inst_buf[2].pipe)));
600
601		inst_buf[0].pipe = pipe;
602	}
603
604	return 0;
605}
606
607tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
608{
609	int i, val;
610	const struct tilegx_opcode* opcode = inst->opcode;
611	tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];
612
613	const struct tilegx_operand* operand = NULL;
614	for (i = 0; i < opcode->num_operands; i++) {
615		operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
616		val = inst->operand_value[i];
617
618		bits |= operand->insert(val);
619	}
620
621	return bits;
622}
623
/* Encodes and emits ONE bundle from the front of inst_buf, leaving any
   instructions that did not fit in the buffer for a later call.
   Three attempts are made, in order:
     1. all buffered instructions in one bundle (buffer becomes empty);
     2. all but the last instruction (the last one stays buffered);
     3. only the first instruction (the remaining ones stay buffered).
   Returns the result of push_inst / push_inst_nodebug for the emitted bundle.
   NOTE(review): if none of the attempts succeeds, control passes
   SLJIT_ASSERT_STOP() and reaches the end of this non-void function without
   a return statement; with assertions compiled out the returned value is
   indeterminate. A defined error return should be added. */
static sljit_s32 update_buffer(struct sljit_compiler *compiler)
{
	int i;
	int orig_index = inst_buf_index;
	/* Snapshot of the buffer: assign_pipes() may reorder or overwrite entries
	   even when it fails. */
	struct jit_instr inst0 = inst_buf[0];
	struct jit_instr inst1 = inst_buf[1];
	struct jit_instr inst2 = inst_buf[2];
	tilegx_bundle_bits bits = 0;

	/* If the bundle is valid as is, encode it, empty the buffer and push
	   the encoded bundle. */
	if (assign_pipes() == 0) {
		for (i = 0; i < inst_buf_index; i++) {
			bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
			printf("|%04d", inst_buf[i].line);
#endif
		}
#ifdef TILEGX_JIT_DEBUG
		if (inst_buf_index == 3)
			printf("|M0|:\t");
		else
			printf("|M0|:\t\t");
		print_insn_tilegx(&bits);
#endif

		inst_buf_index = 0;

#ifdef TILEGX_JIT_DEBUG
		return push_inst_nodebug(compiler, bits);
#else
		return push_inst(compiler, bits);
#endif
	}

	/* If the bundle is invalid, split it in two. First encode the first two
	   (or possibly 1) instructions, and then the last, separately. Note that
	   assign_pipes may have re-ordered the instrs (by inserting no-ops in
	   lower slots) so we need to reset them. */

	inst_buf_index = orig_index - 1;
	inst_buf[0] = inst0;
	inst_buf[1] = inst1;
	inst_buf[2] = inst2;
	if (assign_pipes() == 0) {
		for (i = 0; i < inst_buf_index; i++) {
			bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
			printf("|%04d", inst_buf[i].line);
#endif
		}

#ifdef TILEGX_JIT_DEBUG
		if (inst_buf_index == 3)
			printf("|M1|:\t");
		else
			printf("|M1|:\t\t");
		print_insn_tilegx(&bits);
#endif

		/* Keep the instruction that was left out as the only buffered one. */
		if ((orig_index - 1) == 2) {
			inst_buf[0] = inst2;
			inst_buf_index = 1;
		} else if ((orig_index - 1) == 1) {
			inst_buf[0] = inst1;
			inst_buf_index = 1;
		} else
			SLJIT_ASSERT_STOP();

#ifdef TILEGX_JIT_DEBUG
		return push_inst_nodebug(compiler, bits);
#else
		return push_inst(compiler, bits);
#endif
	} else {
		/* We had 3 instrs of which the first 2 can't live in the same bundle.
		   Split those two. Note that we don't try to then combine the second
		   and third instr into a single bundle.  First instruction: */
		inst_buf_index = 1;
		inst_buf[0] = inst0;
		inst_buf[1] = inst1;
		inst_buf[2] = inst2;
		if (assign_pipes() == 0) {
			for (i = 0; i < inst_buf_index; i++) {
				bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
				printf("|%04d", inst_buf[i].line);
#endif
			}

#ifdef TILEGX_JIT_DEBUG
			if (inst_buf_index == 3)
				printf("|M2|:\t");
			else
				printf("|M2|:\t\t");
			print_insn_tilegx(&bits);
#endif

			/* Shift the two remaining instructions to the front. */
			inst_buf[0] = inst1;
			inst_buf[1] = inst2;
			inst_buf_index = orig_index - 1;
#ifdef TILEGX_JIT_DEBUG
			return push_inst_nodebug(compiler, bits);
#else
			return push_inst(compiler, bits);
#endif
		} else
			SLJIT_ASSERT_STOP();
	}

	/* NOTE(review): no return value on this path, see the header comment. */
	SLJIT_ASSERT_STOP();
}
735
736static sljit_s32 flush_buffer(struct sljit_compiler *compiler)
737{
738	while (inst_buf_index != 0) {
739		FAIL_IF(update_buffer(compiler));
740	}
741	return SLJIT_SUCCESS;
742}
743
744static sljit_s32 push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
745{
746	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
747		FAIL_IF(update_buffer(compiler));
748
749	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
750	inst_buf[inst_buf_index].opcode = opcode;
751	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
752	inst_buf[inst_buf_index].operand_value[0] = op0;
753	inst_buf[inst_buf_index].operand_value[1] = op1;
754	inst_buf[inst_buf_index].operand_value[2] = op2;
755	inst_buf[inst_buf_index].operand_value[3] = op3;
756	inst_buf[inst_buf_index].input_registers = 1L << op1;
757	inst_buf[inst_buf_index].output_registers = 1L << op0;
758	inst_buf[inst_buf_index].line = line;
759	inst_buf_index++;
760
761	return SLJIT_SUCCESS;
762}
763
764static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
765{
766	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
767		FAIL_IF(update_buffer(compiler));
768
769	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
770	inst_buf[inst_buf_index].opcode = opcode;
771	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
772	inst_buf[inst_buf_index].operand_value[0] = op0;
773	inst_buf[inst_buf_index].operand_value[1] = op1;
774	inst_buf[inst_buf_index].operand_value[2] = op2;
775	inst_buf[inst_buf_index].line = line;
776
777	switch (opc) {
778	case TILEGX_OPC_ST_ADD:
779		inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
780		inst_buf[inst_buf_index].output_registers = 1L << op0;
781		break;
782	case TILEGX_OPC_LD_ADD:
783		inst_buf[inst_buf_index].input_registers = 1L << op1;
784		inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
785		break;
786	case TILEGX_OPC_ADD:
787	case TILEGX_OPC_AND:
788	case TILEGX_OPC_SUB:
789	case TILEGX_OPC_MULX:
790	case TILEGX_OPC_OR:
791	case TILEGX_OPC_XOR:
792	case TILEGX_OPC_NOR:
793	case TILEGX_OPC_SHL:
794	case TILEGX_OPC_SHRU:
795	case TILEGX_OPC_SHRS:
796	case TILEGX_OPC_CMPLTU:
797	case TILEGX_OPC_CMPLTS:
798	case TILEGX_OPC_CMOVEQZ:
799	case TILEGX_OPC_CMOVNEZ:
800		inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
801		inst_buf[inst_buf_index].output_registers = 1L << op0;
802		break;
803	case TILEGX_OPC_ADDLI:
804	case TILEGX_OPC_XORI:
805	case TILEGX_OPC_ORI:
806	case TILEGX_OPC_SHLI:
807	case TILEGX_OPC_SHRUI:
808	case TILEGX_OPC_SHRSI:
809	case TILEGX_OPC_SHL16INSLI:
810	case TILEGX_OPC_CMPLTUI:
811	case TILEGX_OPC_CMPLTSI:
812		inst_buf[inst_buf_index].input_registers = 1L << op1;
813		inst_buf[inst_buf_index].output_registers = 1L << op0;
814		break;
815	default:
816		printf("unrecoginzed opc: %s\n", opcode->name);
817		SLJIT_ASSERT_STOP();
818	}
819
820	inst_buf_index++;
821
822	return SLJIT_SUCCESS;
823}
824
825static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
826{
827	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
828		FAIL_IF(update_buffer(compiler));
829
830	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
831	inst_buf[inst_buf_index].opcode = opcode;
832	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
833	inst_buf[inst_buf_index].operand_value[0] = op0;
834	inst_buf[inst_buf_index].operand_value[1] = op1;
835	inst_buf[inst_buf_index].line = line;
836
837	switch (opc) {
838	case TILEGX_OPC_BEQZ:
839	case TILEGX_OPC_BNEZ:
840		inst_buf[inst_buf_index].input_registers = 1L << op0;
841		break;
842	case TILEGX_OPC_ST:
843	case TILEGX_OPC_ST1:
844	case TILEGX_OPC_ST2:
845	case TILEGX_OPC_ST4:
846		inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
847		inst_buf[inst_buf_index].output_registers = 0;
848		break;
849	case TILEGX_OPC_CLZ:
850	case TILEGX_OPC_LD:
851	case TILEGX_OPC_LD1U:
852	case TILEGX_OPC_LD1S:
853	case TILEGX_OPC_LD2U:
854	case TILEGX_OPC_LD2S:
855	case TILEGX_OPC_LD4U:
856	case TILEGX_OPC_LD4S:
857		inst_buf[inst_buf_index].input_registers = 1L << op1;
858		inst_buf[inst_buf_index].output_registers = 1L << op0;
859		break;
860	default:
861		printf("unrecoginzed opc: %s\n", opcode->name);
862		SLJIT_ASSERT_STOP();
863	}
864
865	inst_buf_index++;
866
867	return SLJIT_SUCCESS;
868}
869
870static sljit_s32 push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
871{
872	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
873		FAIL_IF(update_buffer(compiler));
874
875	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
876	inst_buf[inst_buf_index].opcode = opcode;
877	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
878	inst_buf[inst_buf_index].input_registers = 0;
879	inst_buf[inst_buf_index].output_registers = 0;
880	inst_buf[inst_buf_index].line = line;
881	inst_buf_index++;
882
883	return SLJIT_SUCCESS;
884}
885
886static sljit_s32 push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
887{
888	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
889		FAIL_IF(update_buffer(compiler));
890
891	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
892	inst_buf[inst_buf_index].opcode = opcode;
893	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
894	inst_buf[inst_buf_index].operand_value[0] = op0;
895	inst_buf[inst_buf_index].input_registers = 1L << op0;
896	inst_buf[inst_buf_index].output_registers = 0;
897	inst_buf[inst_buf_index].line = line;
898	inst_buf_index++;
899
900	return flush_buffer(compiler);
901}
902
/* Tries to replace the generic jump sequence recorded in `jump` with a
   shorter direct branch or jump, based on the distance to the target.
   Sets PATCH_B or PATCH_J in jump->flags when a short form is chosen and
   writes the new opcode word(s) at the jump site.
   Returns code_ptr unchanged when nothing is shortened (including every
   rewritable jump); otherwise returns a pointer into the patched sequence
   (inst or inst + 1).
   NOTE(review): the caller is outside this view -- presumably the returned
   pointer becomes the new end of emitted code; confirm. */
static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_ins *inst;

	/* Rewritable jumps always keep their full-length form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return code_ptr;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}

	/* For conditional jumps the instruction to patch is the one just before
	   jump->addr. */
	inst = (sljit_ins *)jump->addr;
	if (jump->flags & IS_COND)
		inst--;

	/* Distance in units of 8 bytes (one sljit_ins). */
	diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
	if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
		jump->flags |= PATCH_B;

		if (!(jump->flags & IS_COND)) {
			if (jump->flags & IS_JAL) {
				/* Calls use a jump-and-link opcode, patched as PATCH_J. */
				jump->flags &= ~(PATCH_B);
				jump->flags |= PATCH_J;
				inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(inst);
#endif
			} else {
				/* beqz on register ZERO; presumably always taken. */
				inst[0] = BEQZ_X1 | SRCA_X1(ZERO);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(inst);
#endif
			}

			return inst;
		}

		/* Toggles bits 55-57 of the existing branch word. Presumably this
		   inverts the branch condition -- TODO confirm against the BrType
		   field encoding. */
		inst[0] = inst[0] ^ (0x7L << 55);

#ifdef TILEGX_JIT_DEBUG
		printf("[runtime relocate]%04d:\t", __LINE__);
		print_insn_tilegx(inst);
#endif
		jump->addr -= sizeof(sljit_ins);
		return inst;
	}

	if (jump->flags & IS_COND) {
		/* Target and the following instruction agree in every address bit
		   above the low 30: keep the branch, point it two instructions
		   ahead, and place a direct jump after it. */
		if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
			jump->flags |= PATCH_J;
			inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
			inst[1] = J_X1;
			return inst + 1;
		}

		return code_ptr;
	}

	/* Unconditional jump with the same address-bit agreement: use a direct
	   jump (or jump-and-link for calls). */
	if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
		jump->flags |= PATCH_J;

		if (jump->flags & IS_JAL) {
			inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
			printf("[runtime relocate]%04d:\t", __LINE__);
			print_insn_tilegx(inst);
#endif

		} else {
			inst[0] = J_X1;

#ifdef TILEGX_JIT_DEBUG
			printf("[runtime relocate]%04d:\t", __LINE__);
			print_insn_tilegx(inst);
#endif
		}

		return inst;
	}

	return code_ptr;
}
995
/* Produces the final executable code from the instructions collected in the
   compiler's memory fragments.

   The function works in two passes:
     1. Every instruction word is copied into freshly allocated executable
        memory. While copying, labels, jumps and constants are matched by
        their word index and their final addresses are recorded; jumps are
        shortened by detect_jump_type where possible.
     2. Every jump is patched with its real target (branch offset, jump
        offset, or the immediate fields of an address load sequence).

   Returns the start of the generated code. The early-exit macros
   (CHECK_ERROR_PTR, CHECK_PTR, PTR_FAIL_WITH_EXEC_IF) handle the failure
   cases; presumably they return NULL - confirm against their definitions. */
SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* compiler->size is the instruction count before any jump shortening. */
	code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	/* Pass 1: copy the instructions fragment by fragment. */
	do {
		buf_ptr = (sljit_ins *)buf->memory;
		/* used_size is in bytes; each instruction word takes 8 of them. */
		buf_end = buf_ptr + (buf->used_size >> 3);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw) code_ptr;
				/* From now on size holds the offset in the final code. */
				label->size = code_ptr - code;
				label = label->next;
			}

			if (jump && jump->addr == word_count) {
				/* Point jump->addr at the start of the jump sequence, which
				   begins 4 words back for calls and 3 words back otherwise. */
				if (jump->flags & IS_JAL)
					jump->addr = (sljit_uw)(code_ptr - 4);
				else
					jump->addr = (sljit_uw)(code_ptr - 3);

				/* May move code_ptr backwards when a shorter form is chosen. */
				code_ptr = detect_jump_type(jump, code_ptr, code);
				jump = jump->next;
			}

			if (const_ && const_->addr == word_count) {
				/* Just recording the address. */
				const_->addr = (sljit_uw) code_ptr;
				const_ = const_->next;
			}

			code_ptr++;
			word_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be placed right after the last instruction. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw) code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Pass 2: fill in the target of every jump. */
	jump = compiler->jumps;
	while (jump) {
		/* do { } while (0) only exists so that 'break' can skip the rest. */
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins *)jump->addr;

			if (jump->flags & PATCH_B) {
				/* Offset in instruction words, stored in the branch offset field. */
				addr = (sljit_sw)(addr - (jump->addr)) >> 3;
				SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
				buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(buf_ptr);
#endif
				break;
			}

			if (jump->flags & PATCH_J) {
				SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
				/* Offset in instruction words, stored in the jump offset field. */
				addr = (sljit_sw)(addr - (jump->addr)) >> 3;
				buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(buf_ptr);
#endif
				break;
			}

			SLJIT_ASSERT(!(jump->flags & IS_JAL));

			/* Set the fields of immediate loads. */
			/* Bits 32-47, 16-31 and 0-15 of the target go into the 16-bit
			   field at bit 43 of three consecutive instructions. */
			buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
			buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
			buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
		} while (0);

		jump = jump->next;
	}

	/* Mark the compiler as finished and publish the real code size. */
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}
1119
1120static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
1121{
1122
1123	if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
1124		return ADDLI(dst_ar, ZERO, imm);
1125
1126	if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
1127		FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
1128		return SHL16INSLI(dst_ar, dst_ar, imm);
1129	}
1130
1131	if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
1132		FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
1133		FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
1134		return SHL16INSLI(dst_ar, dst_ar, imm);
1135	}
1136
1137	FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
1138	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
1139	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
1140	return SHL16INSLI(dst_ar, dst_ar, imm);
1141}
1142
1143static sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush)
1144{
1145	/* Should *not* be optimized as load_immediate, as pcre relocation
1146	   mechanism will match this fixed 4-instruction pattern. */
1147	if (flush) {
1148		FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
1149		FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
1150		return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
1151	}
1152
1153	FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
1154	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
1155	return SHL16INSLI(dst_ar, dst_ar, imm);
1156}
1157
1158static sljit_s32 emit_const_64(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush)
1159{
1160	/* Should *not* be optimized as load_immediate, as pcre relocation
1161	   mechanism will match this fixed 4-instruction pattern. */
1162	if (flush) {
1163		FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
1164		FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
1165		FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
1166		return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
1167	}
1168
1169	FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
1170	FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
1171	FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
1172	return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
1173}
1174
1175SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1176	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
1177	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1178{
1179	sljit_ins base;
1180	sljit_s32 i, tmp;
1181
1182	CHECK_ERROR();
1183	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
1184	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
1185
1186	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
1187	local_size = (local_size + 7) & ~7;
1188	compiler->local_size = local_size;
1189
1190	if (local_size <= SIMM_16BIT_MAX) {
1191		/* Frequent case. */
1192		FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
1193		base = SLJIT_LOCALS_REG_mapped;
1194	} else {
1195		FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
1196		FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
1197		FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
1198		base = TMP_REG2_mapped;
1199		local_size = 0;
1200	}
1201
1202	/* Save the return address. */
1203	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
1204	FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));
1205
1206	/* Save the S registers. */
1207	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
1208	for (i = SLJIT_S0; i >= tmp; i--) {
1209		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
1210	}
1211
1212	/* Save the R registers that need to be reserved. */
1213	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1214		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
1215	}
1216
1217	/* Move the arguments to S registers. */
1218	for (i = 0; i < args; i++) {
1219		FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO));
1220	}
1221
1222	return SLJIT_SUCCESS;
1223}
1224
1225SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1226	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
1227	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1228{
1229	CHECK_ERROR();
1230	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
1231	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
1232
1233	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
1234	compiler->local_size = (local_size + 7) & ~7;
1235
1236	return SLJIT_SUCCESS;
1237}
1238
1239SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1240{
1241	sljit_s32 local_size;
1242	sljit_ins base;
1243	sljit_s32 i, tmp;
1244	sljit_s32 saveds;
1245
1246	CHECK_ERROR();
1247	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
1248
1249	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
1250
1251	local_size = compiler->local_size;
1252	if (local_size <= SIMM_16BIT_MAX)
1253		base = SLJIT_LOCALS_REG_mapped;
1254	else {
1255		FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
1256		FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
1257		base = TMP_REG1_mapped;
1258		local_size = 0;
1259	}
1260
1261	/* Restore the return address. */
1262	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
1263	FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8));
1264
1265	/* Restore the S registers. */
1266	saveds = compiler->saveds;
1267	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
1268	for (i = SLJIT_S0; i >= tmp; i--) {
1269		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
1270	}
1271
1272	/* Restore the R registers that need to be reserved. */
1273	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1274		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
1275	}
1276
1277	if (compiler->local_size <= SIMM_16BIT_MAX)
1278		FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
1279	else
1280		FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));
1281
1282	return JR(RA);
1283}
1284
/* reg_ar is an absolute register! */
1286
1287/* Can perform an operation using at most 1 instruction. */
1288static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
1289{
1290	SLJIT_ASSERT(arg & SLJIT_MEM);
1291
1292	if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
1293			&& !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
1294		/* Works for both absoulte and relative addresses. */
1295		if (SLJIT_UNLIKELY(flags & ARG_TEST))
1296			return 1;
1297
1298		FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));
1299
1300		if (flags & LOAD_DATA)
1301			FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
1302		else
1303			FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
1304
1305		return -1;
1306	}
1307
1308	return 0;
1309}
1310
1311/* See getput_arg below.
1312   Note: can_cache is called only for binary operators. Those
1313   operators always uses word arguments without write back. */
1314static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1315{
1316	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1317
1318	/* Simple operation except for updates. */
1319	if (arg & OFFS_REG_MASK) {
1320		argw &= 0x3;
1321		next_argw &= 0x3;
1322		if (argw && argw == next_argw
1323				&& (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
1324			return 1;
1325		return 0;
1326	}
1327
1328	if (arg == next_arg) {
1329		if (((next_argw - argw) <= SIMM_16BIT_MAX
1330				&& (next_argw - argw) >= SIMM_16BIT_MIN))
1331			return 1;
1332
1333		return 0;
1334	}
1335
1336	return 0;
1337}
1338
1339/* Emit the necessary instructions. See can_cache above. */
1340static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1341{
1342	sljit_s32 tmp_ar, base;
1343
1344	SLJIT_ASSERT(arg & SLJIT_MEM);
1345	if (!(next_arg & SLJIT_MEM)) {
1346		next_arg = 0;
1347		next_argw = 0;
1348	}
1349
1350	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1351		tmp_ar = reg_ar;
1352	else
1353		tmp_ar = TMP_REG1_mapped;
1354
1355	base = arg & REG_MASK;
1356
1357	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1358		argw &= 0x3;
1359
1360		if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
1361			SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
1362			FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
1363			reg_ar = TMP_REG1_mapped;
1364		}
1365
1366		/* Using the cache. */
1367		if (argw == compiler->cache_argw) {
1368			if (!(flags & WRITE_BACK)) {
1369				if (arg == compiler->cache_arg) {
1370					if (flags & LOAD_DATA)
1371						return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1372					else
1373						return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1374				}
1375
1376				if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
1377					if (arg == next_arg && argw == (next_argw & 0x3)) {
1378						compiler->cache_arg = arg;
1379						compiler->cache_argw = argw;
1380						FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
1381						if (flags & LOAD_DATA)
1382							return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1383						else
1384							return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1385					}
1386
1387					FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
1388					if (flags & LOAD_DATA)
1389						return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
1390					else
1391						return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
1392				}
1393			} else {
1394				if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
1395					FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
1396					if (flags & LOAD_DATA)
1397						return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
1398					else
1399						return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
1400				}
1401			}
1402		}
1403
1404		if (SLJIT_UNLIKELY(argw)) {
1405			compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
1406			compiler->cache_argw = argw;
1407			FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
1408		}
1409
1410		if (!(flags & WRITE_BACK)) {
1411			if (arg == next_arg && argw == (next_argw & 0x3)) {
1412				compiler->cache_arg = arg;
1413				compiler->cache_argw = argw;
1414				FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
1415				tmp_ar = TMP_REG3_mapped;
1416			} else
1417				FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
1418
1419			if (flags & LOAD_DATA)
1420				return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
1421			else
1422				return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
1423		}
1424
1425		FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
1426
1427		if (flags & LOAD_DATA)
1428			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
1429		else
1430			return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
1431	}
1432
1433	if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
1434		/* Update only applies if a base register exists. */
1435		if (reg_ar == reg_map[base]) {
1436			SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);
1437			if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
1438				FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
1439				if (flags & LOAD_DATA)
1440					FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
1441				else
1442					FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));
1443
1444				if (argw)
1445					return ADDLI(reg_map[base], reg_map[base], argw);
1446
1447				return SLJIT_SUCCESS;
1448			}
1449
1450			FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
1451			reg_ar = TMP_REG1_mapped;
1452		}
1453
1454		if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
1455			if (argw)
1456				FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
1457		} else {
1458			if (compiler->cache_arg == SLJIT_MEM
1459					&& argw - compiler->cache_argw <= SIMM_16BIT_MAX
1460					&& argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
1461				if (argw != compiler->cache_argw) {
1462					FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
1463					compiler->cache_argw = argw;
1464				}
1465
1466				FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
1467			} else {
1468				compiler->cache_arg = SLJIT_MEM;
1469				compiler->cache_argw = argw;
1470				FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
1471				FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
1472			}
1473		}
1474
1475		if (flags & LOAD_DATA)
1476			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
1477		else
1478			return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
1479	}
1480
1481	if (compiler->cache_arg == arg
1482			&& argw - compiler->cache_argw <= SIMM_16BIT_MAX
1483			&& argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
1484		if (argw != compiler->cache_argw) {
1485			FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
1486			compiler->cache_argw = argw;
1487		}
1488
1489		if (flags & LOAD_DATA)
1490			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1491		else
1492			return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1493	}
1494
1495	if (compiler->cache_arg == SLJIT_MEM
1496			&& argw - compiler->cache_argw <= SIMM_16BIT_MAX
1497			&& argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
1498		if (argw != compiler->cache_argw)
1499			FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
1500	} else {
1501		compiler->cache_arg = SLJIT_MEM;
1502		FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
1503	}
1504
1505	compiler->cache_argw = argw;
1506
1507	if (!base) {
1508		if (flags & LOAD_DATA)
1509			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1510		else
1511			return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1512	}
1513
1514	if (arg == next_arg
1515			&& next_argw - argw <= SIMM_16BIT_MAX
1516			&& next_argw - argw >= SIMM_16BIT_MIN) {
1517		compiler->cache_arg = arg;
1518		FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
1519		if (flags & LOAD_DATA)
1520			return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
1521		else
1522			return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
1523	}
1524
1525	FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));
1526
1527	if (flags & LOAD_DATA)
1528		return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
1529	else
1530		return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
1531}
1532
1533static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
1534{
1535	if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
1536		return compiler->error;
1537
1538	compiler->cache_arg = 0;
1539	compiler->cache_argw = 0;
1540	return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
1541}
1542
1543static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1544{
1545	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1546		return compiler->error;
1547	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1548}
1549
1550SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1551{
1552	CHECK_ERROR();
1553	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1554	ADJUST_LOCAL_OFFSET(dst, dstw);
1555
1556	/* For UNUSED dst. Uncommon, but possible. */
1557	if (dst == SLJIT_UNUSED)
1558		return SLJIT_SUCCESS;
1559
1560	if (FAST_IS_REG(dst))
1561		return ADD(reg_map[dst], RA, ZERO);
1562
1563	/* Memory. */
1564	return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
1565}
1566
1567SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
1568{
1569	CHECK_ERROR();
1570	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
1571	ADJUST_LOCAL_OFFSET(src, srcw);
1572
1573	if (FAST_IS_REG(src))
1574		FAIL_IF(ADD(RA, reg_map[src], ZERO));
1575
1576	else if (src & SLJIT_MEM)
1577		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
1578
1579	else if (src & SLJIT_IMM)
1580		FAIL_IF(load_immediate(compiler, RA, srcw));
1581
1582	return JR(RA);
1583}
1584
1585static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1586{
1587	sljit_s32 overflow_ra = 0;
1588
1589	switch (GET_OPCODE(op)) {
1590	case SLJIT_MOV:
1591	case SLJIT_MOV_P:
1592		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1593		if (dst != src2)
1594			return ADD(reg_map[dst], reg_map[src2], ZERO);
1595		return SLJIT_SUCCESS;
1596
1597	case SLJIT_MOV_U32:
1598	case SLJIT_MOV_S32:
1599		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1600		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1601			if (op == SLJIT_MOV_S32)
1602				return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
1603
1604			return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
1605		} else if (dst != src2) {
1606			SLJIT_ASSERT(src2 == 0);
1607			return ADD(reg_map[dst], reg_map[src2], ZERO);
1608		}
1609
1610		return SLJIT_SUCCESS;
1611
1612	case SLJIT_MOV_U8:
1613	case SLJIT_MOV_S8:
1614		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1615		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1616			if (op == SLJIT_MOV_S8)
1617				return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
1618
1619			return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
1620		} else if (dst != src2) {
1621			SLJIT_ASSERT(src2 == 0);
1622			return ADD(reg_map[dst], reg_map[src2], ZERO);
1623		}
1624
1625		return SLJIT_SUCCESS;
1626
1627	case SLJIT_MOV_U16:
1628	case SLJIT_MOV_S16:
1629		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1630		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1631			if (op == SLJIT_MOV_S16)
1632				return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
1633
1634			return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
1635		} else if (dst != src2) {
1636			SLJIT_ASSERT(src2 == 0);
1637			return ADD(reg_map[dst], reg_map[src2], ZERO);
1638		}
1639
1640		return SLJIT_SUCCESS;
1641
1642	case SLJIT_NOT:
1643		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1644		if (op & SLJIT_SET_E)
1645			FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
1646		if (CHECK_FLAGS(SLJIT_SET_E))
1647			FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));
1648
1649		return SLJIT_SUCCESS;
1650
1651	case SLJIT_CLZ:
1652		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1653		if (op & SLJIT_SET_E)
1654			FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
1655		if (CHECK_FLAGS(SLJIT_SET_E))
1656			FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));
1657
1658		return SLJIT_SUCCESS;
1659
1660	case SLJIT_ADD:
1661		if (flags & SRC2_IMM) {
1662			if (op & SLJIT_SET_O) {
1663				FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
1664				if (src2 < 0)
1665					FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
1666			}
1667
1668			if (op & SLJIT_SET_E)
1669				FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));
1670
1671			if (op & SLJIT_SET_C) {
1672				if (src2 >= 0)
1673					FAIL_IF(ORI(ULESS_FLAG ,reg_map[src1], src2));
1674				else {
1675					FAIL_IF(ADDLI(ULESS_FLAG ,ZERO, src2));
1676					FAIL_IF(OR(ULESS_FLAG,reg_map[src1],ULESS_FLAG));
1677				}
1678			}
1679
1680			/* dst may be the same as src1 or src2. */
1681			if (CHECK_FLAGS(SLJIT_SET_E))
1682				FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
1683
1684			if (op & SLJIT_SET_O) {
1685				FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
1686
1687				if (src2 < 0)
1688					FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
1689			}
1690		} else {
1691			if (op & SLJIT_SET_O) {
1692				FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
1693				FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
1694
1695				if (src1 != dst)
1696					overflow_ra = reg_map[src1];
1697				else if (src2 != dst)
1698					overflow_ra = reg_map[src2];
1699				else {
1700					/* Rare ocasion. */
1701					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
1702					overflow_ra = TMP_EREG2;
1703				}
1704			}
1705
1706			if (op & SLJIT_SET_E)
1707				FAIL_IF(ADD(EQUAL_FLAG ,reg_map[src1], reg_map[src2]));
1708
1709			if (op & SLJIT_SET_C)
1710				FAIL_IF(OR(ULESS_FLAG,reg_map[src1], reg_map[src2]));
1711
1712			/* dst may be the same as src1 or src2. */
1713			if (CHECK_FLAGS(SLJIT_SET_E))
1714				FAIL_IF(ADD(reg_map[dst],reg_map[src1], reg_map[src2]));
1715
1716			if (op & SLJIT_SET_O) {
1717				FAIL_IF(XOR(OVERFLOW_FLAG,reg_map[dst], overflow_ra));
1718				FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
1719			}
1720		}
1721
1722		/* a + b >= a | b (otherwise, the carry should be set to 1). */
1723		if (op & SLJIT_SET_C)
1724			FAIL_IF(CMPLTU(ULESS_FLAG ,reg_map[dst] ,ULESS_FLAG));
1725
1726		if (op & SLJIT_SET_O)
1727			return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
1728
1729		return SLJIT_SUCCESS;
1730
1731	case SLJIT_ADDC:
1732		if (flags & SRC2_IMM) {
1733			if (op & SLJIT_SET_C) {
1734				if (src2 >= 0)
1735					FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
1736				else {
1737					FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
1738					FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
1739				}
1740			}
1741
1742			FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
1743
1744		} else {
1745			if (op & SLJIT_SET_C)
1746				FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));
1747
1748			/* dst may be the same as src1 or src2. */
1749			FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
1750		}
1751
1752		if (op & SLJIT_SET_C)
1753			FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));
1754
1755		FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));
1756
1757		if (!(op & SLJIT_SET_C))
1758			return SLJIT_SUCCESS;
1759
1760		/* Set TMP_EREG2 (dst == 0) && (ULESS_FLAG == 1). */
1761		FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
1762		FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
1763		/* Set carry flag. */
1764		return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);
1765
1766	case SLJIT_SUB:
1767		if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
1768			FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
1769			src2 = TMP_REG2;
1770			flags &= ~SRC2_IMM;
1771		}
1772
1773		if (flags & SRC2_IMM) {
1774			if (op & SLJIT_SET_O) {
1775				FAIL_IF(SHRUI(TMP_EREG1,reg_map[src1], 63));
1776
1777				if (src2 < 0)
1778					FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
1779
1780				if (src1 != dst)
1781					overflow_ra = reg_map[src1];
1782				else {
1783					/* Rare ocasion. */
1784					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
1785					overflow_ra = TMP_EREG2;
1786				}
1787			}
1788
1789			if (op & SLJIT_SET_E)
1790				FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));
1791
1792			if (op & SLJIT_SET_C) {
1793				FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
1794				FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
1795			}
1796
1797			/* dst may be the same as src1 or src2. */
1798			if (CHECK_FLAGS(SLJIT_SET_E))
1799				FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
1800
1801		} else {
1802
1803			if (op & SLJIT_SET_O) {
1804				FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
1805				FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));
1806
1807				if (src1 != dst)
1808					overflow_ra = reg_map[src1];
1809				else {
1810					/* Rare ocasion. */
1811					FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
1812					overflow_ra = TMP_EREG2;
1813				}
1814			}
1815
1816			if (op & SLJIT_SET_E)
1817				FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));
1818
1819			if (op & (SLJIT_SET_U | SLJIT_SET_C))
1820				FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));
1821
1822			if (op & SLJIT_SET_U)
1823				FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));
1824
1825			if (op & SLJIT_SET_S) {
1826				FAIL_IF(CMPLTS(LESS_FLAG ,reg_map[src1] ,reg_map[src2]));
1827				FAIL_IF(CMPLTS(GREATER_FLAG ,reg_map[src2] ,reg_map[src1]));
1828			}
1829
1830			/* dst may be the same as src1 or src2. */
1831			if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
1832				FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
1833		}
1834
1835		if (op & SLJIT_SET_O) {
1836			FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
1837			FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
1838			return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
1839		}
1840
1841		return SLJIT_SUCCESS;
1842
1843	case SLJIT_SUBC:
1844		if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
1845			FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
1846			src2 = TMP_REG2;
1847			flags &= ~SRC2_IMM;
1848		}
1849
1850		if (flags & SRC2_IMM) {
1851			if (op & SLJIT_SET_C) {
1852				FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
1853				FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
1854			}
1855
1856			/* dst may be the same as src1 or src2. */
1857			FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
1858
1859		} else {
1860			if (op & SLJIT_SET_C)
1861				FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));
1862				/* dst may be the same as src1 or src2. */
1863			FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
1864		}
1865
1866		if (op & SLJIT_SET_C)
1867			FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));
1868
1869		FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));
1870
1871		if (op & SLJIT_SET_C)
1872			FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));
1873
1874		return SLJIT_SUCCESS;
1875
1876	case SLJIT_MUL:
1877		if (flags & SRC2_IMM) {
1878			FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2));
1879			src2 = TMP_REG2;
1880			flags &= ~SRC2_IMM;
1881		}
1882
1883		FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2]));
1884
1885		return SLJIT_SUCCESS;
1886
1887#define EMIT_LOGICAL(op_imm, op_norm) \
1888	if (flags & SRC2_IMM) { \
1889		FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
1890		if (op & SLJIT_SET_E) \
1891			FAIL_IF(push_3_buffer( \
1892				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
1893				ADDR_TMP_mapped, __LINE__)); \
1894		if (CHECK_FLAGS(SLJIT_SET_E)) \
1895			FAIL_IF(push_3_buffer( \
1896				compiler, op_norm, reg_map[dst], reg_map[src1], \
1897				ADDR_TMP_mapped, __LINE__)); \
1898	} else { \
1899		if (op & SLJIT_SET_E) \
1900			FAIL_IF(push_3_buffer( \
1901				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
1902				reg_map[src2], __LINE__)); \
1903		if (CHECK_FLAGS(SLJIT_SET_E)) \
1904			FAIL_IF(push_3_buffer( \
1905				compiler, op_norm, reg_map[dst], reg_map[src1], \
1906				reg_map[src2], __LINE__)); \
1907	}
1908
1909	case SLJIT_AND:
1910		EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
1911		return SLJIT_SUCCESS;
1912
1913	case SLJIT_OR:
1914		EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
1915		return SLJIT_SUCCESS;
1916
1917	case SLJIT_XOR:
1918		EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
1919		return SLJIT_SUCCESS;
1920
1921#define EMIT_SHIFT(op_imm, op_norm) \
1922	if (flags & SRC2_IMM) { \
1923		if (op & SLJIT_SET_E) \
1924			FAIL_IF(push_3_buffer( \
1925				compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
1926				src2 & 0x3F, __LINE__)); \
1927		if (CHECK_FLAGS(SLJIT_SET_E)) \
1928			FAIL_IF(push_3_buffer( \
1929				compiler, op_imm, reg_map[dst], reg_map[src1], \
1930				src2 & 0x3F, __LINE__)); \
1931	} else { \
1932		if (op & SLJIT_SET_E) \
1933			FAIL_IF(push_3_buffer( \
1934				compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
1935				reg_map[src2], __LINE__)); \
1936		if (CHECK_FLAGS(SLJIT_SET_E)) \
1937			FAIL_IF(push_3_buffer( \
1938				compiler, op_norm, reg_map[dst], reg_map[src1], \
1939				reg_map[src2], __LINE__)); \
1940	}
1941
1942	case SLJIT_SHL:
1943		EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
1944		return SLJIT_SUCCESS;
1945
1946	case SLJIT_LSHR:
1947		EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
1948		return SLJIT_SUCCESS;
1949
1950	case SLJIT_ASHR:
1951		EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
1952		return SLJIT_SUCCESS;
1953	}
1954
1955	SLJIT_ASSERT_STOP();
1956	return SLJIT_SUCCESS;
1957}
1958
/* Common operand-marshalling driver for integer operations.

   Resolves dst, src1 and src2 into register indexes (or, for src2, an
   immediate value), emits the operation through emit_single_op() and,
   when dst is a memory operand, stores the result afterwards.

   compiler      compiler state; cache_arg / cache_argw are reset here unless
                 ALT_KEEP_CACHE is set in flags.
   op            opcode, possibly combined with flag bits (see GET_FLAGS).
   flags         data-type and behaviour bits (IMM_OP, CUMULATIVE_OP,
                 LOGICAL_OP, ALT_KEEP_CACHE, ...); further internal bits are
                 or-ed in below before being handed to the helpers.
   dst, dstw     destination operand and its word argument.
   src1, src1w   first source operand and its word argument.
   src2, src2w   second source operand and its word argument.

   Returns SLJIT_SUCCESS, or the error reported by a failing helper. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg.
	   arg2 goes to TMP_REG2, imm or src reg.
	   TMP_REG3 can be used for caching.
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r = TMP_REG2;
	sljit_s32 src1_r;
	/* Holds either a register index or, when SRC2_IMM is set, the immediate. */
	sljit_sw src2_r = 0;
	/* Register into which src2 is loaded when it is not already a register. */
	sljit_s32 sugg_src2_r = TMP_REG2;

	/* Forget the cached address unless the caller asked to keep it. */
	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move with no destination and no memory source emits nothing. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		/* Flags were requested, so the operation is still emitted. */
		if (GET_FLAGS(op))
			flags |= UNUSED_DEST;
	} else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		/* For moves, load src2 straight into the destination register. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
		/* The ARG_TEST probe failed: the store must go through getput_arg(). */
		flags |= SLOW_DEST;

	if (flags & IMM_OP) {
		/* A non-zero src2 immediate is passed as an immediate when it fits:
		   signed 16 bit for arithmetic, within UIMM_16BIT_MAX for logical. */
		if ((src2 & SLJIT_IMM) && src2w) {
			if ((!(flags & LOGICAL_OP)
					&& (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
					|| ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
				flags |= SRC2_IMM;
				src2_r = src2w;
			}
		}

		/* For CUMULATIVE_OP operations a suitable src1 immediate can take the
		   immediate slot instead; the two sources are exchanged. */
		if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
			if ((!(flags & LOGICAL_OP)
					&& (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
					|| ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
				flags |= SRC2_IMM;
				src2_r = src1w;

				/* And swap arguments. */
				src1 = src2;
				src1w = src2w;
				src2 = SLJIT_IMM;
				/* src2w = src2_r unneeded. */
			}
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	} else if (src1 & SLJIT_IMM) {
		if (src1w) {
			FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
			src1_r = TMP_REG1;
		} else
			/* NOTE(review): register index 0 presumably maps to the
			   hardware zero register - confirm against reg_map. */
			src1_r = 0;
	} else {
		/* Memory operand: try the fast load, otherwise defer to the slow path. */
		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
		src1_r = TMP_REG1;
	}

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* A move to a non-register destination uses the source register
		   itself as the result register. */
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	} else if (src2 & SLJIT_IMM) {
		/* Nothing to do when the immediate is passed directly (SRC2_IMM). */
		if (!(flags & SRC2_IMM)) {
			if (src2w) {
				FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
				src2_r = sugg_src2_r;
			} else {
				src2_r = 0;
				/* Moving zero to memory: the result register becomes index 0. */
				if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
					dst_r = 0;
			}
		}
	} else {
		/* Memory operand: try the fast load, otherwise defer to the slow path. */
		if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
		src2_r = sugg_src2_r;
	}

	/* Slow-path loads. The last two arguments of getput_arg() name the operand
	   accessed next (presumably a hint for address caching - confirm). */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(src2_r == TMP_REG2);
		/* Load src2 first when src1 cannot be cached against src2 but can be
		   cached against dst; otherwise load src1 first. */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
		}
	} else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));

	/* All operands are resolved: emit the operation itself. */
	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Write the result back when the destination is a memory operand. */
	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
			return compiler->error;
		}

		return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
	}

	return SLJIT_SUCCESS;
}
2084
2085SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw, sljit_s32 type)
2086{
2087	sljit_s32 sugg_dst_ar, dst_ar;
2088	sljit_s32 flags = GET_ALL_FLAGS(op);
2089	sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2090
2091	CHECK_ERROR();
2092	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2093	ADJUST_LOCAL_OFFSET(dst, dstw);
2094
2095	if (dst == SLJIT_UNUSED)
2096		return SLJIT_SUCCESS;
2097
2098	op = GET_OPCODE(op);
2099	if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
2100		mem_type = INT_DATA | SIGNED_DATA;
2101	sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];
2102
2103	compiler->cache_arg = 0;
2104	compiler->cache_argw = 0;
2105	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2106		ADJUST_LOCAL_OFFSET(src, srcw);
2107		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
2108		src = TMP_REG1;
2109		srcw = 0;
2110	}
2111
2112	switch (type & 0xff) {
2113	case SLJIT_EQUAL:
2114	case SLJIT_NOT_EQUAL:
2115		FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
2116		dst_ar = sugg_dst_ar;
2117		break;
2118	case SLJIT_LESS:
2119	case SLJIT_GREATER_EQUAL:
2120		dst_ar = ULESS_FLAG;
2121		break;
2122	case SLJIT_GREATER:
2123	case SLJIT_LESS_EQUAL:
2124		dst_ar = UGREATER_FLAG;
2125		break;
2126	case SLJIT_SIG_LESS:
2127	case SLJIT_SIG_GREATER_EQUAL:
2128		dst_ar = LESS_FLAG;
2129		break;
2130	case SLJIT_SIG_GREATER:
2131	case SLJIT_SIG_LESS_EQUAL:
2132		dst_ar = GREATER_FLAG;
2133		break;
2134	case SLJIT_OVERFLOW:
2135	case SLJIT_NOT_OVERFLOW:
2136		dst_ar = OVERFLOW_FLAG;
2137		break;
2138	case SLJIT_MUL_OVERFLOW:
2139	case SLJIT_MUL_NOT_OVERFLOW:
2140		FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
2141		dst_ar = sugg_dst_ar;
2142		type ^= 0x1; /* Flip type bit for the XORI below. */
2143		break;
2144
2145	default:
2146		SLJIT_ASSERT_STOP();
2147		dst_ar = sugg_dst_ar;
2148		break;
2149	}
2150
2151	if (type & 0x1) {
2152		FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
2153		dst_ar = sugg_dst_ar;
2154	}
2155
2156	if (op >= SLJIT_ADD) {
2157		if (TMP_REG2_mapped != dst_ar)
2158			FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
2159		return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
2160	}
2161
2162	if (dst & SLJIT_MEM)
2163		return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);
2164
2165	if (sugg_dst_ar != dst_ar)
2166		return ADD(sugg_dst_ar, dst_ar, ZERO);
2167
2168	return SLJIT_SUCCESS;
2169}
2170
2171SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) {
2172	CHECK_ERROR();
2173	CHECK(check_sljit_emit_op0(compiler, op));
2174
2175	op = GET_OPCODE(op);
2176	switch (op) {
2177	case SLJIT_NOP:
2178		return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);
2179
2180	case SLJIT_BREAKPOINT:
2181		return PI(BPT);
2182
2183	case SLJIT_LMUL_UW:
2184	case SLJIT_LMUL_SW:
2185	case SLJIT_DIVMOD_UW:
2186	case SLJIT_DIVMOD_SW:
2187	case SLJIT_DIV_UW:
2188	case SLJIT_DIV_SW:
2189		SLJIT_ASSERT_STOP();
2190	}
2191
2192	return SLJIT_SUCCESS;
2193}
2194
2195SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
2196{
2197	CHECK_ERROR();
2198	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2199	ADJUST_LOCAL_OFFSET(dst, dstw);
2200	ADJUST_LOCAL_OFFSET(src, srcw);
2201
2202	switch (GET_OPCODE(op)) {
2203	case SLJIT_MOV:
2204	case SLJIT_MOV_P:
2205		return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2206
2207	case SLJIT_MOV_U32:
2208		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2209
2210	case SLJIT_MOV_S32:
2211		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2212
2213	case SLJIT_MOV_U8:
2214		return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw);
2215
2216	case SLJIT_MOV_S8:
2217		return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw);
2218
2219	case SLJIT_MOV_U16:
2220		return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw);
2221
2222	case SLJIT_MOV_S16:
2223		return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw);
2224
2225	case SLJIT_MOVU:
2226	case SLJIT_MOVU_P:
2227		return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
2228
2229	case SLJIT_MOVU_U32:
2230		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
2231
2232	case SLJIT_MOVU_S32:
2233		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
2234
2235	case SLJIT_MOVU_U8:
2236		return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw);
2237
2238	case SLJIT_MOVU_S8:
2239		return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw);
2240
2241	case SLJIT_MOVU_U16:
2242		return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw);
2243
2244	case SLJIT_MOVU_S16:
2245		return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw);
2246
2247	case SLJIT_NOT:
2248		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2249
2250	case SLJIT_NEG:
2251		return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);
2252
2253	case SLJIT_CLZ:
2254		return emit_op(compiler, op, (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
2255	}
2256
2257	return SLJIT_SUCCESS;
2258}
2259
2260SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
2261{
2262	CHECK_ERROR();
2263	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2264	ADJUST_LOCAL_OFFSET(dst, dstw);
2265	ADJUST_LOCAL_OFFSET(src1, src1w);
2266	ADJUST_LOCAL_OFFSET(src2, src2w);
2267
2268	switch (GET_OPCODE(op)) {
2269	case SLJIT_ADD:
2270	case SLJIT_ADDC:
2271		return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2272
2273	case SLJIT_SUB:
2274	case SLJIT_SUBC:
2275		return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2276
2277	case SLJIT_MUL:
2278		return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
2279
2280	case SLJIT_AND:
2281	case SLJIT_OR:
2282	case SLJIT_XOR:
2283		return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2284
2285	case SLJIT_SHL:
2286	case SLJIT_LSHR:
2287	case SLJIT_ASHR:
2288		if (src2 & SLJIT_IMM)
2289			src2w &= 0x3f;
2290		if (op & SLJIT_I32_OP)
2291			src2w &= 0x1f;
2292
2293		return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2294	}
2295
2296	return SLJIT_SUCCESS;
2297}
2298
2299SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
2300{
2301	struct sljit_label *label;
2302
2303	flush_buffer(compiler);
2304
2305	CHECK_ERROR_PTR();
2306	CHECK_PTR(check_sljit_emit_label(compiler));
2307
2308	if (compiler->last_label && compiler->last_label->size == compiler->size)
2309		return compiler->last_label;
2310
2311	label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
2312	PTR_FAIL_IF(!label);
2313	set_label(label, compiler);
2314	return label;
2315}
2316
/* Emits an indirect jump or call whose target is given by an operand.

   compiler    compiler state; the pending instruction buffer is flushed first.
   type        jump type; values from SLJIT_CALL0 upwards are emitted as calls
               (JALR), everything else as a register jump (JR).
   src, srcw   target: a register, an immediate address or a memory operand.

   Returns SLJIT_SUCCESS, or the error reported by a failing helper. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	/* Register holding the target; TMP_REG2 unless src itself can be used. */
	sljit_s32 src_r = TMP_REG2;
	struct sljit_jump *jump = NULL;

	flush_buffer(compiler);

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
		/* A source mapped to machine register 0 is copied to TMP_REG2 first,
		   because the call paths below overwrite register 0. */
		if (reg_map[src] != 0)
			src_r = src;
		else
			FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
	}

	if (type >= SLJIT_CALL0) {
		SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
		if (src & (SLJIT_IMM | SLJIT_MEM)) {
			/* Bring the target address into PIC_ADDR_REG (same as TMP_REG2). */
			if (src & SLJIT_IMM)
				FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
			else {
				SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
				FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
			}

			/* Copy SLJIT_R0 into machine register 0. */
			FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));

			/* Register 54 is lowered by 16 around the call and restored after
			   (NOTE(review): presumably the stack pointer - confirm). */
			FAIL_IF(ADDI_SOLO(54, 54, -16));

			FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));

			return ADDI_SOLO(54, 54, 16);
		}

		/* Register input. */
		if (type >= SLJIT_CALL1)
			FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));

		/* The target is also copied into PIC_ADDR_REG before the call. */
		FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));

		FAIL_IF(ADDI_SOLO(54, 54, -16));

		FAIL_IF(JALR_SOLO(reg_map[src_r]));

		return ADDI_SOLO(54, 54, 16);
	}

	if (src & SLJIT_IMM) {
		/* Immediate target: record a jump entry with a fixed address, load a
		   placeholder constant into TMP_REG2 and jump through it. */
		jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
		jump->u.target = srcw;
		FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));

		if (type >= SLJIT_FAST_CALL) {
			/* One extra ZERO + ZERO -> ZERO bundle precedes the jump. */
			FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
			jump->addr = compiler->size;
			FAIL_IF(JR_SOLO(reg_map[src_r]));
		} else {
			jump->addr = compiler->size;
			FAIL_IF(JR_SOLO(reg_map[src_r]));
		}

		return SLJIT_SUCCESS;

	} else if (src & SLJIT_MEM) {
		/* Memory target: load the address into TMP_REG2 and flush the buffer
		   before emitting the jump. */
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
		flush_buffer(compiler);
	}

	FAIL_IF(JR_SOLO(reg_map[src_r]));

	/* NOTE(review): jump is only assigned in the immediate branch above,
	   which returns, so it is always NULL when this point is reached. */
	if (jump)
		jump->addr = compiler->size;

	return SLJIT_SUCCESS;
}
2397
/* Helpers for sljit_emit_jump(): store a conditional branch on register
   `src` in the local `inst` and mark the jump conditional through the
   local `flags`. BR_Z uses BEQZ_X1, BR_NZ uses BNEZ_X1.

   Each body is wrapped in do { } while (0) so that an invocation is a
   single statement and stays correct under an unbraced if / else. */
#define BR_Z(src) \
	do { \
		inst = BEQZ_X1 | SRCA_X1(src); \
		flags = IS_COND; \
	} while (0)

#define BR_NZ(src) \
	do { \
		inst = BNEZ_X1 | SRCA_X1(src); \
		flags = IS_COND; \
	} while (0)
2405
2406SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2407{
2408	struct sljit_jump *jump;
2409	sljit_ins inst;
2410	sljit_s32 flags = 0;
2411
2412	flush_buffer(compiler);
2413
2414	CHECK_ERROR_PTR();
2415	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2416
2417	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
2418	PTR_FAIL_IF(!jump);
2419	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2420	type &= 0xff;
2421
2422	switch (type) {
2423	case SLJIT_EQUAL:
2424		BR_NZ(EQUAL_FLAG);
2425		break;
2426	case SLJIT_NOT_EQUAL:
2427		BR_Z(EQUAL_FLAG);
2428		break;
2429	case SLJIT_LESS:
2430		BR_Z(ULESS_FLAG);
2431		break;
2432	case SLJIT_GREATER_EQUAL:
2433		BR_NZ(ULESS_FLAG);
2434		break;
2435	case SLJIT_GREATER:
2436		BR_Z(UGREATER_FLAG);
2437		break;
2438	case SLJIT_LESS_EQUAL:
2439		BR_NZ(UGREATER_FLAG);
2440		break;
2441	case SLJIT_SIG_LESS:
2442		BR_Z(LESS_FLAG);
2443		break;
2444	case SLJIT_SIG_GREATER_EQUAL:
2445		BR_NZ(LESS_FLAG);
2446		break;
2447	case SLJIT_SIG_GREATER:
2448		BR_Z(GREATER_FLAG);
2449		break;
2450	case SLJIT_SIG_LESS_EQUAL:
2451		BR_NZ(GREATER_FLAG);
2452		break;
2453	case SLJIT_OVERFLOW:
2454	case SLJIT_MUL_OVERFLOW:
2455		BR_Z(OVERFLOW_FLAG);
2456		break;
2457	case SLJIT_NOT_OVERFLOW:
2458	case SLJIT_MUL_NOT_OVERFLOW:
2459		BR_NZ(OVERFLOW_FLAG);
2460		break;
2461	default:
2462		/* Not conditional branch. */
2463		inst = 0;
2464		break;
2465	}
2466
2467	jump->flags |= flags;
2468
2469	if (inst) {
2470		inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
2471		PTR_FAIL_IF(PI(inst));
2472	}
2473
2474	PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));
2475	if (type <= SLJIT_JUMP) {
2476		jump->addr = compiler->size;
2477		PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
2478	} else {
2479		SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
2480		/* Cannot be optimized out if type is >= CALL0. */
2481		jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
2482		PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
2483		jump->addr = compiler->size;
2484		PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
2485	}
2486
2487	return jump;
2488}
2489
2490SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
2491{
2492	return 0;
2493}
2494
2495SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
2496{
2497	SLJIT_ASSERT_STOP();
2498}
2499
2500SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
2501{
2502	SLJIT_ASSERT_STOP();
2503}
2504
2505SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2506{
2507	struct sljit_const *const_;
2508	sljit_s32 reg;
2509
2510	flush_buffer(compiler);
2511
2512	CHECK_ERROR_PTR();
2513	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2514	ADJUST_LOCAL_OFFSET(dst, dstw);
2515
2516	const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
2517	PTR_FAIL_IF(!const_);
2518	set_const(const_, compiler);
2519
2520	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
2521
2522	PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));
2523
2524	if (dst & SLJIT_MEM)
2525		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2526	return const_;
2527}
2528
2529SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2530{
2531	sljit_ins *inst = (sljit_ins *)addr;
2532
2533	inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43);
2534	inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43);
2535	inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43);
2536	SLJIT_CACHE_FLUSH(inst, inst + 3);
2537}
2538
2539SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2540{
2541	sljit_ins *inst = (sljit_ins *)addr;
2542
2543	inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
2544	inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
2545	inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
2546	inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
2547	SLJIT_CACHE_FLUSH(inst, inst + 4);
2548}
2549
2550SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2551{
2552	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2553	return reg_map[reg];
2554}
2555
2556SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2557	void *instruction, sljit_s32 size)
2558{
2559	CHECK_ERROR();
2560	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2561	return SLJIT_ERR_UNSUPPORTED;
2562}
2563
2564