1/*
2 *    Stack-less Just-In-Time compiler
3 *
4 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 *   1. Redistributions of source code must retain the above copyright notice, this list of
10 *      conditions and the following disclaimer.
11 *
12 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13 *      of conditions and the following disclaimer in the documentation and/or other materials
14 *      provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28{
29	return "PowerPC" SLJIT_CPUINFO;
30}
31
/* Length of an instruction word.
   The same for both ppc-32 and ppc-64. */
34typedef sljit_ui sljit_ins;
35
36#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
37	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
38#define SLJIT_PPC_STACK_FRAME_V2 1
39#endif
40
41#ifdef _AIX
42#include <sys/cache.h>
43#endif
44
45#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
46#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
47#endif
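/* Note: little-endian PowerPC here presumably means the ELFv2 ABI, which does
   not use function descriptors, so the entry address itself can be passed to
   indirect calls. This also fits TMP_CALL_REG being mapped to r12 below, the
   register the ELFv2 ABI expects to hold the entry point of the callee. */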
48
49static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
50{
51#ifdef _AIX
52	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
53#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
54#	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
55	/* Cache flush for POWER architecture. */
56	while (from < to) {
57		__asm__ volatile (
58			"clf 0, %0\n"
59			"dcs\n"
60			: : "r"(from)
61		);
62		from++;
63	}
64	__asm__ volatile ( "ics" );
65#	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
66#	error "Cache flush is not implemented for PowerPC/POWER common mode."
67#	else
68	/* Cache flush for PowerPC architecture. */
69	while (from < to) {
70		__asm__ volatile (
71			"dcbf 0, %0\n"
72			"sync\n"
73			"icbi 0, %0\n"
74			: : "r"(from)
75		);
76		from++;
77	}
78	__asm__ volatile ( "isync" );
79#	endif
80#	ifdef __xlc__
81#	warning "This file may fail to compile if -qfuncsect is used"
82#	endif
83#elif defined(__xlc__)
84#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
85#else
86#error "This platform requires a cache flush implementation."
87#endif /* _AIX */
88}
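/* ppc_cache_flush is reached through the SLJIT_CACHE_FLUSH macro, which
   sljit_generate_code invokes once over the freshly written buffer (see the
   end of that function below). Flushing the data cache and invalidating the
   instruction cache is mandatory before the generated code is executed. */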
89
90#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
91#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
92#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
93#define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)
94
95#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
96#define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
97#else
98#define TMP_CALL_REG	TMP_REG2
99#endif
100
101#define TMP_FREG1	(0)
102#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
103
104static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
105	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
106};
107
108/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
110/* --------------------------------------------------------------------- */
111#define D(d)		(reg_map[d] << 21)
112#define S(s)		(reg_map[s] << 21)
113#define A(a)		(reg_map[a] << 16)
114#define B(b)		(reg_map[b] << 11)
115#define C(c)		(reg_map[c] << 6)
116#define FD(fd)		((fd) << 21)
117#define FS(fs)		((fs) << 21)
118#define FA(fa)		((fa) << 16)
119#define FB(fb)		((fb) << 11)
120#define FC(fc)		((fc) << 6)
121#define IMM(imm)	((imm) & 0xffff)
122#define CRD(d)		((d) << 21)
123
124/* Instruction bit sections.
125   OE and Rc flag (see ALT_SET_FLAGS). */
126#define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
127/* Rc flag (see ALT_SET_FLAGS). */
128#define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
129#define HI(opcode)	((opcode) << 26)
130#define LO(opcode)	((opcode) << 1)
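/* Illustrative example: a three-register ALU instruction is assembled by
   OR-ing the field macros together, e.g.

       add   rD,rA,rB  ->  ADD | D(dst) | A(src1) | B(src2)
       add.  (Rc = 1)  ->  ADD | RC(flags) | D(dst) | A(src1) | B(src2)
       addo. (OE | Rc) ->  ADD | OERC(flags) | D(dst) | A(src1) | B(src2)

   HI() supplies the 6-bit primary opcode, LO() the extended opcode, and with
   ALT_SET_FLAGS (0x400, defined further below) set, RC() evaluates to the Rc
   bit (0x1) while OERC() evaluates to OE | Rc (0x401). */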
131
132#define ADD		(HI(31) | LO(266))
133#define ADDC		(HI(31) | LO(10))
134#define ADDE		(HI(31) | LO(138))
135#define ADDI		(HI(14))
136#define ADDIC		(HI(13))
137#define ADDIS		(HI(15))
138#define ADDME		(HI(31) | LO(234))
139#define AND		(HI(31) | LO(28))
140#define ANDI		(HI(28))
141#define ANDIS		(HI(29))
142#define Bx		(HI(18))
143#define BCx		(HI(16))
144#define BCCTR		(HI(19) | LO(528) | (3 << 11))
145#define BLR		(HI(19) | LO(16) | (0x14 << 21))
146#define CNTLZD		(HI(31) | LO(58))
147#define CNTLZW		(HI(31) | LO(26))
148#define CMP		(HI(31) | LO(0))
149#define CMPI		(HI(11))
150#define CMPL		(HI(31) | LO(32))
151#define CMPLI		(HI(10))
152#define CROR		(HI(19) | LO(449))
153#define DIVD		(HI(31) | LO(489))
154#define DIVDU		(HI(31) | LO(457))
155#define DIVW		(HI(31) | LO(491))
156#define DIVWU		(HI(31) | LO(459))
157#define EXTSB		(HI(31) | LO(954))
158#define EXTSH		(HI(31) | LO(922))
159#define EXTSW		(HI(31) | LO(986))
160#define FABS		(HI(63) | LO(264))
161#define FADD		(HI(63) | LO(21))
162#define FADDS		(HI(59) | LO(21))
163#define FCFID		(HI(63) | LO(846))
164#define FCMPU		(HI(63) | LO(0))
165#define FCTIDZ		(HI(63) | LO(815))
166#define FCTIWZ		(HI(63) | LO(15))
167#define FDIV		(HI(63) | LO(18))
168#define FDIVS		(HI(59) | LO(18))
169#define FMR		(HI(63) | LO(72))
170#define FMUL		(HI(63) | LO(25))
171#define FMULS		(HI(59) | LO(25))
172#define FNEG		(HI(63) | LO(40))
173#define FRSP		(HI(63) | LO(12))
174#define FSUB		(HI(63) | LO(20))
175#define FSUBS		(HI(59) | LO(20))
176#define LD		(HI(58) | 0)
177#define LWZ		(HI(32))
178#define MFCR		(HI(31) | LO(19))
179#define MFLR		(HI(31) | LO(339) | 0x80000)
180#define MFXER		(HI(31) | LO(339) | 0x10000)
181#define MTCTR		(HI(31) | LO(467) | 0x90000)
182#define MTLR		(HI(31) | LO(467) | 0x80000)
183#define MTXER		(HI(31) | LO(467) | 0x10000)
184#define MULHD		(HI(31) | LO(73))
185#define MULHDU		(HI(31) | LO(9))
186#define MULHW		(HI(31) | LO(75))
187#define MULHWU		(HI(31) | LO(11))
188#define MULLD		(HI(31) | LO(233))
189#define MULLI		(HI(7))
190#define MULLW		(HI(31) | LO(235))
191#define NEG		(HI(31) | LO(104))
192#define NOP		(HI(24))
193#define NOR		(HI(31) | LO(124))
194#define OR		(HI(31) | LO(444))
195#define ORI		(HI(24))
196#define ORIS		(HI(25))
197#define RLDICL		(HI(30))
198#define RLWINM		(HI(21))
199#define SLD		(HI(31) | LO(27))
200#define SLW		(HI(31) | LO(24))
201#define SRAD		(HI(31) | LO(794))
202#define SRADI		(HI(31) | LO(413 << 1))
203#define SRAW		(HI(31) | LO(792))
204#define SRAWI		(HI(31) | LO(824))
205#define SRD		(HI(31) | LO(539))
206#define SRW		(HI(31) | LO(536))
207#define STD		(HI(62) | 0)
208#define STDU		(HI(62) | 1)
209#define STDUX		(HI(31) | LO(181))
210#define STFIWX		(HI(31) | LO(983))
211#define STW		(HI(36))
212#define STWU		(HI(37))
213#define STWUX		(HI(31) | LO(183))
214#define SUBF		(HI(31) | LO(40))
215#define SUBFC		(HI(31) | LO(8))
216#define SUBFE		(HI(31) | LO(136))
217#define SUBFIC		(HI(8))
218#define XOR		(HI(31) | LO(316))
219#define XORI		(HI(26))
220#define XORIS		(HI(27))
221
222#define SIMM_MAX	(0x7fff)
223#define SIMM_MIN	(-0x8000)
224#define UIMM_MAX	(0xffff)
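/* The D-form immediates above (addi, ori, cmpi, ...) are limited to these
   16-bit ranges. Larger constants are first materialized in a register with
   load_immediate(), as sljit_emit_enter does below when local_size does not
   fit into SIMM_MAX. */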
225
226#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
227SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
228{
229	sljit_sw* ptrs;
230	if (func_ptr)
231		*func_ptr = (void*)context;
232	ptrs = (sljit_sw*)func;
233	context->addr = addr ? addr : ptrs[0];
234	context->r2 = ptrs[1];
235	context->r11 = ptrs[2];
236}
237#endif
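/* Background: on AIX and the ELFv1 ABI a function pointer designates a
   function descriptor rather than code. The descriptor holds (roughly) three
   words: the entry address, the TOC pointer (r2) and an environment pointer
   (r11). sljit_set_function_context copies these fields from an existing
   descriptor so that JIT-generated code can be entered through an ordinary
   function pointer. */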
238
239static sljit_si push_inst(struct sljit_compiler *compiler, sljit_ins ins)
240{
241	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
242	FAIL_IF(!ptr);
243	*ptr = ins;
244	compiler->size++;
245	return SLJIT_SUCCESS;
246}
247
248static SLJIT_INLINE sljit_si detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
249{
250	sljit_sw diff;
251	sljit_uw target_addr;
252	sljit_sw extra_jump_flags;
253
254#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
255	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
256		return 0;
257#else
258	if (jump->flags & SLJIT_REWRITABLE_JUMP)
259		return 0;
260#endif
261
262	if (jump->flags & JUMP_ADDR)
263		target_addr = jump->u.target;
264	else {
265		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
266		target_addr = (sljit_uw)(code + jump->u.label->size);
267	}
268
269#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
270	if (jump->flags & IS_CALL)
271		goto keep_address;
272#endif
273
274	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
275
276	extra_jump_flags = 0;
277	if (jump->flags & IS_COND) {
278		if (diff <= 0x7fff && diff >= -0x8000) {
279			jump->flags |= PATCH_B;
280			return 1;
281		}
282		if (target_addr <= 0xffff) {
283			jump->flags |= PATCH_B | PATCH_ABS_B;
284			return 1;
285		}
286		extra_jump_flags = REMOVE_COND;
287
288		diff -= sizeof(sljit_ins);
289	}
290
291	if (diff <= 0x01ffffff && diff >= -0x02000000) {
292		jump->flags |= PATCH_B | extra_jump_flags;
293		return 1;
294	}
295	if (target_addr <= 0x03ffffff) {
296		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
297		return 1;
298	}
299
300#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
301#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
302keep_address:
303#endif
304	if (target_addr <= 0x7fffffff) {
305		jump->flags |= PATCH_ABS32;
306		return 1;
307	}
308	if (target_addr <= 0x7fffffffffffl) {
309		jump->flags |= PATCH_ABS48;
310		return 1;
311	}
312#endif
313
314	return 0;
315}
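/* Rough summary of the encodings selected above:
     PATCH_B               - relative bc (+/- 32 KB) or b (+/- 32 MB)
     PATCH_B | PATCH_ABS_B - absolute bca / ba when the target fits into the
                             16 / 26 bit immediate field
     PATCH_ABS32 / ABS48   - the 64-bit absolute address load is shortened to
                             2 / 4 instructions in sljit_generate_code
     no flag               - the full immediate load is kept, so rewritable
                             jumps can be repatched to any address later. */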
316
317SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
318{
319	struct sljit_memory_fragment *buf;
320	sljit_ins *code;
321	sljit_ins *code_ptr;
322	sljit_ins *buf_ptr;
323	sljit_ins *buf_end;
324	sljit_uw word_count;
325	sljit_uw addr;
326
327	struct sljit_label *label;
328	struct sljit_jump *jump;
329	struct sljit_const *const_;
330
331	CHECK_ERROR_PTR();
332	check_sljit_generate_code(compiler);
333	reverse_buf(compiler);
334
335#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
336#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
337	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
338#else
339	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
340#endif
341#endif
342	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
343	PTR_FAIL_WITH_EXEC_IF(code);
344	buf = compiler->buf;
345
346	code_ptr = code;
347	word_count = 0;
348	label = compiler->labels;
349	jump = compiler->jumps;
350	const_ = compiler->consts;
351	do {
352		buf_ptr = (sljit_ins*)buf->memory;
353		buf_end = buf_ptr + (buf->used_size >> 2);
354		do {
355			*code_ptr = *buf_ptr++;
356			SLJIT_ASSERT(!label || label->size >= word_count);
357			SLJIT_ASSERT(!jump || jump->addr >= word_count);
358			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
359			/* These structures are ordered by their address. */
360			if (label && label->size == word_count) {
361				/* Just recording the address. */
362				label->addr = (sljit_uw)code_ptr;
363				label->size = code_ptr - code;
364				label = label->next;
365			}
366			if (jump && jump->addr == word_count) {
367#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
368				jump->addr = (sljit_uw)(code_ptr - 3);
369#else
370				jump->addr = (sljit_uw)(code_ptr - 6);
371#endif
372				if (detect_jump_type(jump, code_ptr, code)) {
373#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
374					code_ptr[-3] = code_ptr[0];
375					code_ptr -= 3;
376#else
377					if (jump->flags & PATCH_ABS32) {
378						code_ptr -= 3;
379						code_ptr[-1] = code_ptr[2];
380						code_ptr[0] = code_ptr[3];
381					}
382					else if (jump->flags & PATCH_ABS48) {
383						code_ptr--;
384						code_ptr[-1] = code_ptr[0];
385						code_ptr[0] = code_ptr[1];
386						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
387						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
388						code_ptr[-3] ^= 0x8422;
389						/* oris -> ori */
390						code_ptr[-2] ^= 0x4000000;
391					}
392					else {
393						code_ptr[-6] = code_ptr[0];
394						code_ptr -= 6;
395					}
396#endif
397					if (jump->flags & REMOVE_COND) {
398						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
399						code_ptr++;
400						jump->addr += sizeof(sljit_ins);
401						code_ptr[0] = Bx;
402						jump->flags -= IS_COND;
403					}
404				}
405				jump = jump->next;
406			}
407			if (const_ && const_->addr == word_count) {
408				const_->addr = (sljit_uw)code_ptr;
409				const_ = const_->next;
410			}
411			code_ptr ++;
412			word_count ++;
413		} while (buf_ptr < buf_end);
414
415		buf = buf->next;
416	} while (buf);
417
418	if (label && label->size == word_count) {
419		label->addr = (sljit_uw)code_ptr;
420		label->size = code_ptr - code;
421		label = label->next;
422	}
423
424	SLJIT_ASSERT(!label);
425	SLJIT_ASSERT(!jump);
426	SLJIT_ASSERT(!const_);
427#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
428	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
429#else
430	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
431#endif
432
433	jump = compiler->jumps;
434	while (jump) {
435		do {
436			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
437			buf_ptr = (sljit_ins*)jump->addr;
438			if (jump->flags & PATCH_B) {
439				if (jump->flags & IS_COND) {
440					if (!(jump->flags & PATCH_ABS_B)) {
441						addr = addr - jump->addr;
442						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
443						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
444					}
445					else {
446						SLJIT_ASSERT(addr <= 0xffff);
447						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
448					}
449				}
450				else {
451					if (!(jump->flags & PATCH_ABS_B)) {
452						addr = addr - jump->addr;
453						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
454						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
455					}
456					else {
457						SLJIT_ASSERT(addr <= 0x03ffffff);
458						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
459					}
460				}
461				break;
462			}
463			/* Set the fields of immediate loads. */
464#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
465			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
466			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
467#else
468			if (jump->flags & PATCH_ABS32) {
469				SLJIT_ASSERT(addr <= 0x7fffffff);
470				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
471				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
472				break;
473			}
474			if (jump->flags & PATCH_ABS48) {
475				SLJIT_ASSERT(addr <= 0x7fffffffffff);
476				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
477				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
478				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
479				break;
480			}
481			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
482			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
483			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
484			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
485#endif
486		} while (0);
487		jump = jump->next;
488	}
489
490	compiler->error = SLJIT_ERR_COMPILED;
491	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
492	SLJIT_CACHE_FLUSH(code, code_ptr);
493
494#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
495#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
496	if (((sljit_sw)code_ptr) & 0x4)
497		code_ptr++;
498	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
499	return code_ptr;
500#else
501	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
502	return code_ptr;
503#endif
504#else
505	return code;
506#endif
507}
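/* When SLJIT_INDIRECT_CALL is defined, the code above appends a
   sljit_function_context (in effect a function descriptor) after the
   generated instructions and returns the address of that descriptor instead
   of the code itself, so the result can be called through a plain C function
   pointer on descriptor-based ABIs (AIX, ELFv1). */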
508
509/* --------------------------------------------------------------------- */
510/*  Entry, exit                                                          */
511/* --------------------------------------------------------------------- */
512
513/* inp_flags: */
514
515/* Creates an index in data_transfer_insts array. */
516#define LOAD_DATA	0x01
517#define INDEXED		0x02
518#define WRITE_BACK	0x04
519#define WORD_DATA	0x00
520#define BYTE_DATA	0x08
521#define HALF_DATA	0x10
522#define INT_DATA	0x18
523#define SIGNED_DATA	0x20
524/* Separates integer and floating point registers */
525#define GPR_REG		0x3f
526#define DOUBLE_DATA	0x40
527
528#define MEM_MASK	0x7f
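/* Examples of indices formed from these flags (see data_transfer_insts
   further below):
     BYTE_DATA | LOAD_DATA                -> lbz  (zero-extending byte load)
     HALF_DATA | SIGNED_DATA | LOAD_DATA  -> lha  (sign-extending half load)
     WORD_DATA | INDEXED                  -> stwx / stdx (indexed word store) */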
529
530/* Other inp_flags. */
531
532#define ARG_TEST	0x000100
/* Integer operation that also sets flags -> requires sign extension on 64-bit systems. */
534#define ALT_SIGN_EXT	0x000200
535/* This flag affects the RC() and OERC() macros. */
536#define ALT_SET_FLAGS	0x000400
537#define ALT_KEEP_CACHE	0x000800
538#define ALT_FORM1	0x010000
539#define ALT_FORM2	0x020000
540#define ALT_FORM3	0x040000
541#define ALT_FORM4	0x080000
542#define ALT_FORM5	0x100000
543#define ALT_FORM6	0x200000
544
/* Source and destination are registers. */
546#define REG_DEST	0x000001
547#define REG1_SOURCE	0x000002
548#define REG2_SOURCE	0x000004
549/* getput_arg_fast returned true. */
550#define FAST_DEST	0x000008
551/* Multiple instructions are required. */
552#define SLOW_DEST	0x000010
553/*
554ALT_SIGN_EXT		0x000200
555ALT_SET_FLAGS		0x000400
556ALT_FORM1		0x010000
557...
558ALT_FORM6		0x200000 */
559
560#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
561#include "sljitNativePPC_32.c"
562#else
563#include "sljitNativePPC_64.c"
564#endif
565
566#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
567#define STACK_STORE	STW
568#define STACK_LOAD	LWZ
569#else
570#define STACK_STORE	STD
571#define STACK_LOAD	LD
572#endif
573
574SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_enter(struct sljit_compiler *compiler,
575	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
576	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
577{
578	sljit_si i, tmp, offs;
579
580	CHECK_ERROR();
581	check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
582
583	compiler->options = options;
584	compiler->scratches = scratches;
585	compiler->saveds = saveds;
586	compiler->fscratches = fscratches;
587	compiler->fsaveds = fsaveds;
588#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
589	compiler->logical_local_size = local_size;
590#endif
591
592	FAIL_IF(push_inst(compiler, MFLR | D(0)));
593	offs = -(sljit_si)(sizeof(sljit_sw));
594	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
595
596	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
597	for (i = SLJIT_S0; i >= tmp; i--) {
598		offs -= (sljit_si)(sizeof(sljit_sw));
599		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
600	}
601
602	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
603		offs -= (sljit_si)(sizeof(sljit_sw));
604		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
605	}
606
607	SLJIT_ASSERT(offs == -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
608
609#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
610	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
611#else
612	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
613#endif
614
615	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
616	if (args >= 1)
617		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
618	if (args >= 2)
619		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
620	if (args >= 3)
621		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
622
623	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
624	local_size = (local_size + 15) & ~0xf;
625	compiler->local_size = local_size;
626
627#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
628	if (local_size <= SIMM_MAX)
629		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
630	else {
631		FAIL_IF(load_immediate(compiler, 0, -local_size));
632		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
633	}
634#else
635	if (local_size <= SIMM_MAX)
636		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
637	else {
638		FAIL_IF(load_immediate(compiler, 0, -local_size));
639		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
640	}
641#endif
642
643	return SLJIT_SUCCESS;
644}
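/* Rough shape of the prologue built above: LR is read into r0 and stored
   into the LR save slot just above the entry SP (one or two words up,
   depending on the stack frame version), TMP_ZERO and the preserved
   registers are stored below the entry SP, TMP_ZERO is cleared, up to three
   word-sized arguments are copied into S0-S2, and stwu/stdu finally
   allocates the 16-byte aligned frame while writing the back chain at
   0(SP). */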
645
646SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler,
647	sljit_si options, sljit_si args, sljit_si scratches, sljit_si saveds,
648	sljit_si fscratches, sljit_si fsaveds, sljit_si local_size)
649{
650	CHECK_ERROR_VOID();
651	check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
652
653	compiler->options = options;
654	compiler->scratches = scratches;
655	compiler->saveds = saveds;
656	compiler->fscratches = fscratches;
657	compiler->fsaveds = fsaveds;
658#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
659	compiler->logical_local_size = local_size;
660#endif
661
662	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + FIXED_LOCALS_OFFSET;
663	compiler->local_size = (local_size + 15) & ~0xf;
664}
665
666SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_return(struct sljit_compiler *compiler, sljit_si op, sljit_si src, sljit_sw srcw)
667{
668	sljit_si i, tmp, offs;
669
670	CHECK_ERROR();
671	check_sljit_emit_return(compiler, op, src, srcw);
672
673	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
674
675	if (compiler->local_size <= SIMM_MAX)
676		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
677	else {
678		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
679		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
680	}
681
682#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
683	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
684#else
685	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
686#endif
687
688	offs = -(sljit_si)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
689
690	tmp = compiler->scratches;
691	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
692		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
693		offs += (sljit_si)(sizeof(sljit_sw));
694	}
695
696	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
697	for (i = tmp; i <= SLJIT_S0; i++) {
698		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
699		offs += (sljit_si)(sizeof(sljit_sw));
700	}
701
702	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
703	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
704
705	FAIL_IF(push_inst(compiler, MTLR | S(0)));
706	FAIL_IF(push_inst(compiler, BLR));
707
708	return SLJIT_SUCCESS;
709}
710
711#undef STACK_STORE
712#undef STACK_LOAD
713
714/* --------------------------------------------------------------------- */
715/*  Operators                                                            */
716/* --------------------------------------------------------------------- */
717
718/* i/x - immediate/indexed form
719   n/w - no write-back / write-back (1 bit)
720   s/l - store/load (1 bit)
721   u/s - signed/unsigned (1 bit)
722   w/b/h/i - word/byte/half/int allowed (2 bit)
   It contains 32 items, but not all of them are different. */
724
/* 64-bit only: [reg+imm] must be aligned to 4 bytes. */
726#define INT_ALIGNED	0x10000
727/* 64-bit only: there is no lwau instruction. */
728#define UPDATE_REQ	0x20000
729
730#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
731#define ARCH_32_64(a, b)	a
732#define INST_CODE_AND_DST(inst, flags, reg) \
733	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
734#else
735#define ARCH_32_64(a, b)	b
736#define INST_CODE_AND_DST(inst, flags, reg) \
737	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
738#endif
739
740static SLJIT_CONST sljit_ins data_transfer_insts[64 + 8] = {
741
742/* -------- Unsigned -------- */
743
744/* Word. */
745
746/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
747/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
748/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
749/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
750
751/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
752/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
753/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
754/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
755
756/* Byte. */
757
758/* u b n i s */ HI(38) /* stb */,
759/* u b n i l */ HI(34) /* lbz */,
760/* u b n x s */ HI(31) | LO(215) /* stbx */,
761/* u b n x l */ HI(31) | LO(87) /* lbzx */,
762
763/* u b w i s */ HI(39) /* stbu */,
764/* u b w i l */ HI(35) /* lbzu */,
765/* u b w x s */ HI(31) | LO(247) /* stbux */,
766/* u b w x l */ HI(31) | LO(119) /* lbzux */,
767
768/* Half. */
769
770/* u h n i s */ HI(44) /* sth */,
771/* u h n i l */ HI(40) /* lhz */,
772/* u h n x s */ HI(31) | LO(407) /* sthx */,
773/* u h n x l */ HI(31) | LO(279) /* lhzx */,
774
775/* u h w i s */ HI(45) /* sthu */,
776/* u h w i l */ HI(41) /* lhzu */,
777/* u h w x s */ HI(31) | LO(439) /* sthux */,
778/* u h w x l */ HI(31) | LO(311) /* lhzux */,
779
780/* Int. */
781
782/* u i n i s */ HI(36) /* stw */,
783/* u i n i l */ HI(32) /* lwz */,
784/* u i n x s */ HI(31) | LO(151) /* stwx */,
785/* u i n x l */ HI(31) | LO(23) /* lwzx */,
786
787/* u i w i s */ HI(37) /* stwu */,
788/* u i w i l */ HI(33) /* lwzu */,
789/* u i w x s */ HI(31) | LO(183) /* stwux */,
790/* u i w x l */ HI(31) | LO(55) /* lwzux */,
791
792/* -------- Signed -------- */
793
794/* Word. */
795
796/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
797/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
798/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
799/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
800
801/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
802/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
803/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
804/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
805
806/* Byte. */
807
808/* s b n i s */ HI(38) /* stb */,
809/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
810/* s b n x s */ HI(31) | LO(215) /* stbx */,
811/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
812
813/* s b w i s */ HI(39) /* stbu */,
814/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
815/* s b w x s */ HI(31) | LO(247) /* stbux */,
816/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
817
818/* Half. */
819
820/* s h n i s */ HI(44) /* sth */,
821/* s h n i l */ HI(42) /* lha */,
822/* s h n x s */ HI(31) | LO(407) /* sthx */,
823/* s h n x l */ HI(31) | LO(343) /* lhax */,
824
825/* s h w i s */ HI(45) /* sthu */,
826/* s h w i l */ HI(43) /* lhau */,
827/* s h w x s */ HI(31) | LO(439) /* sthux */,
828/* s h w x l */ HI(31) | LO(375) /* lhaux */,
829
830/* Int. */
831
832/* s i n i s */ HI(36) /* stw */,
833/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
834/* s i n x s */ HI(31) | LO(151) /* stwx */,
835/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
836
837/* s i w i s */ HI(37) /* stwu */,
838/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
839/* s i w x s */ HI(31) | LO(183) /* stwux */,
840/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
841
842/* -------- Double -------- */
843
844/* d   n i s */ HI(54) /* stfd */,
845/* d   n i l */ HI(50) /* lfd */,
846/* d   n x s */ HI(31) | LO(727) /* stfdx */,
847/* d   n x l */ HI(31) | LO(599) /* lfdx */,
848
849/* s   n i s */ HI(52) /* stfs */,
850/* s   n i l */ HI(48) /* lfs */,
851/* s   n x s */ HI(31) | LO(663) /* stfsx */,
852/* s   n x l */ HI(31) | LO(535) /* lfsx */,
853
854};
855
856#undef ARCH_32_64
857
/* Simple cases (no caching is required). */
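/* getput_arg_fast returns -1 when it has emitted the access itself (callers
   then check compiler->error), 1 when ARG_TEST is set and a single
   instruction would suffice, and 0 when the slower getput_arg path below is
   required. */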
859static sljit_si getput_arg_fast(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw)
860{
861	sljit_ins inst;
862
863	/* Should work when (arg & REG_MASK) == 0. */
864	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
865	SLJIT_ASSERT(arg & SLJIT_MEM);
866
867	if (arg & OFFS_REG_MASK) {
868		if (argw & 0x3)
869			return 0;
870		if (inp_flags & ARG_TEST)
871			return 1;
872
873		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
874		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
875		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
876		return -1;
877	}
878
879	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
880		inp_flags &= ~WRITE_BACK;
881
882#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
883	inst = data_transfer_insts[inp_flags & MEM_MASK];
884	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
885
886	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
887		return 0;
888	if (inp_flags & ARG_TEST)
889		return 1;
890#endif
891
892#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
893	if (argw > SIMM_MAX || argw < SIMM_MIN)
894		return 0;
895	if (inp_flags & ARG_TEST)
896		return 1;
897
898	inst = data_transfer_insts[inp_flags & MEM_MASK];
899	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
900#endif
901
902	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
903	return -1;
904}
905
/* See getput_arg below.
   Note: can_cache is called only for binary operators, and those operators
   always use word arguments without write-back. */
909static sljit_si can_cache(sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
910{
911	sljit_sw high_short, next_high_short;
912#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
913	sljit_sw diff;
914#endif
915
916	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
917
918	if (arg & OFFS_REG_MASK)
919		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
920
921	if (next_arg & OFFS_REG_MASK)
922		return 0;
923
924#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
925	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
926	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
927	return high_short == next_high_short;
928#else
929	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
930		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
931		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
932		if (high_short == next_high_short)
933			return 1;
934	}
935
936	diff = argw - next_argw;
937	if (!(arg & REG_MASK))
938		return diff <= SIMM_MAX && diff >= SIMM_MIN;
939
940	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
941		return 1;
942
943	return 0;
944#endif
945}
946
947#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
948#define ADJUST_CACHED_IMM(imm) \
949	if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
950		/* Adjust cached value. Fortunately this is really a rare case */ \
951		compiler->cache_argw += imm & 0x3; \
952		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
953		imm &= ~0x3; \
954	}
955#endif
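/* Example: ld, std and lwa (INT_ALIGNED forms) require the 16-bit
   displacement to be a multiple of 4. When the cached displacement would
   leave a 2-bit remainder, the macro above folds that remainder into
   TMP_REG3 so the displacement used by the instruction stays aligned. */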
956
957/* Emit the necessary instructions. See can_cache above. */
958static sljit_si getput_arg(struct sljit_compiler *compiler, sljit_si inp_flags, sljit_si reg, sljit_si arg, sljit_sw argw, sljit_si next_arg, sljit_sw next_argw)
959{
960	sljit_si tmp_r;
961	sljit_ins inst;
962	sljit_sw high_short, next_high_short;
963#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
964	sljit_sw diff;
965#endif
966
967	SLJIT_ASSERT(arg & SLJIT_MEM);
968
969	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
970	/* Special case for "mov reg, [reg, ... ]". */
971	if ((arg & REG_MASK) == tmp_r)
972		tmp_r = TMP_REG1;
973
974	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
975		argw &= 0x3;
976		/* Otherwise getput_arg_fast would capture it. */
977		SLJIT_ASSERT(argw);
978
979		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
980			tmp_r = TMP_REG3;
981		else {
982			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
983				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
984				compiler->cache_argw = argw;
985				tmp_r = TMP_REG3;
986			}
987#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
988			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
989#else
990			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
991#endif
992		}
993		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
994		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
995		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
996	}
997
998	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
999		inp_flags &= ~WRITE_BACK;
1000
1001	inst = data_transfer_insts[inp_flags & MEM_MASK];
1002	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
1003
1004#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1005	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
1006			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
1007#endif
1008
1009		arg &= REG_MASK;
1010		high_short = (sljit_si)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
		/* Otherwise getput_arg_fast would have handled it. */
1012#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1013		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
1014#else
1015		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
1016#endif
1017
1018		if (inp_flags & WRITE_BACK) {
1019			if (arg == reg) {
1020				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
1021				reg = tmp_r;
1022			}
1023			tmp_r = arg;
1024			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
1025		}
1026		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
1027			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
1028				next_high_short = (sljit_si)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
1029				if (high_short == next_high_short) {
1030					compiler->cache_arg = SLJIT_MEM | arg;
1031					compiler->cache_argw = high_short;
1032					tmp_r = TMP_REG3;
1033				}
1034			}
1035			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
1036		}
1037		else
1038			tmp_r = TMP_REG3;
1039
1040		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));
1041
1042#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1043	}
1044
1045	/* Everything else is PPC-64 only. */
1046	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
1047		diff = argw - compiler->cache_argw;
1048		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1049			ADJUST_CACHED_IMM(diff);
1050			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1051		}
1052
1053		diff = argw - next_argw;
1054		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1055			SLJIT_ASSERT(inp_flags & LOAD_DATA);
1056
1057			compiler->cache_arg = SLJIT_IMM;
1058			compiler->cache_argw = argw;
1059			tmp_r = TMP_REG3;
1060		}
1061
1062		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1063		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
1064	}
1065
1066	diff = argw - compiler->cache_argw;
1067	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1068		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
1069		ADJUST_CACHED_IMM(diff);
1070		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
1071	}
1072
1073	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1074		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1075		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1076		if (compiler->cache_argw != argw) {
1077			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
1078			compiler->cache_argw = argw;
1079		}
1080		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1081	}
1082
1083	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
1084		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1085		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1086
1087		compiler->cache_arg = SLJIT_IMM;
1088		compiler->cache_argw = argw;
1089
1090		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1091		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1092		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
1093	}
1094
1095	diff = argw - next_argw;
1096	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1097		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1098		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1099		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));
1100
1101		compiler->cache_arg = arg;
1102		compiler->cache_argw = argw;
1103
1104		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
1105	}
1106
1107	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
1108		SLJIT_ASSERT(inp_flags & LOAD_DATA);
1109		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1110
1111		compiler->cache_arg = SLJIT_IMM;
1112		compiler->cache_argw = argw;
1113		tmp_r = TMP_REG3;
1114	}
1115	else
1116		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1117
1118	/* Get the indexed version instead of the normal one. */
1119	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
1120	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
1121	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
1122#endif
1123}
1124
1125static SLJIT_INLINE sljit_si emit_op_mem2(struct sljit_compiler *compiler, sljit_si flags, sljit_si reg, sljit_si arg1, sljit_sw arg1w, sljit_si arg2, sljit_sw arg2w)
1126{
1127	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1128		return compiler->error;
1129	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1130}
1131
1132static sljit_si emit_op(struct sljit_compiler *compiler, sljit_si op, sljit_si input_flags,
1133	sljit_si dst, sljit_sw dstw,
1134	sljit_si src1, sljit_sw src1w,
1135	sljit_si src2, sljit_sw src2w)
1136{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
1141	sljit_si dst_r;
1142	sljit_si src1_r;
1143	sljit_si src2_r;
1144	sljit_si sugg_src2_r = TMP_REG2;
1145	sljit_si flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);
1146
1147	if (!(input_flags & ALT_KEEP_CACHE)) {
1148		compiler->cache_arg = 0;
1149		compiler->cache_argw = 0;
1150	}
1151
1152	/* Destination check. */
1153	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
1154		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI && !(src2 & SLJIT_MEM))
1155			return SLJIT_SUCCESS;
1156		dst_r = TMP_REG2;
1157	}
1158	else if (FAST_IS_REG(dst)) {
1159		dst_r = dst;
1160		flags |= REG_DEST;
1161		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1162			sugg_src2_r = dst_r;
1163	}
1164	else {
1165		SLJIT_ASSERT(dst & SLJIT_MEM);
1166		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
1167			flags |= FAST_DEST;
1168			dst_r = TMP_REG2;
1169		}
1170		else {
1171			flags |= SLOW_DEST;
1172			dst_r = 0;
1173		}
1174	}
1175
1176	/* Source 1. */
1177	if (FAST_IS_REG(src1)) {
1178		src1_r = src1;
1179		flags |= REG1_SOURCE;
1180	}
1181	else if (src1 & SLJIT_IMM) {
1182		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1183		src1_r = TMP_REG1;
1184	}
1185	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
1186		FAIL_IF(compiler->error);
1187		src1_r = TMP_REG1;
1188	}
1189	else
1190		src1_r = 0;
1191
1192	/* Source 2. */
1193	if (FAST_IS_REG(src2)) {
1194		src2_r = src2;
1195		flags |= REG2_SOURCE;
1196		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_SI)
1197			dst_r = src2_r;
1198	}
1199	else if (src2 & SLJIT_IMM) {
1200		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1201		src2_r = sugg_src2_r;
1202	}
1203	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
1204		FAIL_IF(compiler->error);
1205		src2_r = sugg_src2_r;
1206	}
1207	else
1208		src2_r = 0;
1209
	/* src1_r, src2_r and dst_r can be zero (= unprocessed).
	   When all of them are zero, every argument uses a complex addressing mode
	   and the operation is binary. */
1212	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
1213		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1214			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1215			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1216		}
1217		else {
1218			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1219			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1220		}
1221		src1_r = TMP_REG1;
1222		src2_r = TMP_REG2;
1223	}
1224	else if (src1_r == 0 && src2_r == 0) {
1225		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1226		src1_r = TMP_REG1;
1227	}
1228	else if (src1_r == 0 && dst_r == 0) {
1229		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1230		src1_r = TMP_REG1;
1231	}
1232	else if (src2_r == 0 && dst_r == 0) {
1233		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1234		src2_r = sugg_src2_r;
1235	}
1236
1237	if (dst_r == 0)
1238		dst_r = TMP_REG2;
1239
1240	if (src1_r == 0) {
1241		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
1242		src1_r = TMP_REG1;
1243	}
1244
1245	if (src2_r == 0) {
1246		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
1247		src2_r = sugg_src2_r;
1248	}
1249
1250	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1251
1252	if (flags & (FAST_DEST | SLOW_DEST)) {
1253		if (flags & FAST_DEST)
1254			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
1255		else
1256			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
1257	}
1258	return SLJIT_SUCCESS;
1259}
1260
1261SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
1262{
1263#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1264	sljit_si int_op = op & SLJIT_INT_OP;
1265#endif
1266
1267	CHECK_ERROR();
1268	check_sljit_emit_op0(compiler, op);
1269
1270	op = GET_OPCODE(op);
1271	switch (op) {
1272	case SLJIT_BREAKPOINT:
1273	case SLJIT_NOP:
1274		return push_inst(compiler, NOP);
1275	case SLJIT_UMUL:
1276	case SLJIT_SMUL:
1277		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1278#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1279		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1280		return push_inst(compiler, (op == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1281#else
1282		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1283		return push_inst(compiler, (op == SLJIT_UMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1284#endif
1285	case SLJIT_UDIV:
1286	case SLJIT_SDIV:
1287		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1288#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1289		if (int_op) {
1290			FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1291			FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1292		} else {
1293			FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1294			FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1295		}
1296		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
1297#else
1298		FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1299		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1300		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
1301#endif
1302	}
1303
1304	return SLJIT_SUCCESS;
1305}
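/* Register conventions of the double-result operations above: SLJIT_UMUL and
   SLJIT_SMUL leave the low half of the product in R0 and the high half in
   R1; SLJIT_UDIV and SLJIT_SDIV leave the quotient in R0 and the remainder
   in R1 (the original dividend is parked in TMP_REG1 so the remainder can be
   recomputed with a multiply and subf). */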
1306
1307#define EMIT_MOV(type, type_flags, type_cast) \
1308	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1309
1310SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
1311	sljit_si dst, sljit_sw dstw,
1312	sljit_si src, sljit_sw srcw)
1313{
1314	sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1315	sljit_si op_flags = GET_ALL_FLAGS(op);
1316
1317	CHECK_ERROR();
1318	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1319	ADJUST_LOCAL_OFFSET(dst, dstw);
1320	ADJUST_LOCAL_OFFSET(src, srcw);
1321
1322	op = GET_OPCODE(op);
1323	if ((src & SLJIT_IMM) && srcw == 0)
1324		src = TMP_ZERO;
1325
1326	if (op_flags & SLJIT_SET_O)
1327		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1328
1329	if (op_flags & SLJIT_INT_OP) {
1330		if (op < SLJIT_NOT) {
1331			if (FAST_IS_REG(src) && src == dst) {
1332				if (!TYPE_CAST_NEEDED(op))
1333					return SLJIT_SUCCESS;
1334			}
1335#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1336			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
1337				op = SLJIT_MOV_UI;
1338			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
1339				op = SLJIT_MOVU_UI;
1340			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
1341				op = SLJIT_MOV_SI;
1342			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
1343				op = SLJIT_MOVU_SI;
1344#endif
1345		}
1346#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1347		else {
1348			/* Most operations expect sign extended arguments. */
1349			flags |= INT_DATA | SIGNED_DATA;
1350			if (src & SLJIT_IMM)
1351				srcw = (sljit_si)srcw;
1352		}
1353#endif
1354	}
1355
1356	switch (op) {
1357	case SLJIT_MOV:
1358	case SLJIT_MOV_P:
1359#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1360	case SLJIT_MOV_UI:
1361	case SLJIT_MOV_SI:
1362#endif
1363		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1364
1365#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1366	case SLJIT_MOV_UI:
1367		return EMIT_MOV(SLJIT_MOV_UI, INT_DATA, (sljit_ui));
1368
1369	case SLJIT_MOV_SI:
1370		return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA, (sljit_si));
1371#endif
1372
1373	case SLJIT_MOV_UB:
1374		return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA, (sljit_ub));
1375
1376	case SLJIT_MOV_SB:
1377		return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA, (sljit_sb));
1378
1379	case SLJIT_MOV_UH:
1380		return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA, (sljit_uh));
1381
1382	case SLJIT_MOV_SH:
1383		return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA, (sljit_sh));
1384
1385	case SLJIT_MOVU:
1386	case SLJIT_MOVU_P:
1387#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1388	case SLJIT_MOVU_UI:
1389	case SLJIT_MOVU_SI:
1390#endif
1391		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1392
1393#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1394	case SLJIT_MOVU_UI:
1395		return EMIT_MOV(SLJIT_MOV_UI, INT_DATA | WRITE_BACK, (sljit_ui));
1396
1397	case SLJIT_MOVU_SI:
1398		return EMIT_MOV(SLJIT_MOV_SI, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_si));
1399#endif
1400
1401	case SLJIT_MOVU_UB:
1402		return EMIT_MOV(SLJIT_MOV_UB, BYTE_DATA | WRITE_BACK, (sljit_ub));
1403
1404	case SLJIT_MOVU_SB:
1405		return EMIT_MOV(SLJIT_MOV_SB, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sb));
1406
1407	case SLJIT_MOVU_UH:
1408		return EMIT_MOV(SLJIT_MOV_UH, HALF_DATA | WRITE_BACK, (sljit_uh));
1409
1410	case SLJIT_MOVU_SH:
1411		return EMIT_MOV(SLJIT_MOV_SH, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_sh));
1412
1413	case SLJIT_NOT:
1414		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1415
1416	case SLJIT_NEG:
1417		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1418
1419	case SLJIT_CLZ:
1420#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1421		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_INT_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1422#else
1423		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1424#endif
1425	}
1426
1427	return SLJIT_SUCCESS;
1428}
1429
1430#undef EMIT_MOV
1431
1432#define TEST_SL_IMM(src, srcw) \
1433	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)
1434
1435#define TEST_UL_IMM(src, srcw) \
1436	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))
1437
1438#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1439#define TEST_SH_IMM(src, srcw) \
1440	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
1441#else
1442#define TEST_SH_IMM(src, srcw) \
1443	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
1444#endif
1445
1446#define TEST_UH_IMM(src, srcw) \
1447	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))
1448
1449#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1450#define TEST_ADD_IMM(src, srcw) \
1451	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
1452#else
1453#define TEST_ADD_IMM(src, srcw) \
1454	((src) & SLJIT_IMM)
1455#endif
1456
1457#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1458#define TEST_UI_IMM(src, srcw) \
1459	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
1460#else
1461#define TEST_UI_IMM(src, srcw) \
1462	((src) & SLJIT_IMM)
1463#endif
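/* These tests roughly select the immediate form of an operation when the
   constant allows it:
     TEST_SL_IMM - fits the signed 16-bit field (addi, subfic, mulli, ...)
     TEST_UL_IMM - fits the unsigned 16-bit field (andi., ori, xori)
     TEST_SH_IMM - a signed 16-bit value shifted left by 16 (addis)
     TEST_UH_IMM - an unsigned 16-bit value shifted left by 16 (andis., oris)
     TEST_ADD_IMM / TEST_UI_IMM - wider, mostly 64-bit-only cases that are
     handled with extra instructions in sljitNativePPC_64.c. */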
1464
1465SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
1466	sljit_si dst, sljit_sw dstw,
1467	sljit_si src1, sljit_sw src1w,
1468	sljit_si src2, sljit_sw src2w)
1469{
1470	sljit_si flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1471
1472	CHECK_ERROR();
1473	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1474	ADJUST_LOCAL_OFFSET(dst, dstw);
1475	ADJUST_LOCAL_OFFSET(src1, src1w);
1476	ADJUST_LOCAL_OFFSET(src2, src2w);
1477
1478	if ((src1 & SLJIT_IMM) && src1w == 0)
1479		src1 = TMP_ZERO;
1480	if ((src2 & SLJIT_IMM) && src2w == 0)
1481		src2 = TMP_ZERO;
1482
1483#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1484	if (op & SLJIT_INT_OP) {
1485		/* Most operations expect sign extended arguments. */
1486		flags |= INT_DATA | SIGNED_DATA;
1487		if (src1 & SLJIT_IMM)
1488			src1w = (sljit_si)(src1w);
1489		if (src2 & SLJIT_IMM)
1490			src2w = (sljit_si)(src2w);
1491		if (GET_FLAGS(op))
1492			flags |= ALT_SIGN_EXT;
1493	}
1494#endif
1495	if (op & SLJIT_SET_O)
1496		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1497	if (src2 == TMP_REG2)
1498		flags |= ALT_KEEP_CACHE;
1499
1500	switch (GET_OPCODE(op)) {
1501	case SLJIT_ADD:
1502		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1503			if (TEST_SL_IMM(src2, src2w)) {
1504				compiler->imm = src2w & 0xffff;
1505				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1506			}
1507			if (TEST_SL_IMM(src1, src1w)) {
1508				compiler->imm = src1w & 0xffff;
1509				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1510			}
1511			if (TEST_SH_IMM(src2, src2w)) {
1512				compiler->imm = (src2w >> 16) & 0xffff;
1513				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1514			}
1515			if (TEST_SH_IMM(src1, src1w)) {
1516				compiler->imm = (src1w >> 16) & 0xffff;
1517				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1518			}
1519			/* Range between -1 and -32768 is covered above. */
1520			if (TEST_ADD_IMM(src2, src2w)) {
1521				compiler->imm = src2w & 0xffffffff;
1522				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1523			}
1524			if (TEST_ADD_IMM(src1, src1w)) {
1525				compiler->imm = src1w & 0xffffffff;
1526				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1527			}
1528		}
1529		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
1530			if (TEST_SL_IMM(src2, src2w)) {
1531				compiler->imm = src2w & 0xffff;
1532				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1533			}
1534			if (TEST_SL_IMM(src1, src1w)) {
1535				compiler->imm = src1w & 0xffff;
1536				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1537			}
1538		}
1539		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
1540
1541	case SLJIT_ADDC:
1542		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1543
1544	case SLJIT_SUB:
1545		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1546			if (TEST_SL_IMM(src2, -src2w)) {
1547				compiler->imm = (-src2w) & 0xffff;
1548				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1549			}
1550			if (TEST_SL_IMM(src1, src1w)) {
1551				compiler->imm = src1w & 0xffff;
1552				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1553			}
1554			if (TEST_SH_IMM(src2, -src2w)) {
1555				compiler->imm = ((-src2w) >> 16) & 0xffff;
1556				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1557			}
1558			/* Range between -1 and -32768 is covered above. */
1559			if (TEST_ADD_IMM(src2, -src2w)) {
1560				compiler->imm = -src2w & 0xffffffff;
1561				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1562			}
1563		}
1564		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
1565			if (!(op & SLJIT_SET_U)) {
				/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64-bit systems. */
1567				if (TEST_SL_IMM(src2, src2w)) {
1568					compiler->imm = src2w & 0xffff;
1569					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1570				}
1571				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
1572					compiler->imm = src1w & 0xffff;
1573					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1574				}
1575			}
1576			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
				/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64-bit systems. */
1578				if (TEST_UL_IMM(src2, src2w)) {
1579					compiler->imm = src2w & 0xffff;
1580					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1581				}
1582				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1583			}
1584			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
1585				compiler->imm = src2w;
1586				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1587			}
1588			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1589		}
1590		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
1591			if (TEST_SL_IMM(src2, -src2w)) {
1592				compiler->imm = (-src2w) & 0xffff;
1593				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1594			}
1595		}
		/* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64-bit systems. */
1597		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
1598
1599	case SLJIT_SUBC:
1600		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
1601
1602	case SLJIT_MUL:
1603#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1604		if (op & SLJIT_INT_OP)
1605			flags |= ALT_FORM2;
1606#endif
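		/* A signed 16-bit immediate operand can use the immediate multiply form (ALT_FORM1). */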
1607		if (!GET_FLAGS(op)) {
1608			if (TEST_SL_IMM(src2, src2w)) {
1609				compiler->imm = src2w & 0xffff;
1610				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1611			}
1612			if (TEST_SL_IMM(src1, src1w)) {
1613				compiler->imm = src1w & 0xffff;
1614				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1615			}
1616		}
1617		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1618
1619	case SLJIT_AND:
1620	case SLJIT_OR:
1621	case SLJIT_XOR:
1622		/* Commutative unsigned operations. */
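		/* andi./andis. update CR0 themselves, so AND may use an immediate form even
		   when flags are requested; ori/oris/xori/xoris do not, which is why the
		   second block below requires !GET_FLAGS(op). */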
1623		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1624			if (TEST_UL_IMM(src2, src2w)) {
1625				compiler->imm = src2w;
1626				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1627			}
1628			if (TEST_UL_IMM(src1, src1w)) {
1629				compiler->imm = src1w;
1630				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1631			}
1632			if (TEST_UH_IMM(src2, src2w)) {
1633				compiler->imm = (src2w >> 16) & 0xffff;
1634				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1635			}
1636			if (TEST_UH_IMM(src1, src1w)) {
1637				compiler->imm = (src1w >> 16) & 0xffff;
1638				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1639			}
1640		}
1641		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
1642			if (TEST_UI_IMM(src2, src2w)) {
1643				compiler->imm = src2w;
1644				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1645			}
1646			if (TEST_UI_IMM(src1, src1w)) {
1647				compiler->imm = src1w;
1648				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1649			}
1650		}
1651		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1652
1653	case SLJIT_ASHR:
1654		if (op & SLJIT_KEEP_FLAGS)
1655			flags |= ALT_FORM3;
1656		/* Fall through. */
1657	case SLJIT_SHL:
1658	case SLJIT_LSHR:
1659#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1660		if (op & SLJIT_INT_OP)
1661			flags |= ALT_FORM2;
1662#endif
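		/* Immediate shift amounts are encoded directly into the shift instruction. */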
1663		if (src2 & SLJIT_IMM) {
1664			compiler->imm = src2w;
1665			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1666		}
1667		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1668	}
1669
1670	return SLJIT_SUCCESS;
1671}
1672
1673SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
1674{
1675	check_sljit_get_register_index(reg);
1676	return reg_map[reg];
1677}
1678
1679SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
1680{
1681	check_sljit_get_float_register_index(reg);
1682	return reg;
1683}
1684
1685SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
1686	void *instruction, sljit_si size)
1687{
1688	CHECK_ERROR();
1689	check_sljit_emit_op_custom(compiler, instruction, size);
1690	SLJIT_ASSERT(size == 4);
1691
1692	return push_inst(compiler, *(sljit_ins*)instruction);
1693}
1694
1695/* --------------------------------------------------------------------- */
1696/*  Floating point operators                                             */
1697/* --------------------------------------------------------------------- */
1698
1699SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
1700{
1701#ifdef SLJIT_IS_FPU_AVAILABLE
1702	return SLJIT_IS_FPU_AVAILABLE;
1703#else
1704	/* Available by default. */
1705	return 1;
1706#endif
1707}
1708
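/* FLOAT_DATA maps the SLJIT_SINGLE_OP bit of an op into the load/store data type flag,
   while SELECT_FOP picks the single or double precision instruction encoding. */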
1709#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_SINGLE_OP) >> 6))
1710#define SELECT_FOP(op, single, double) ((op & SLJIT_SINGLE_OP) ? single : double)
1711
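/* Offsets of the scratch area in the stack frame used for transferring values between
   integer and floating point registers; LOW/HI select the word order of a double on
   little and big endian targets. */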
1712#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1713#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
1714#else
1715#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
1716
1717#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
1718#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
1719#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
1720#else
1721#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
1722#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
1723#endif
1724
1725#endif /* SLJIT_CONFIG_PPC_64 */
1726
1727static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
1728	sljit_si dst, sljit_sw dstw,
1729	sljit_si src, sljit_sw srcw)
1730{
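	/* Round toward zero into an FPU register with fctiwz/fctidz, then move the integer
	   bits to the destination, either through the on-stack scratch area or with a
	   direct store. */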
1731	if (src & SLJIT_MEM) {
		/* The temporary store to the stack can be ignored from a caching point of view. */
1733		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1734		src = TMP_FREG1;
1735	}
1736
1737#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1738	op = GET_OPCODE(op);
1739	FAIL_IF(push_inst(compiler, (op == SLJIT_CONVI_FROMD ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));
1740
1741	if (dst == SLJIT_UNUSED)
1742		return SLJIT_SUCCESS;
1743
1744	if (op == SLJIT_CONVW_FROMD) {
1745		if (FAST_IS_REG(dst)) {
1746			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
1747			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
1748		}
1749		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
1750	}
1751
1752#else
1753	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
1754
1755	if (dst == SLJIT_UNUSED)
1756		return SLJIT_SUCCESS;
1757#endif
1758
1759	if (FAST_IS_REG(dst)) {
1760		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
1761		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
1762		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
1763	}
1764
1765	SLJIT_ASSERT(dst & SLJIT_MEM);
1766
1767	if (dst & OFFS_REG_MASK) {
1768		dstw &= 0x3;
1769		if (dstw) {
1770#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1771			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
1772#else
1773			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
1774#endif
1775			dstw = TMP_REG1;
1776		}
1777		else
1778			dstw = OFFS_REG(dst);
1779	}
1780	else {
1781		if ((dst & REG_MASK) && !dstw) {
1782			dstw = dst & REG_MASK;
1783			dst = 0;
1784		}
1785		else {
			/* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
1787			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
1788			dstw = TMP_REG1;
1789		}
1790	}
1791
1792	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
1793}
1794
1795static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
1796	sljit_si dst, sljit_sw dstw,
1797	sljit_si src, sljit_sw srcw)
1798{
1799#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1800
1801	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1802
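	/* Get the integer source into an FPU register (via the on-stack scratch area when
	   it lives in a GPR), then convert it with fcfid; 32-bit inputs are sign extended
	   first. */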
1803	if (src & SLJIT_IMM) {
1804		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
1805			srcw = (sljit_si)srcw;
1806		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
1807		src = TMP_REG1;
1808	}
1809	else if (GET_OPCODE(op) == SLJIT_CONVD_FROMI) {
1810		if (FAST_IS_REG(src))
1811			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
1812		else
1813			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1814		src = TMP_REG1;
1815	}
1816
1817	if (FAST_IS_REG(src)) {
1818		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1819		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
1820	}
1821	else
1822		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
1823
1824	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));
1825
1826	if (dst & SLJIT_MEM)
1827		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1828	if (op & SLJIT_SINGLE_OP)
1829		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1830	return SLJIT_SUCCESS;
1831
1832#else
1833
1834	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1835	sljit_si invert_sign = 1;
1836
1837	if (src & SLJIT_IMM) {
1838		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
1839		src = TMP_REG1;
1840		invert_sign = 0;
1841	}
1842	else if (!FAST_IS_REG(src)) {
1843		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1844		src = TMP_REG1;
1845	}
1846
	/* First, a special double precision floating point value is constructed:
	   (2^53 + (input xor 2^31)). The double precision format has exactly 53 bits of
	   precision, so the lower 32 bits of this value hold the biased input. Xor-ing
	   with 2^31 is the same as adding 0x80000000 to the input, which shifts it into
	   the 0 - 0xffffffff range. To get the converted floating point value, we then
	   need to subtract 2^53 + 2^31 from the constructed value. */
1852	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
1853	if (invert_sign)
1854		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
1855	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1856	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
1857	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
1858	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1859	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
1860	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
1861
1862	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));
1863
1864	if (dst & SLJIT_MEM)
1865		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
1866	if (op & SLJIT_SINGLE_OP)
1867		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
1868	return SLJIT_SUCCESS;
1869
1870#endif
1871}
1872
1873static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
1874	sljit_si src1, sljit_sw src1w,
1875	sljit_si src2, sljit_sw src2w)
1876{
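	/* The comparison result is placed into CR bits 4 + 0 .. 4 + 3, which is where the
	   floating point conditions in get_bo_bi_flags and sljit_emit_op_flags look for it. */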
1877	if (src1 & SLJIT_MEM) {
1878		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1879		src1 = TMP_FREG1;
1880	}
1881
1882	if (src2 & SLJIT_MEM) {
1883		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
1884		src2 = TMP_FREG2;
1885	}
1886
1887	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
1888}
1889
1890SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
1891	sljit_si dst, sljit_sw dstw,
1892	sljit_si src, sljit_sw srcw)
1893{
1894	sljit_si dst_r;
1895
1896	CHECK_ERROR();
1897	compiler->cache_arg = 0;
1898	compiler->cache_argw = 0;
1899
1900	SLJIT_COMPILE_ASSERT((SLJIT_SINGLE_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
1901	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1902
1903	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS)
1904		op ^= SLJIT_SINGLE_OP;
1905
1906	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1907
1908	if (src & SLJIT_MEM) {
1909		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
1910		src = dst_r;
1911	}
1912
1913	switch (GET_OPCODE(op)) {
1914	case SLJIT_CONVD_FROMS:
1915		op ^= SLJIT_SINGLE_OP;
1916		if (op & SLJIT_SINGLE_OP) {
1917			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
1918			break;
1919		}
1920		/* Fall through. */
1921	case SLJIT_MOVD:
1922		if (src != dst_r) {
1923			if (dst_r != TMP_FREG1)
1924				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
1925			else
1926				dst_r = src;
1927		}
1928		break;
1929	case SLJIT_NEGD:
1930		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
1931		break;
1932	case SLJIT_ABSD:
1933		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
1934		break;
1935	}
1936
1937	if (dst & SLJIT_MEM)
1938		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
1939	return SLJIT_SUCCESS;
1940}
1941
1942SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
1943	sljit_si dst, sljit_sw dstw,
1944	sljit_si src1, sljit_sw src1w,
1945	sljit_si src2, sljit_sw src2w)
1946{
1947	sljit_si dst_r, flags = 0;
1948
1949	CHECK_ERROR();
1950	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1951	ADJUST_LOCAL_OFFSET(dst, dstw);
1952	ADJUST_LOCAL_OFFSET(src1, src1w);
1953	ADJUST_LOCAL_OFFSET(src2, src2w);
1954
1955	compiler->cache_arg = 0;
1956	compiler->cache_argw = 0;
1957
1958	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
1959
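	/* Load the floating point source operands: try the single-instruction addressing
	   form first; if it does not apply, defer the load (ALT_FORM1/ALT_FORM2) so the
	   two slow loads can be ordered to reuse the address cache. */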
1960	if (src1 & SLJIT_MEM) {
1961		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
1962			FAIL_IF(compiler->error);
1963			src1 = TMP_FREG1;
1964		} else
1965			flags |= ALT_FORM1;
1966	}
1967
1968	if (src2 & SLJIT_MEM) {
1969		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
1970			FAIL_IF(compiler->error);
1971			src2 = TMP_FREG2;
1972		} else
1973			flags |= ALT_FORM2;
1974	}
1975
1976	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
1977		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1978			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
1979			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1980		}
1981		else {
1982			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1983			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1984		}
1985	}
1986	else if (flags & ALT_FORM1)
1987		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
1988	else if (flags & ALT_FORM2)
1989		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
1990
1991	if (flags & ALT_FORM1)
1992		src1 = TMP_FREG1;
1993	if (flags & ALT_FORM2)
1994		src2 = TMP_FREG2;
1995
1996	switch (GET_OPCODE(op)) {
1997	case SLJIT_ADDD:
1998		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
1999		break;
2000
2001	case SLJIT_SUBD:
2002		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
2003		break;
2004
2005	case SLJIT_MULD:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL uses FC as src2. */));
2007		break;
2008
2009	case SLJIT_DIVD:
2010		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
2011		break;
2012	}
2013
2014	if (dst_r == TMP_FREG2)
2015		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2016
2017	return SLJIT_SUCCESS;
2018}
2019
2020#undef FLOAT_DATA
2021#undef SELECT_FOP
2022
2023/* --------------------------------------------------------------------- */
2024/*  Other instructions                                                   */
2025/* --------------------------------------------------------------------- */
2026
2027SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw)
2028{
2029	CHECK_ERROR();
2030	check_sljit_emit_fast_enter(compiler, dst, dstw);
2031	ADJUST_LOCAL_OFFSET(dst, dstw);
2032
2033	/* For UNUSED dst. Uncommon, but possible. */
2034	if (dst == SLJIT_UNUSED)
2035		return SLJIT_SUCCESS;
2036
2037	if (FAST_IS_REG(dst))
2038		return push_inst(compiler, MFLR | D(dst));
2039
2040	/* Memory. */
2041	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
2042	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2043}
2044
2045SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_si src, sljit_sw srcw)
2046{
2047	CHECK_ERROR();
2048	check_sljit_emit_fast_return(compiler, src, srcw);
2049	ADJUST_LOCAL_OFFSET(src, srcw);
2050
2051	if (FAST_IS_REG(src))
2052		FAIL_IF(push_inst(compiler, MTLR | S(src)));
2053	else {
2054		if (src & SLJIT_MEM)
2055			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2056		else if (src & SLJIT_IMM)
2057			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
2058		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
2059	}
2060	return push_inst(compiler, BLR);
2061}
2062
2063/* --------------------------------------------------------------------- */
2064/*  Conditional instructions                                             */
2065/* --------------------------------------------------------------------- */
2066
2067SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2068{
2069	struct sljit_label *label;
2070
2071	CHECK_ERROR_PTR();
2072	check_sljit_emit_label(compiler);
2073
2074	if (compiler->last_label && compiler->last_label->size == compiler->size)
2075		return compiler->last_label;
2076
2077	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2078	PTR_FAIL_IF(!label);
2079	set_label(label, compiler);
2080	return label;
2081}
2082
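/* Returns the BO and BI fields of a conditional branch instruction: BO (bits 21-25)
   selects whether the branch is taken when the CR bit is set (12), clear (4), or
   always (20), and BI (bits 16-20) selects which CR bit to test. */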
2083static sljit_ins get_bo_bi_flags(sljit_si type)
2084{
2085	switch (type) {
2086	case SLJIT_C_EQUAL:
2087		return (12 << 21) | (2 << 16);
2088
2089	case SLJIT_C_NOT_EQUAL:
2090		return (4 << 21) | (2 << 16);
2091
2092	case SLJIT_C_LESS:
2093	case SLJIT_C_FLOAT_LESS:
2094		return (12 << 21) | ((4 + 0) << 16);
2095
2096	case SLJIT_C_GREATER_EQUAL:
2097	case SLJIT_C_FLOAT_GREATER_EQUAL:
2098		return (4 << 21) | ((4 + 0) << 16);
2099
2100	case SLJIT_C_GREATER:
2101	case SLJIT_C_FLOAT_GREATER:
2102		return (12 << 21) | ((4 + 1) << 16);
2103
2104	case SLJIT_C_LESS_EQUAL:
2105	case SLJIT_C_FLOAT_LESS_EQUAL:
2106		return (4 << 21) | ((4 + 1) << 16);
2107
2108	case SLJIT_C_SIG_LESS:
2109		return (12 << 21) | (0 << 16);
2110
2111	case SLJIT_C_SIG_GREATER_EQUAL:
2112		return (4 << 21) | (0 << 16);
2113
2114	case SLJIT_C_SIG_GREATER:
2115		return (12 << 21) | (1 << 16);
2116
2117	case SLJIT_C_SIG_LESS_EQUAL:
2118		return (4 << 21) | (1 << 16);
2119
2120	case SLJIT_C_OVERFLOW:
2121	case SLJIT_C_MUL_OVERFLOW:
2122		return (12 << 21) | (3 << 16);
2123
2124	case SLJIT_C_NOT_OVERFLOW:
2125	case SLJIT_C_MUL_NOT_OVERFLOW:
2126		return (4 << 21) | (3 << 16);
2127
2128	case SLJIT_C_FLOAT_EQUAL:
2129		return (12 << 21) | ((4 + 2) << 16);
2130
2131	case SLJIT_C_FLOAT_NOT_EQUAL:
2132		return (4 << 21) | ((4 + 2) << 16);
2133
2134	case SLJIT_C_FLOAT_UNORDERED:
2135		return (12 << 21) | ((4 + 3) << 16);
2136
2137	case SLJIT_C_FLOAT_ORDERED:
2138		return (4 << 21) | ((4 + 3) << 16);
2139
2140	default:
2141		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
2142		return (20 << 21);
2143	}
2144}
2145
2146SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2147{
2148	struct sljit_jump *jump;
2149	sljit_ins bo_bi_flags;
2150
2151	CHECK_ERROR_PTR();
2152	check_sljit_emit_jump(compiler, type);
2153
2154	bo_bi_flags = get_bo_bi_flags(type & 0xff);
2155	if (!bo_bi_flags)
2156		return NULL;
2157
2158	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2159	PTR_FAIL_IF(!jump);
2160	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2161	type &= 0xff;
2162
	/* On PPC, the arguments do not need to be touched. */
2164	if (type < SLJIT_JUMP)
2165		jump->flags |= IS_COND;
2166#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2167	if (type >= SLJIT_CALL0)
2168		jump->flags |= IS_CALL;
2169#endif
2170
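	/* Reserve a loadable constant for the target address (patched when the jump is
	   resolved), then branch through the CTR register. */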
2171	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2172	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
2173	jump->addr = compiler->size;
2174	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
2175	return jump;
2176}
2177
2178SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2179{
2180	struct sljit_jump *jump = NULL;
2181	sljit_si src_r;
2182
2183	CHECK_ERROR();
2184	check_sljit_emit_ijump(compiler, type, src, srcw);
2185	ADJUST_LOCAL_OFFSET(src, srcw);
2186
2187	if (FAST_IS_REG(src)) {
2188#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2189		if (type >= SLJIT_CALL0) {
2190			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
2191			src_r = TMP_CALL_REG;
2192		}
2193		else
2194			src_r = src;
2195#else
2196		src_r = src;
2197#endif
2198	} else if (src & SLJIT_IMM) {
2199		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2200		FAIL_IF(!jump);
2201		set_jump(jump, compiler, JUMP_ADDR);
2202		jump->u.target = srcw;
2203#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2204		if (type >= SLJIT_CALL0)
2205			jump->flags |= IS_CALL;
2206#endif
2207		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2208		src_r = TMP_CALL_REG;
2209	}
2210	else {
2211		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
2212		src_r = TMP_CALL_REG;
2213	}
2214
2215	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
2216	if (jump)
2217		jump->addr = compiler->size;
2218	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
2219}
2220
/* Extract a bit from CR into a GPR: mfcr copies the whole condition register, then
   rlwinm rotates the selected bit into the least significant position and clears
   all other bits. */
2222#define GET_CR_BIT(bit, dst) \
2223	FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
2224	FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
2225
2226#define INVERT_BIT(dst) \
2227	FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
2228
2229SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2230	sljit_si dst, sljit_sw dstw,
2231	sljit_si src, sljit_sw srcw,
2232	sljit_si type)
2233{
2234	sljit_si reg, input_flags;
2235	sljit_si flags = GET_ALL_FLAGS(op);
2236	sljit_sw original_dstw = dstw;
2237
2238	CHECK_ERROR();
2239	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);
2240	ADJUST_LOCAL_OFFSET(dst, dstw);
2241
2242	if (dst == SLJIT_UNUSED)
2243		return SLJIT_SUCCESS;
2244
2245	op = GET_OPCODE(op);
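	/* When op is a move and the destination is a register, the CR bit is materialized
	   directly into dst; otherwise it is built in TMP_REG2 first. */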
2246	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2247
2248	compiler->cache_arg = 0;
2249	compiler->cache_argw = 0;
2250	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
2251		ADJUST_LOCAL_OFFSET(src, srcw);
2252#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2253		input_flags = (flags & SLJIT_INT_OP) ? INT_DATA : WORD_DATA;
2254#else
2255		input_flags = WORD_DATA;
2256#endif
2257		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2258		src = TMP_REG1;
2259		srcw = 0;
2260	}
2261
2262	switch (type) {
2263	case SLJIT_C_EQUAL:
2264		GET_CR_BIT(2, reg);
2265		break;
2266
2267	case SLJIT_C_NOT_EQUAL:
2268		GET_CR_BIT(2, reg);
2269		INVERT_BIT(reg);
2270		break;
2271
2272	case SLJIT_C_LESS:
2273	case SLJIT_C_FLOAT_LESS:
2274		GET_CR_BIT(4 + 0, reg);
2275		break;
2276
2277	case SLJIT_C_GREATER_EQUAL:
2278	case SLJIT_C_FLOAT_GREATER_EQUAL:
2279		GET_CR_BIT(4 + 0, reg);
2280		INVERT_BIT(reg);
2281		break;
2282
2283	case SLJIT_C_GREATER:
2284	case SLJIT_C_FLOAT_GREATER:
2285		GET_CR_BIT(4 + 1, reg);
2286		break;
2287
2288	case SLJIT_C_LESS_EQUAL:
2289	case SLJIT_C_FLOAT_LESS_EQUAL:
2290		GET_CR_BIT(4 + 1, reg);
2291		INVERT_BIT(reg);
2292		break;
2293
2294	case SLJIT_C_SIG_LESS:
2295		GET_CR_BIT(0, reg);
2296		break;
2297
2298	case SLJIT_C_SIG_GREATER_EQUAL:
2299		GET_CR_BIT(0, reg);
2300		INVERT_BIT(reg);
2301		break;
2302
2303	case SLJIT_C_SIG_GREATER:
2304		GET_CR_BIT(1, reg);
2305		break;
2306
2307	case SLJIT_C_SIG_LESS_EQUAL:
2308		GET_CR_BIT(1, reg);
2309		INVERT_BIT(reg);
2310		break;
2311
2312	case SLJIT_C_OVERFLOW:
2313	case SLJIT_C_MUL_OVERFLOW:
2314		GET_CR_BIT(3, reg);
2315		break;
2316
2317	case SLJIT_C_NOT_OVERFLOW:
2318	case SLJIT_C_MUL_NOT_OVERFLOW:
2319		GET_CR_BIT(3, reg);
2320		INVERT_BIT(reg);
2321		break;
2322
2323	case SLJIT_C_FLOAT_EQUAL:
2324		GET_CR_BIT(4 + 2, reg);
2325		break;
2326
2327	case SLJIT_C_FLOAT_NOT_EQUAL:
2328		GET_CR_BIT(4 + 2, reg);
2329		INVERT_BIT(reg);
2330		break;
2331
2332	case SLJIT_C_FLOAT_UNORDERED:
2333		GET_CR_BIT(4 + 3, reg);
2334		break;
2335
2336	case SLJIT_C_FLOAT_ORDERED:
2337		GET_CR_BIT(4 + 3, reg);
2338		INVERT_BIT(reg);
2339		break;
2340
2341	default:
2342		SLJIT_ASSERT_STOP();
2343		break;
2344	}
2345
2346	if (op < SLJIT_ADD) {
2347#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
2348		if (op == SLJIT_MOV)
2349			input_flags = WORD_DATA;
2350		else {
2351			op = SLJIT_MOV_UI;
2352			input_flags = INT_DATA;
2353		}
2354#else
2355		op = SLJIT_MOV;
2356		input_flags = WORD_DATA;
2357#endif
2358		if (reg != TMP_REG2)
2359			return SLJIT_SUCCESS;
2360		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2361	}
2362
2363#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2364	compiler->skip_checks = 1;
2365#endif
2366	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
2367}
2368
2369SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2370{
2371	struct sljit_const *const_;
2372	sljit_si reg;
2373
2374	CHECK_ERROR_PTR();
2375	check_sljit_emit_const(compiler, dst, dstw, init_value);
2376	ADJUST_LOCAL_OFFSET(dst, dstw);
2377
2378	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2379	PTR_FAIL_IF(!const_);
2380	set_const(const_, compiler);
2381
2382	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
2383
2384	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2385
2386	if (dst & SLJIT_MEM)
2387		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2388	return const_;
2389}
2390