1/*
2 *    Stack-less Just-In-Time compiler
3 *
4 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 *   1. Redistributions of source code must retain the above copyright notice, this list of
10 *      conditions and the following disclaimer.
11 *
12 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13 *      of conditions and the following disclaimer in the documentation and/or other materials
14 *      provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28{
29	return "PowerPC" SLJIT_CPUINFO;
30}
31
32/* Length of an instruction word.
33   Both for ppc-32 and ppc-64. */
34typedef sljit_u32 sljit_ins;
35
/* Stack frame layout selector: defined on 32 bit AIX and on every 64 bit
   target. sljit_emit_enter and sljit_emit_return keep the saved link register
   at offset 2 * sizeof(sljit_sw) from the stack pointer when it is set, and
   at offset sizeof(sljit_sw) otherwise. */
36#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
37	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
38#define SLJIT_PPC_STACK_FRAME_V2 1
39#endif
40
/* Declares _sync_cache_range(), which ppc_cache_flush calls on AIX. */
41#ifdef _AIX
42#include <sys/cache.h>
43#endif
44
/* Little endian targets get a dedicated TMP_CALL_REG (see its definition
   below), and detect_jump_type never shortens calls to a relative branch
   when this is set. */
45#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
46#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
47#endif
48
#if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)

/* Flushes the caches for the instruction words in the [from, to) range.
   The implementation is chosen at compile time:
     - AIX: delegates to _sync_cache_range().
     - GCC compatible inline assembly: one flush sequence is issued for
       every instruction word, followed by a final synchronization.
     - anything else is rejected with an #error. */
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#ifdef _AIX
	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
	/* POWER architecture variant. */
	for (; from < to; from++) {
		__asm__ volatile (
			"clf 0, %0\n"
			"dcs\n"
			: : "r"(from)
		);
	}
	__asm__ volatile ( "ics" );
#	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
#	error "Cache flush is not implemented for PowerPC/POWER common mode."
#	else
	/* PowerPC architecture variant. */
	for (; from < to; from++) {
		__asm__ volatile (
			"dcbf 0, %0\n"
			"sync\n"
			"icbi 0, %0\n"
			: : "r"(from)
		);
	}
	__asm__ volatile ( "isync" );
#	endif
#	ifdef __xlc__
#	warning "This file may fail to compile if -qfuncsect is used"
#	endif
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
#else
#error "This platform requires a cache flush implementation."
#endif /* _AIX */
}

#endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
93
/* Indexes of the temporary registers inside reg_map. They are placed after
   the registers counted by SLJIT_NUMBER_OF_REGISTERS. */
94#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
95#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
96#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
97#define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)
98
/* TMP_CALL_REG has its own reg_map entry when SLJIT_PASS_ENTRY_ADDR_TO_CALL
   is set, otherwise it is an alias of TMP_REG2. */
99#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
100#define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
101#else
102#define TMP_CALL_REG	TMP_REG2
103#endif
104
/* Floating point temporaries. These values are placed into the instruction
   directly by FD / FS / FA / FB / FC, without any mapping table. */
105#define TMP_FREG1	(0)
106#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
107
/* Translates a register index into the number stored in the register fields
   of an instruction word (see the D / S / A / B / C macros). */
108static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
109	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
110};
111
112/* --------------------------------------------------------------------- */
113/*  Instruction forms                                                    */
114/* --------------------------------------------------------------------- */
/* Register fields. The argument of D / S / A / B / C is an index into
   reg_map; the mapped register number is shifted to the bit position of the
   corresponding field. The value is converted to sljit_ins before shifting,
   so every field is computed in unsigned 32 bit arithmetic. */
#define D(d)		((sljit_ins)reg_map[d] << 21)
#define S(s)		((sljit_ins)reg_map[s] << 21)
#define A(a)		((sljit_ins)reg_map[a] << 16)
#define B(b)		((sljit_ins)reg_map[b] << 11)
#define C(c)		((sljit_ins)reg_map[c] << 6)
/* Floating point register fields. The argument is used directly. */
#define FD(fd)		((sljit_ins)(fd) << 21)
#define FS(fs)		((sljit_ins)(fs) << 21)
#define FA(fa)		((sljit_ins)(fa) << 16)
#define FB(fb)		((sljit_ins)(fb) << 11)
#define FC(fc)		((sljit_ins)(fc) << 6)
/* Low 16 bits of an immediate value. */
#define IMM(imm)	((imm) & 0xffff)
#define CRD(d)		((sljit_ins)(d) << 21)

/* Instruction bit sections.
   OE and Rc flag (see ALT_SET_FLAGS). The argument is parenthesized, so
   compound expressions such as (a | b) are masked as a whole. */
#define OERC(flags)	((((flags) & ALT_SET_FLAGS) >> 10) | ((flags) & ALT_SET_FLAGS))
/* Rc flag (see ALT_SET_FLAGS). */
#define RC(flags)	(((flags) & ALT_SET_FLAGS) >> 10)
/* Primary opcode (topmost 6 bits). The cast is required: opcodes >= 32 do
   not fit into a signed int after the shift, which is undefined behaviour. */
#define HI(opcode)	((sljit_ins)(opcode) << 26)
/* Extended opcode, placed above the lowest bit of the instruction word. */
#define LO(opcode)	((sljit_ins)(opcode) << 1)
135
/* Instruction templates. HI() supplies the primary opcode and LO() the
   extended opcode; register and immediate fields are or-ed to these values
   when an instruction is emitted. */
136#define ADD		(HI(31) | LO(266))
137#define ADDC		(HI(31) | LO(10))
138#define ADDE		(HI(31) | LO(138))
139#define ADDI		(HI(14))
140#define ADDIC		(HI(13))
141#define ADDIS		(HI(15))
142#define ADDME		(HI(31) | LO(234))
143#define AND		(HI(31) | LO(28))
144#define ANDI		(HI(28))
145#define ANDIS		(HI(29))
146#define Bx		(HI(18))
147#define BCx		(HI(16))
148#define BCCTR		(HI(19) | LO(528) | (3 << 11))
149#define BLR		(HI(19) | LO(16) | (0x14 << 21))
150#define CNTLZD		(HI(31) | LO(58))
151#define CNTLZW		(HI(31) | LO(26))
152#define CMP		(HI(31) | LO(0))
153#define CMPI		(HI(11))
154#define CMPL		(HI(31) | LO(32))
155#define CMPLI		(HI(10))
156#define CROR		(HI(19) | LO(449))
157#define DIVD		(HI(31) | LO(489))
158#define DIVDU		(HI(31) | LO(457))
159#define DIVW		(HI(31) | LO(491))
160#define DIVWU		(HI(31) | LO(459))
161#define EXTSB		(HI(31) | LO(954))
162#define EXTSH		(HI(31) | LO(922))
163#define EXTSW		(HI(31) | LO(986))
164#define FABS		(HI(63) | LO(264))
165#define FADD		(HI(63) | LO(21))
166#define FADDS		(HI(59) | LO(21))
167#define FCFID		(HI(63) | LO(846))
168#define FCMPU		(HI(63) | LO(0))
169#define FCTIDZ		(HI(63) | LO(815))
170#define FCTIWZ		(HI(63) | LO(15))
171#define FDIV		(HI(63) | LO(18))
172#define FDIVS		(HI(59) | LO(18))
173#define FMR		(HI(63) | LO(72))
174#define FMUL		(HI(63) | LO(25))
175#define FMULS		(HI(59) | LO(25))
176#define FNEG		(HI(63) | LO(40))
177#define FRSP		(HI(63) | LO(12))
178#define FSUB		(HI(63) | LO(20))
179#define FSUBS		(HI(59) | LO(20))
180#define LD		(HI(58) | 0)
181#define LWZ		(HI(32))
182#define MFCR		(HI(31) | LO(19))
/* The extra constant of the MF... / MT... templates differs per special
   register (presumably the register selector field - confirm against the
   ISA manual). */
183#define MFLR		(HI(31) | LO(339) | 0x80000)
184#define MFXER		(HI(31) | LO(339) | 0x10000)
185#define MTCTR		(HI(31) | LO(467) | 0x90000)
186#define MTLR		(HI(31) | LO(467) | 0x80000)
187#define MTXER		(HI(31) | LO(467) | 0x10000)
188#define MULHD		(HI(31) | LO(73))
189#define MULHDU		(HI(31) | LO(9))
190#define MULHW		(HI(31) | LO(75))
191#define MULHWU		(HI(31) | LO(11))
192#define MULLD		(HI(31) | LO(233))
193#define MULLI		(HI(7))
194#define MULLW		(HI(31) | LO(235))
195#define NEG		(HI(31) | LO(104))
/* NOP has the same value as ORI with every field left zero. */
196#define NOP		(HI(24))
197#define NOR		(HI(31) | LO(124))
198#define OR		(HI(31) | LO(444))
199#define ORI		(HI(24))
200#define ORIS		(HI(25))
201#define RLDICL		(HI(30))
202#define RLWINM		(HI(21))
203#define SLD		(HI(31) | LO(27))
204#define SLW		(HI(31) | LO(24))
205#define SRAD		(HI(31) | LO(794))
206#define SRADI		(HI(31) | LO(413 << 1))
207#define SRAW		(HI(31) | LO(792))
208#define SRAWI		(HI(31) | LO(824))
209#define SRD		(HI(31) | LO(539))
210#define SRW		(HI(31) | LO(536))
211#define STD		(HI(62) | 0)
212#define STDU		(HI(62) | 1)
213#define STDUX		(HI(31) | LO(181))
214#define STFIWX		(HI(31) | LO(983))
215#define STW		(HI(36))
216#define STWU		(HI(37))
217#define STWUX		(HI(31) | LO(183))
218#define SUBF		(HI(31) | LO(40))
219#define SUBFC		(HI(31) | LO(8))
220#define SUBFE		(HI(31) | LO(136))
221#define SUBFIC		(HI(8))
222#define XOR		(HI(31) | LO(316))
223#define XORI		(HI(26))
224#define XORIS		(HI(27))
225
/* Limits of the signed and unsigned 16 bit immediate operands. */
226#define SIMM_MAX	(0x7fff)
227#define SIMM_MIN	(-0x8000)
228#define UIMM_MAX	(0xffff)
229
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
/* Initializes a function context from an existing function descriptor.
     func_ptr - when not NULL, receives the address of context
     context  - the context to be filled
     addr     - entry address stored in the context; when it is zero, the
                first word read from func is used instead
     func     - read as an array of three sljit_sw words; the second and
                third words are copied into the r2 and r11 members */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
{
	sljit_sw* descriptor;

	if (func_ptr != NULL)
		*func_ptr = (void*)context;

	descriptor = (sljit_sw*)func;

	if (addr)
		context->addr = addr;
	else
		context->addr = descriptor[0];

	context->r2 = descriptor[1];
	context->r11 = descriptor[2];
}
#endif
242
243static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
244{
245	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
246	FAIL_IF(!ptr);
247	*ptr = ins;
248	compiler->size++;
249	return SLJIT_SUCCESS;
250}
251
252static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
253{
254	sljit_sw diff;
255	sljit_uw target_addr;
256	sljit_sw extra_jump_flags;
257
258#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
259	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
260		return 0;
261#else
262	if (jump->flags & SLJIT_REWRITABLE_JUMP)
263		return 0;
264#endif
265
266	if (jump->flags & JUMP_ADDR)
267		target_addr = jump->u.target;
268	else {
269		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
270		target_addr = (sljit_uw)(code + jump->u.label->size);
271	}
272
273#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
274	if (jump->flags & IS_CALL)
275		goto keep_address;
276#endif
277
278	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;
279
280	extra_jump_flags = 0;
281	if (jump->flags & IS_COND) {
282		if (diff <= 0x7fff && diff >= -0x8000) {
283			jump->flags |= PATCH_B;
284			return 1;
285		}
286		if (target_addr <= 0xffff) {
287			jump->flags |= PATCH_B | PATCH_ABS_B;
288			return 1;
289		}
290		extra_jump_flags = REMOVE_COND;
291
292		diff -= sizeof(sljit_ins);
293	}
294
295	if (diff <= 0x01ffffff && diff >= -0x02000000) {
296		jump->flags |= PATCH_B | extra_jump_flags;
297		return 1;
298	}
299	if (target_addr <= 0x03ffffff) {
300		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
301		return 1;
302	}
303
304#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
305#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
306keep_address:
307#endif
308	if (target_addr <= 0x7fffffff) {
309		jump->flags |= PATCH_ABS32;
310		return 1;
311	}
312	if (target_addr <= 0x7fffffffffffl) {
313		jump->flags |= PATCH_ABS48;
314		return 1;
315	}
316#endif
317
318	return 0;
319}
320
/* Copies the buffered instruction words into a newly allocated executable
   memory block, shortens the jump sequences accepted by detect_jump_type,
   records the final address of every label, jump and constant, patches the
   jump targets and flushes the cache for the generated code.
   Returns the start of the code, or the address of the function context
   stored after the code when SLJIT_INDIRECT_CALL is enabled. The error
   paths are handled by the CHECK_ERROR_PTR, CHECK_PTR and
   PTR_FAIL_WITH_EXEC_IF macros. */
321SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
322{
323	struct sljit_memory_fragment *buf;
324	sljit_ins *code;
325	sljit_ins *code_ptr;
326	sljit_ins *buf_ptr;
327	sljit_ins *buf_end;
328	sljit_uw word_count;
329	sljit_uw addr;
330
331	struct sljit_label *label;
332	struct sljit_jump *jump;
333	struct sljit_const *const_;
334
335	CHECK_ERROR_PTR();
336	CHECK_PTR(check_sljit_generate_code(compiler));
337	reverse_buf(compiler);
338
	/* Reserve space for the function context stored after the code. On 64 bit
	   targets one more word is reserved when the word count is odd. */
339#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
340#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
341	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
342#else
343	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
344#endif
345#endif
346	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
347	PTR_FAIL_WITH_EXEC_IF(code);
348	buf = compiler->buf;
349
	/* First pass: copy the words of every buffer fragment. word_count is the
	   index of the current word in the buffered stream, code_ptr is its
	   position in the output (these differ once a jump was shortened). */
350	code_ptr = code;
351	word_count = 0;
352	label = compiler->labels;
353	jump = compiler->jumps;
354	const_ = compiler->consts;
355	do {
356		buf_ptr = (sljit_ins*)buf->memory;
		/* used_size is divided by four to get the number of words. */
357		buf_end = buf_ptr + (buf->used_size >> 2);
358		do {
359			*code_ptr = *buf_ptr++;
360			SLJIT_ASSERT(!label || label->size >= word_count);
361			SLJIT_ASSERT(!jump || jump->addr >= word_count);
362			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
363			/* These structures are ordered by their address. */
364			if (label && label->size == word_count) {
365				/* Just recording the address. */
366				label->addr = (sljit_uw)code_ptr;
				/* From now on size is the word offset inside the output. */
367				label->size = code_ptr - code;
368				label = label->next;
369			}
370			if (jump && jump->addr == word_count) {
				/* The current word is the last word of the jump sequence, which
				   starts 3 (32 bit) or 6 (64 bit) words earlier. */
371#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
372				jump->addr = (sljit_uw)(code_ptr - 3);
373#else
374				jump->addr = (sljit_uw)(code_ptr - 6);
375#endif
376				if (detect_jump_type(jump, code_ptr, code)) {
377#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
					/* Only the last word is kept, moved to the start of the sequence. */
378					code_ptr[-3] = code_ptr[0];
379					code_ptr -= 3;
380#else
381					if (jump->flags & PATCH_ABS32) {
						/* Keep the first two and the last two words of the sequence. */
382						code_ptr -= 3;
383						code_ptr[-1] = code_ptr[2];
384						code_ptr[0] = code_ptr[3];
385					}
386					else if (jump->flags & PATCH_ABS48) {
						/* Drop the fifth word of the sequence. */
387						code_ptr--;
388						code_ptr[-1] = code_ptr[0];
389						code_ptr[0] = code_ptr[1];
390						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
391						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
392						code_ptr[-3] ^= 0x8422;
393						/* oris -> ori */
394						code_ptr[-2] ^= 0x4000000;
395					}
396					else {
						/* Only the last word is kept, moved to the start of the sequence. */
397						code_ptr[-6] = code_ptr[0];
398						code_ptr -= 6;
399					}
400#endif
401					if (jump->flags & REMOVE_COND) {
						/* The conditional jump becomes a BCx with a displacement of two
						   words, which skips the Bx stored after it. One bit of the
						   original word is toggled (presumably this inverts the
						   condition - confirm against the ISA manual). The jump is
						   patched as an unconditional branch later. */
402						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
403						code_ptr++;
404						jump->addr += sizeof(sljit_ins);
405						code_ptr[0] = Bx;
406						jump->flags -= IS_COND;
407					}
408				}
409				jump = jump->next;
410			}
411			if (const_ && const_->addr == word_count) {
412				const_->addr = (sljit_uw)code_ptr;
413				const_ = const_->next;
414			}
415			code_ptr ++;
416			word_count ++;
417		} while (buf_ptr < buf_end);
418
419		buf = buf->next;
420	} while (buf);
421
	/* A label may point after the last instruction. */
422	if (label && label->size == word_count) {
423		label->addr = (sljit_uw)code_ptr;
424		label->size = code_ptr - code;
425		label = label->next;
426	}
427
428	SLJIT_ASSERT(!label);
429	SLJIT_ASSERT(!jump);
430	SLJIT_ASSERT(!const_);
431#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
432	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
433#else
434	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
435#endif
436
	/* Second pass: every label address is known, so the target of each jump
	   can be written into its instruction(s). */
437	jump = compiler->jumps;
438	while (jump) {
439		do {
440			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
441			buf_ptr = (sljit_ins*)jump->addr;
442			if (jump->flags & PATCH_B) {
443				if (jump->flags & IS_COND) {
444					if (!(jump->flags & PATCH_ABS_B)) {
						/* Relative conditional branch: the other bits selected by the
						   mask are kept from the original word. */
445						addr = addr - jump->addr;
446						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
447						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
448					}
449					else {
						/* Absolute conditional branch (0x2 is set). */
450						SLJIT_ASSERT(addr <= 0xffff);
451						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
452					}
453				}
454				else {
455					if (!(jump->flags & PATCH_ABS_B)) {
						/* Relative unconditional branch: only the lowest bit is kept. */
456						addr = addr - jump->addr;
457						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
458						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
459					}
460					else {
						/* Absolute unconditional branch (0x2 is set). */
461						SLJIT_ASSERT(addr <= 0x03ffffff);
462						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
463					}
464				}
465				break;
466			}
467			/* Set the fields of immediate loads. */
468#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
469			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
470			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
471#else
472			if (jump->flags & PATCH_ABS32) {
473				SLJIT_ASSERT(addr <= 0x7fffffff);
474				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
475				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
476				break;
477			}
478			if (jump->flags & PATCH_ABS48) {
				/* The word at index 2 has no immediate field to update. */
479				SLJIT_ASSERT(addr <= 0x7fffffffffff);
480				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
481				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
482				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
483				break;
484			}
			/* Full 64 bit address: four 16 bit parts, the word at index 2 is skipped. */
485			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
486			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
487			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
488			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
489#endif
490		} while (0);
491		jump = jump->next;
492	}
493
	/* executable_size covers the instruction words only, the function
	   context is not included. */
494	compiler->error = SLJIT_ERR_COMPILED;
495	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
496	SLJIT_CACHE_FLUSH(code, code_ptr);
497
498#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
499#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* Skip one word when the address of the context would not be a multiple of 8. */
500	if (((sljit_sw)code_ptr) & 0x4)
501		code_ptr++;
502	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
503	return code_ptr;
504#else
505	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
506	return code_ptr;
507#endif
508#else
509	return code;
510#endif
511}
512
513/* --------------------------------------------------------------------- */
514/*  Entry, exit                                                          */
515/* --------------------------------------------------------------------- */
516
517/* inp_flags: */
518
519/* Creates an index in data_transfer_insts array. */
520#define LOAD_DATA	0x01
521#define INDEXED		0x02
522#define WRITE_BACK	0x04
523#define WORD_DATA	0x00
524#define BYTE_DATA	0x08
525#define HALF_DATA	0x10
526#define INT_DATA	0x18
527#define SIGNED_DATA	0x20
528/* Separates integer and floating point registers */
529#define GPR_REG		0x3f
530#define DOUBLE_DATA	0x40
531
/* Mask of the bits which form the data_transfer_insts index. */
532#define MEM_MASK	0x7f
533
534/* Other inp_flags. */
535
/* Only test whether the fast path is usable, no instruction is emitted
   (see getput_arg_fast). */
536#define ARG_TEST	0x000100
537/* Integer operation and set flags -> requires exts on 64 bit systems. */
538#define ALT_SIGN_EXT	0x000200
539/* This flag affects the RC() and OERC() macros. */
540#define ALT_SET_FLAGS	0x000400
541#define ALT_KEEP_CACHE	0x000800
542#define ALT_FORM1	0x010000
543#define ALT_FORM2	0x020000
544#define ALT_FORM3	0x040000
545#define ALT_FORM4	0x080000
546#define ALT_FORM5	0x100000
547#define ALT_FORM6	0x200000
548
549/* Source and destination is register. */
550#define REG_DEST	0x000001
551#define REG1_SOURCE	0x000002
552#define REG2_SOURCE	0x000004
553/* getput_arg_fast returned true. */
554#define FAST_DEST	0x000008
555/* Multiple instructions are required. */
556#define SLOW_DEST	0x000010
/* The following flags, which are defined above, do not overlap with the
   values of this group: */
557/*
558ALT_SIGN_EXT		0x000200
559ALT_SET_FLAGS		0x000400
560ALT_FORM1		0x010000
561...
562ALT_FORM6		0x200000 */
563
564#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
565#include "sljitNativePPC_32.c"
566#else
567#include "sljitNativePPC_64.c"
568#endif
569
/* Word sized store and load templates used by the entry / exit code below:
   STW / LWZ on 32 bit targets, STD / LD on 64 bit targets. Both macros are
   undefined again after sljit_emit_return. */
570#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
571#define STACK_STORE	STW
572#define STACK_LOAD	LWZ
573#else
574#define STACK_STORE	STD
575#define STACK_LOAD	LD
576#endif
577
578SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
579	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
580	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
581{
582	sljit_s32 i, tmp, offs;
583
584	CHECK_ERROR();
585	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
586	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
587
588	FAIL_IF(push_inst(compiler, MFLR | D(0)));
589	offs = -(sljit_s32)(sizeof(sljit_sw));
590	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
591
592	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
593	for (i = SLJIT_S0; i >= tmp; i--) {
594		offs -= (sljit_s32)(sizeof(sljit_sw));
595		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
596	}
597
598	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
599		offs -= (sljit_s32)(sizeof(sljit_sw));
600		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
601	}
602
603	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
604
605#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
606	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
607#else
608	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
609#endif
610
611	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
612	if (args >= 1)
613		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
614	if (args >= 2)
615		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
616	if (args >= 3)
617		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
618
619	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
620	local_size = (local_size + 15) & ~0xf;
621	compiler->local_size = local_size;
622
623#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
624	if (local_size <= SIMM_MAX)
625		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
626	else {
627		FAIL_IF(load_immediate(compiler, 0, -local_size));
628		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
629	}
630#else
631	if (local_size <= SIMM_MAX)
632		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
633	else {
634		FAIL_IF(load_immediate(compiler, 0, -local_size));
635		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
636	}
637#endif
638
639	return SLJIT_SUCCESS;
640}
641
642SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
643	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
644	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
645{
646	CHECK_ERROR();
647	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
648	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
649
650	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
651	compiler->local_size = (local_size + 15) & ~0xf;
652	return SLJIT_SUCCESS;
653}
654
655SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
656{
657	sljit_s32 i, tmp, offs;
658
659	CHECK_ERROR();
660	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
661
662	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
663
664	if (compiler->local_size <= SIMM_MAX)
665		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
666	else {
667		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
668		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
669	}
670
671#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
672	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
673#else
674	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
675#endif
676
677	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
678
679	tmp = compiler->scratches;
680	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
681		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
682		offs += (sljit_s32)(sizeof(sljit_sw));
683	}
684
685	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
686	for (i = tmp; i <= SLJIT_S0; i++) {
687		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
688		offs += (sljit_s32)(sizeof(sljit_sw));
689	}
690
691	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
692	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
693
694	FAIL_IF(push_inst(compiler, MTLR | S(0)));
695	FAIL_IF(push_inst(compiler, BLR));
696
697	return SLJIT_SUCCESS;
698}
699
/* STACK_STORE / STACK_LOAD are only used by the entry / exit code above. */
700#undef STACK_STORE
701#undef STACK_LOAD
702
703/* --------------------------------------------------------------------- */
704/*  Operators                                                            */
705/* --------------------------------------------------------------------- */
706
707/* i/x - immediate/indexed form
708   n/w - no write-back / write-back (1 bit)
709   s/l - store/load (1 bit)
710   u/s - unsigned/signed (1 bit)
711   w/b/h/i - word/byte/half/int allowed (2 bit)
712   The integer part contains 64 items (followed by 8 floating point
   items), but not all are different. */
713
714/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
715#define INT_ALIGNED	0x10000
716/* 64-bit only: there is no lwau instruction. */
717#define UPDATE_REQ	0x20000
718
/* ARCH_32_64(a, b) selects the 32 bit (a) or the 64 bit (b) variant of a
   table entry. INST_CODE_AND_DST combines an instruction template with its
   data register field: D(reg) is used when the memory flags are not above
   GPR_REG, FD(reg) otherwise. The 64 bit variant also removes the
   INT_ALIGNED / UPDATE_REQ marker bits, which are not part of the
   instruction word. */
719#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
720#define ARCH_32_64(a, b)	a
721#define INST_CODE_AND_DST(inst, flags, reg) \
722	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
723#else
724#define ARCH_32_64(a, b)	b
725#define INST_CODE_AND_DST(inst, flags, reg) \
726	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
727#endif
728
/* Load / store instruction templates, indexed by (inp_flags & MEM_MASK).
   The index is built from LOAD_DATA (0x01), INDEXED (0x02), WRITE_BACK
   (0x04), the data size (BYTE_DATA, HALF_DATA, INT_DATA or WORD_DATA) and
   SIGNED_DATA (0x20). The floating point entries start at DOUBLE_DATA (0x40).
   On 64 bit targets some entries carry the INT_ALIGNED / UPDATE_REQ marker
   bits, which must be removed before the instruction is emitted
   (see INST_CODE_AND_DST). */
729static const sljit_ins data_transfer_insts[64 + 8] = {
730
731/* -------- Unsigned -------- */
732
733/* Word. */
734
735/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
736/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
737/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
738/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
739
740/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
741/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
742/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
743/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
744
745/* Byte. */
746
747/* u b n i s */ HI(38) /* stb */,
748/* u b n i l */ HI(34) /* lbz */,
749/* u b n x s */ HI(31) | LO(215) /* stbx */,
750/* u b n x l */ HI(31) | LO(87) /* lbzx */,
751
752/* u b w i s */ HI(39) /* stbu */,
753/* u b w i l */ HI(35) /* lbzu */,
754/* u b w x s */ HI(31) | LO(247) /* stbux */,
755/* u b w x l */ HI(31) | LO(119) /* lbzux */,
756
757/* Half. */
758
759/* u h n i s */ HI(44) /* sth */,
760/* u h n i l */ HI(40) /* lhz */,
761/* u h n x s */ HI(31) | LO(407) /* sthx */,
762/* u h n x l */ HI(31) | LO(279) /* lhzx */,
763
764/* u h w i s */ HI(45) /* sthu */,
765/* u h w i l */ HI(41) /* lhzu */,
766/* u h w x s */ HI(31) | LO(439) /* sthux */,
767/* u h w x l */ HI(31) | LO(311) /* lhzux */,
768
769/* Int. */
770
771/* u i n i s */ HI(36) /* stw */,
772/* u i n i l */ HI(32) /* lwz */,
773/* u i n x s */ HI(31) | LO(151) /* stwx */,
774/* u i n x l */ HI(31) | LO(23) /* lwzx */,
775
776/* u i w i s */ HI(37) /* stwu */,
777/* u i w i l */ HI(33) /* lwzu */,
778/* u i w x s */ HI(31) | LO(183) /* stwux */,
779/* u i w x l */ HI(31) | LO(55) /* lwzux */,
780
781/* -------- Signed -------- */
782
783/* Word. */
784
785/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
786/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
787/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
788/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
789
790/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
791/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
792/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
793/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
794
795/* Byte. */
796
797/* s b n i s */ HI(38) /* stb */,
798/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
799/* s b n x s */ HI(31) | LO(215) /* stbx */,
800/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
801
802/* s b w i s */ HI(39) /* stbu */,
803/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
804/* s b w x s */ HI(31) | LO(247) /* stbux */,
805/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
806
807/* Half. */
808
809/* s h n i s */ HI(44) /* sth */,
810/* s h n i l */ HI(42) /* lha */,
811/* s h n x s */ HI(31) | LO(407) /* sthx */,
812/* s h n x l */ HI(31) | LO(343) /* lhax */,
813
814/* s h w i s */ HI(45) /* sthu */,
815/* s h w i l */ HI(43) /* lhau */,
816/* s h w x s */ HI(31) | LO(439) /* sthux */,
817/* s h w x l */ HI(31) | LO(375) /* lhaux */,
818
819/* Int. */
820
821/* s i n i s */ HI(36) /* stw */,
822/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
823/* s i n x s */ HI(31) | LO(151) /* stwx */,
824/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
825
826/* s i w i s */ HI(37) /* stwu */,
/* The 64 bit variant is marked with UPDATE_REQ: there is no lwau. */
827/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
828/* s i w x s */ HI(31) | LO(183) /* stwux */,
829/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
830
831/* -------- Double -------- */
832
833/* d   n i s */ HI(54) /* stfd */,
834/* d   n i l */ HI(50) /* lfd */,
835/* d   n x s */ HI(31) | LO(727) /* stfdx */,
836/* d   n x l */ HI(31) | LO(599) /* lfdx */,
837
/* Single precision variants (stfs / lfs), stored after the double entries. */
838/* s   n i s */ HI(52) /* stfs */,
839/* s   n i l */ HI(48) /* lfs */,
840/* s   n x s */ HI(31) | LO(663) /* stfsx */,
841/* s   n x l */ HI(31) | LO(535) /* lfsx */,
842
843};
844
845#undef ARCH_32_64
846
847/* Simple cases, (no caching is required). */
/* Tries to perform a memory access with a single instruction.
     inp_flags - selects the instruction (see data_transfer_insts); when
                 ARG_TEST is set, nothing is emitted
     reg       - data register of the access
     arg, argw - memory operand (arg must have SLJIT_MEM set)
   Return value:
      0 - a single instruction is not enough
      1 - ARG_TEST was set and a single instruction would be enough
     -1 - the instruction has been emitted
   A failing push_inst leaves the function through FAIL_IF. */
848static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
849{
850	sljit_ins inst;
851
852	/* Should work when (arg & REG_MASK) == 0. */
853	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
854	SLJIT_ASSERT(arg & SLJIT_MEM);
855
	/* [base + offset register]: the indexed form is usable only when the low
	   two bits of argw are zero. */
856	if (arg & OFFS_REG_MASK) {
857		if (argw & 0x3)
858			return 0;
859		if (inp_flags & ARG_TEST)
860			return 1;
861
862		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
863		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
864		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
865		return -1;
866	}
867
	/* Without a base register the write-back form is not requested. */
868	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
869		inp_flags &= ~WRITE_BACK;
870
871#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
872	inst = data_transfer_insts[inp_flags & MEM_MASK];
873	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));
874
	/* The offset must fit into a signed 16 bit immediate, INT_ALIGNED
	   instructions also need the low two bits of the offset to be zero, and
	   UPDATE_REQ entries cannot be handled here. */
875	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
876		return 0;
877	if (inp_flags & ARG_TEST)
878		return 1;
879#endif
880
881#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* The offset must fit into a signed 16 bit immediate. */
882	if (argw > SIMM_MAX || argw < SIMM_MIN)
883		return 0;
884	if (inp_flags & ARG_TEST)
885		return 1;
886
887	inst = data_transfer_insts[inp_flags & MEM_MASK];
888	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
889#endif
890
891	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
892	return -1;
893}
894
895/* See getput_arg below.
896   Note: can_cache is called only for binary operators. Those operator always
897   uses word arguments without write back. */
898static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
899{
900	sljit_sw high_short, next_high_short;
901#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
902	sljit_sw diff;
903#endif
904
905	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
906
907	if (arg & OFFS_REG_MASK)
908		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
909
910	if (next_arg & OFFS_REG_MASK)
911		return 0;
912
913#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
914	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
915	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
916	return high_short == next_high_short;
917#else
918	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
919		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
920		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
921		if (high_short == next_high_short)
922			return 1;
923	}
924
925	diff = argw - next_argw;
926	if (!(arg & REG_MASK))
927		return diff <= SIMM_MAX && diff >= SIMM_MIN;
928
929	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
930		return 1;
931
932	return 0;
933#endif
934}
935
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Helper of getput_arg for accesses made relative to the cached TMP_REG3.
   It relies on 'compiler' and 'inst' being in scope at the point of use.

   When 'inst' is flagged INT_ALIGNED and the displacement 'imm' has any of
   its two low bits set, those bits are added to TMP_REG3 (and to
   compiler->cache_argw, which tracks it) and cleared from 'imm'.

   'imm' must be a modifiable lvalue and is evaluated more than once. The
   macro expands to a single statement and may leave the enclosing function
   through FAIL_IF. */
#define ADJUST_CACHED_IMM(imm) \
	do { \
		if ((inst & INT_ALIGNED) && ((imm) & 0x3)) { \
			/* Adjust cached value. Fortunately this is really a rare case */ \
			compiler->cache_argw += (imm) & 0x3; \
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | ((imm) & 0x3))); \
			(imm) &= ~0x3; \
		} \
	} while (0)
#endif
945
/* Emit the necessary instructions. See can_cache above.

   General (slow path) memory access: transfers data between 'reg' and the
   memory operand described by 'arg' / 'argw'. It is used for the operands
   getput_arg_fast refuses.

   'next_arg' / 'next_argw' describe the operand that will be accessed next
   (callers pass 0, 0 when there is none). When the address computed here can
   be re-used for that operand, it is built in TMP_REG3 and recorded in
   compiler->cache_arg / compiler->cache_argw; a matching record left by an
   earlier call is re-used instead of recomputing the address.

   Returns the result of the final push_inst, or leaves early through
   FAIL_IF when emitting an instruction fails. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 tmp_r;
	sljit_ins inst;
	sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_sw diff;
#endif

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Scratch register for the address: a load whose data kind is at most
	   GPR_REG may use its own destination, everything else uses TMP_REG1. */
	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
	/* Special case for "mov reg, [reg, ... ]". */
	if ((arg & REG_MASK) == tmp_r)
		tmp_r = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base + offset register form; the low two bits of argw are the
		   left shift applied to the offset register. */
		argw &= 0x3;
		/* Otherwise getput_arg_fast would capture it. */
		SLJIT_ASSERT(argw);

		/* Cache hit: TMP_REG3 was recorded for this offset register and shift. */
		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
			tmp_r = TMP_REG3;
		else {
			/* The next operand uses the same offset register and shift:
			   compute the value into TMP_REG3 and record it. */
			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
				compiler->cache_argw = argw;
				tmp_r = TMP_REG3;
			}
			/* tmp_r = offset register << argw */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
		}
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
	}

	/* Without a base register there is nothing to write back to. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* The ADDIS based sequence below is used on PPC-64 only when the offset
	   passes this range test, satisfies the INT_ALIGNED restriction and the
	   instruction is not flagged UPDATE_REQ. On PPC-32 it is always used. */
	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
#endif

		arg &= REG_MASK;
		/* Upper part of the offset, with 0x10000 added when bit 15 of argw
		   is set; the low 16 bits are passed to IMM() in the final access. */
		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
		/* The getput_arg_fast should handle this otherwise. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

		if (inp_flags & WRITE_BACK) {
			/* The base register itself is advanced by the upper part. When
			   it is also the data register, the data is first copied to
			   tmp_r and transferred from there. */
			if (arg == reg) {
				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
				reg = tmp_r;
			}
			tmp_r = arg;
			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
		}
		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
			/* Cache miss. If the next operand is a non-indexed memory
			   operand with the same upper part, compute base + upper part
			   into TMP_REG3 and record it; otherwise use the scratch. */
			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
				next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
				if (high_short == next_high_short) {
					compiler->cache_arg = SLJIT_MEM | arg;
					compiler->cache_argw = high_short;
					tmp_r = TMP_REG3;
				}
			}
			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
		}
		else
			/* Cache hit: TMP_REG3 was recorded for this base and upper part. */
			tmp_r = TMP_REG3;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	}

	/* Everything else is PPC-64 only. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address (no base register). */
		diff = argw - compiler->cache_argw;
		/* TMP_REG3 is recorded as holding an immediate within a signed
		   16 bit distance: address the operand relative to it. */
		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			ADJUST_CACHED_IMM(diff);
			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
		}

		/* The next operand is close enough: load the address into TMP_REG3
		   and record it as a cached immediate. */
		diff = argw - next_argw;
		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			SLJIT_ASSERT(inp_flags & LOAD_DATA);

			compiler->cache_arg = SLJIT_IMM;
			compiler->cache_argw = argw;
			tmp_r = TMP_REG3;
		}

		/* Full address in a register, accessed with a zero displacement. */
		FAIL_IF(load_immediate(compiler, tmp_r, argw));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
	}

	/* Base register with an offset that did not pass the range test above. */
	diff = argw - compiler->cache_argw;
	/* TMP_REG3 is recorded for this very operand (set up further below as
	   base + offset): use it with the small remaining difference. */
	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
		ADJUST_CACHED_IMM(diff);
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
	}

	/* TMP_REG3 is recorded as an immediate near argw: adjust it to argw if
	   needed and use it as the offset register of an indexed access. */
	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		if (compiler->cache_argw != argw) {
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
			compiler->cache_argw = argw;
		}
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	/* The next operand has the same offset: keep the offset in TMP_REG3
	   (recorded as an immediate) and use an indexed access. */
	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	/* The next operand is the same operand with a nearby offset: compute
	   base + offset into TMP_REG3, record it, and access it with a zero
	   displacement. */
	diff = argw - next_argw;
	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));

		compiler->cache_arg = arg;
		compiler->cache_argw = argw;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
	}

	/* The next operand is a non-indexed memory operand with a nearby offset:
	   keep the offset in TMP_REG3 (recorded as an immediate). Otherwise the
	   offset goes to the scratch register and nothing is recorded. */
	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;
		tmp_r = TMP_REG3;
	}
	else
		FAIL_IF(load_immediate(compiler, tmp_r, argw));

	/* Get the indexed version instead of the normal one. */
	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
#endif
}
1113
1114static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1115{
1116	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1117		return compiler->error;
1118	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1119}
1120
/* Common operand handling behind sljit_emit_op1 and sljit_emit_op2.

   Brings src1 and src2 into registers (loading immediates and memory
   operands as needed), calls emit_single_op for 'op', and stores the result
   when dst is a memory operand.

   input_flags carries the data type flags used for the memory accesses
   together with the ALT_* flags; only the ALT_* subset selected below is
   forwarded to emit_single_op. Unless ALT_KEEP_CACHE is set, the address
   cache is reset first.

   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF on failure. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r;
	sljit_s32 src1_r;
	sljit_s32 src2_r;
	sljit_s32 sugg_src2_r = TMP_REG2;
	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);

	if (!(input_flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move with no destination only needs to be emitted when its
		   source is a memory operand. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		dst_r = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		/* Moves can load the second source directly into the destination. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else {
		SLJIT_ASSERT(dst & SLJIT_MEM);
		/* ARG_TEST: only ask whether the fast path could store to dst. */
		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
			flags |= FAST_DEST;
			dst_r = TMP_REG2;
		}
		else {
			flags |= SLOW_DEST;
			/* 0 marks the operand as not yet processed (see below). */
			dst_r = 0;
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	}
	else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1_r = TMP_REG1;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
		/* A non-zero result also covers a failed push_inst. */
		FAIL_IF(compiler->error);
		src1_r = TMP_REG1;
	}
	else
		src1_r = 0;

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* Move from a register to a non-register destination: the source
		   register is used as the result register. */
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
		src2_r = sugg_src2_r;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
		/* A non-zero result also covers a failed push_inst. */
		FAIL_IF(compiler->error);
		src2_r = sugg_src2_r;
	}
	else
		src2_r = 0;

	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
	   All arguments are complex addressing modes, and it is a binary operator. */
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		/* Choose the load order so that consecutive accesses can share a
		   cached address: src2 is loaded first only when src1 cannot be
		   cached with src2 but can be cached with dst. */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	/* Two unprocessed operands: load the first one and pass the second one
	   as the "next" operand for caching. The second is loaded (or stored)
	   further below. */
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	/* The result of a slow destination is produced in TMP_REG2. */
	if (dst_r == 0)
		dst_r = TMP_REG2;

	/* Remaining unprocessed sources: no following operand to cache for. */
	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result when the destination is a memory operand. */
	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}
1249
1250SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1251{
1252#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1253	sljit_s32 int_op = op & SLJIT_I32_OP;
1254#endif
1255
1256	CHECK_ERROR();
1257	CHECK(check_sljit_emit_op0(compiler, op));
1258
1259	op = GET_OPCODE(op);
1260	switch (op) {
1261	case SLJIT_BREAKPOINT:
1262	case SLJIT_NOP:
1263		return push_inst(compiler, NOP);
1264	case SLJIT_LMUL_UW:
1265	case SLJIT_LMUL_SW:
1266		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1267#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1268		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1269		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1270#else
1271		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
1272		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
1273#endif
1274	case SLJIT_DIVMOD_UW:
1275	case SLJIT_DIVMOD_SW:
1276		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
1277#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1278		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1279		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1280#else
1281		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
1282		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
1283#endif
1284		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
1285	case SLJIT_DIV_UW:
1286	case SLJIT_DIV_SW:
1287#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1288		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1289#else
1290		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
1291#endif
1292	}
1293
1294	return SLJIT_SUCCESS;
1295}
1296
/* Typed move helper for sljit_emit_op1. It deliberately uses the caller's
   locals: compiler, flags, dst, dstw, src and srcw.
   For an immediate source the operation becomes a plain SLJIT_MOV and the
   immediate is converted with 'type_cast' (a parenthesized type name); for
   any other source the move 'type' is emitted. 'type_flags' are the data
   flags OR-ed into flags for the call. */
#define EMIT_MOV(type, type_flags, type_cast) \
	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1299
1300SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1301	sljit_s32 dst, sljit_sw dstw,
1302	sljit_s32 src, sljit_sw srcw)
1303{
1304	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
1305	sljit_s32 op_flags = GET_ALL_FLAGS(op);
1306
1307	CHECK_ERROR();
1308	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1309	ADJUST_LOCAL_OFFSET(dst, dstw);
1310	ADJUST_LOCAL_OFFSET(src, srcw);
1311
1312	op = GET_OPCODE(op);
1313	if ((src & SLJIT_IMM) && srcw == 0)
1314		src = TMP_ZERO;
1315
1316	if (op_flags & SLJIT_SET_O)
1317		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1318
1319	if (op_flags & SLJIT_I32_OP) {
1320		if (op < SLJIT_NOT) {
1321			if (FAST_IS_REG(src) && src == dst) {
1322				if (!TYPE_CAST_NEEDED(op))
1323					return SLJIT_SUCCESS;
1324			}
1325#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1326			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1327				op = SLJIT_MOV_U32;
1328			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1329				op = SLJIT_MOVU_U32;
1330			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1331				op = SLJIT_MOV_S32;
1332			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1333				op = SLJIT_MOVU_S32;
1334#endif
1335		}
1336#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1337		else {
1338			/* Most operations expect sign extended arguments. */
1339			flags |= INT_DATA | SIGNED_DATA;
1340			if (src & SLJIT_IMM)
1341				srcw = (sljit_s32)srcw;
1342		}
1343#endif
1344	}
1345
1346	switch (op) {
1347	case SLJIT_MOV:
1348	case SLJIT_MOV_P:
1349#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1350	case SLJIT_MOV_U32:
1351	case SLJIT_MOV_S32:
1352#endif
1353		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1354
1355#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1356	case SLJIT_MOV_U32:
1357		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));
1358
1359	case SLJIT_MOV_S32:
1360		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
1361#endif
1362
1363	case SLJIT_MOV_U8:
1364		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));
1365
1366	case SLJIT_MOV_S8:
1367		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));
1368
1369	case SLJIT_MOV_U16:
1370		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));
1371
1372	case SLJIT_MOV_S16:
1373		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));
1374
1375	case SLJIT_MOVU:
1376	case SLJIT_MOVU_P:
1377#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
1378	case SLJIT_MOVU_U32:
1379	case SLJIT_MOVU_S32:
1380#endif
1381		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);
1382
1383#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1384	case SLJIT_MOVU_U32:
1385		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));
1386
1387	case SLJIT_MOVU_S32:
1388		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
1389#endif
1390
1391	case SLJIT_MOVU_U8:
1392		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));
1393
1394	case SLJIT_MOVU_S8:
1395		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));
1396
1397	case SLJIT_MOVU_U16:
1398		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));
1399
1400	case SLJIT_MOVU_S16:
1401		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));
1402
1403	case SLJIT_NOT:
1404		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1405
1406	case SLJIT_NEG:
1407		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1408
1409	case SLJIT_CLZ:
1410#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1411		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
1412#else
1413		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1414#endif
1415	}
1416
1417	return SLJIT_SUCCESS;
1418}
1419
1420#undef EMIT_MOV
1421
/* Immediate classification helpers used by sljit_emit_op2. Each one is true
   only when 'src' is an immediate operand (SLJIT_IMM) and 'srcw' has the
   shape described. 'srcw' is evaluated more than once.

   The masks that cover bit 31 are cast to sljit_sw before being
   complemented. Without the cast the complement is computed on a 32 bit
   unsigned constant, so on PPC-64 bits 32..63 of srcw were never tested
   (TEST_UH_IMM) or the test accepted every immediate (TEST_UI_IMM). */

/* srcw fits into a signed 16 bit immediate. */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* srcw fits into an unsigned 16 bit immediate. */
#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

/* The low 16 bits of srcw are zero (and, on PPC-64, srcw fits into 32 bits signed). */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* Only bits 16..31 of srcw may be set. */
#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffff0000))

/* Any immediate on PPC-32; restricted to [-0x80000000, 0x7fff7fff] on PPC-64. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

/* Any immediate on PPC-32; only bits 0..31 of srcw may be set on PPC-64. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~(sljit_sw)0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
1454
/* Emits a two source operation (ADD, ADDC, SUB, SUBC, MUL, AND, OR, XOR,
   SHL, LSHR, ASHR).

   Most of the function selects a cheaper instruction form when one of the
   operands is a suitable immediate. In those cases the immediate is stored
   in compiler->imm, the remaining operand is passed to emit_op as the first
   source, TMP_REG2 is passed as a placeholder second source, and one of the
   ALT_FORM* flags tells emit_single_op which variant to emit. The meaning
   of each ALT_FORM* flag is defined per opcode by emit_single_op (outside
   this function).

   Unknown opcodes emit nothing and return SLJIT_SUCCESS. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Zero immediates are taken from the TMP_ZERO register instead. */
	if ((src1 & SLJIT_IMM) && src1w == 0)
		src1 = TMP_ZERO;
	if ((src2 & SLJIT_IMM) && src2w == 0)
		src2 = TMP_ZERO;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (op & SLJIT_I32_OP) {
		/* Most operations expect sign extended arguments. */
		flags |= INT_DATA | SIGNED_DATA;
		if (src1 & SLJIT_IMM)
			src1w = (sljit_s32)(src1w);
		if (src2 & SLJIT_IMM)
			src2w = (sljit_s32)(src2w);
		if (GET_FLAGS(op))
			flags |= ALT_SIGN_EXT;
	}
#endif
	if (op & SLJIT_SET_O)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
	/* When the caller passes TMP_REG2 as the second source, emit_op is told
	   not to reset the address cache. */
	if (src2 == TMP_REG2)
		flags |= ALT_KEEP_CACHE;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		/* No flags requested: try the immediate forms, for either operand
		   since addition is commutative. */
		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
			/* Signed 16 bit immediate. */
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Immediate with a zero low half: only the upper half is kept. */
			if (TEST_SH_IMM(src2, src2w)) {
				compiler->imm = (src2w >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SH_IMM(src1, src1w)) {
				compiler->imm = (src1w >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Range between -1 and -32768 is covered above. */
			if (TEST_ADD_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_ADD_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		/* Neither SLJIT_SET_E nor SLJIT_SET_O requested: the signed 16 bit
		   immediate can still be used through ALT_FORM3. */
		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_ADDC:
		/* ALT_FORM1 is passed when SLJIT_KEEP_FLAGS is set. */
		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUB:
		/* No flags requested: subtracting an immediate is emitted as
		   SLJIT_ADD of the negated immediate where that fits. */
		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
			if (TEST_SL_IMM(src2, -src2w)) {
				compiler->imm = (-src2w) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			/* Immediate first operand: stays SLJIT_SUB, with src2 passed
			   as the register operand. */
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			if (TEST_SH_IMM(src2, -src2w)) {
				compiler->imm = ((-src2w) >> 16) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			/* Range between -1 and -32768 is covered above. */
			if (TEST_ADD_IMM(src2, -src2w)) {
				compiler->imm = -src2w & 0xffffffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
		}
		/* The result is discarded and only E/U/S flags are requested
		   (neither O nor C). */
		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
			if (!(op & SLJIT_SET_U)) {
				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
				if (TEST_SL_IMM(src2, src2w)) {
					compiler->imm = src2w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
				}
				/* The operands may only be swapped when nothing but
				   SLJIT_SET_E is requested. */
				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
					compiler->imm = src1w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
				}
			}
			/* Only SLJIT_SET_U is requested. */
			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
				if (TEST_UL_IMM(src2, src2w)) {
					compiler->imm = src2w & 0xffff;
					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
				}
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
			}
			/* Immediate in [0, 0x7fff]: fits both the signed and the
			   unsigned 16 bit range. */
			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
				compiler->imm = src2w;
				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
		}
		/* None of E/U/S/O requested: emitted as SLJIT_ADD of the negated
		   signed 16 bit immediate through ALT_FORM3. */
		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
			if (TEST_SL_IMM(src2, -src2w)) {
				compiler->imm = (-src2w) & 0xffff;
				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
		}
		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUBC:
		/* ALT_FORM1 is passed when SLJIT_KEEP_FLAGS is set. */
		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM2 marks the 32 bit variant. */
		if (op & SLJIT_I32_OP)
			flags |= ALT_FORM2;
#endif
		/* No flags requested: signed 16 bit immediate form, for either operand. */
		if (!GET_FLAGS(op)) {
			if (TEST_SL_IMM(src2, src2w)) {
				compiler->imm = src2w & 0xffff;
				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_SL_IMM(src1, src1w)) {
				compiler->imm = src1w & 0xffff;
				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		/* Commutative unsigned operations. */
		/* The 16 bit immediate forms are tried for AND even when flags are
		   requested; for OR and XOR only when no flags are requested. */
		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
			/* Immediate confined to the low 16 bits. */
			if (TEST_UL_IMM(src2, src2w)) {
				compiler->imm = src2w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UL_IMM(src1, src1w)) {
				compiler->imm = src1w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
			/* Immediate confined to bits 16..31: only that half is kept. */
			if (TEST_UH_IMM(src2, src2w)) {
				compiler->imm = (src2w >> 16) & 0xffff;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UH_IMM(src1, src1w)) {
				compiler->imm = (src1w >> 16) & 0xffff;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		/* OR / XOR without flags: immediates accepted by TEST_UI_IMM are
		   passed whole through ALT_FORM3. */
		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
			if (TEST_UI_IMM(src2, src2w)) {
				compiler->imm = src2w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
			}
			if (TEST_UI_IMM(src1, src1w)) {
				compiler->imm = src1w;
				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
			}
		}
		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_ASHR:
		/* ALT_FORM3 is passed when SLJIT_KEEP_FLAGS is set. */
		if (op & SLJIT_KEEP_FLAGS)
			flags |= ALT_FORM3;
		/* Fall through. */
	case SLJIT_SHL:
	case SLJIT_LSHR:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM2 marks the 32 bit variant. */
		if (op & SLJIT_I32_OP)
			flags |= ALT_FORM2;
#endif
		/* Immediate shift amount: passed in compiler->imm with ALT_FORM1. */
		if (src2 & SLJIT_IMM) {
			compiler->imm = src2w;
			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
		}
		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1662
1663SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
1664{
1665	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
1666	return reg_map[reg];
1667}
1668
1669SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
1670{
1671	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
1672	return reg;
1673}
1674
1675SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1676	void *instruction, sljit_s32 size)
1677{
1678	CHECK_ERROR();
1679	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1680
1681	return push_inst(compiler, *(sljit_ins*)instruction);
1682}
1683
1684/* --------------------------------------------------------------------- */
1685/*  Floating point operators                                             */
1686/* --------------------------------------------------------------------- */
1687
1688SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
1689{
1690#ifdef SLJIT_IS_FPU_AVAILABLE
1691	return SLJIT_IS_FPU_AVAILABLE;
1692#else
1693	/* Available by default. */
1694	return 1;
1695#endif
1696}
1697
/* Data flags for a floating point memory access: DOUBLE_DATA combined with
   the SLJIT_F32_OP bit of 'op' shifted right by 6.
   NOTE(review): the shift presumably moves that bit onto the flag that
   selects the single precision table entry - confirm against the data flag
   definitions. The argument is parenthesized so that expressions such as
   (a | b) are masked as a whole. */
#define FLOAT_DATA(op) (DOUBLE_DATA | (((op) & SLJIT_F32_OP) >> 6))
/* Yields 'single_ins' when 'op' has SLJIT_F32_OP set, 'double_ins' otherwise. */
#define SELECT_FOP(op, single_ins, double_ins) (((op) & SLJIT_F32_OP) ? (single_ins) : (double_ins))

/* Byte offset, relative to SLJIT_SP, of the temporary slot through which
   values are passed between float and integer registers. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))

/* PPC-32 only: offsets of the two words of that slot; which word is the low
   one depends on the byte order. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
#endif

#endif /* SLJIT_CONFIG_PPC_64 */
1715
/* Emits code that converts the floating point value in src to an integer
   and writes it to dst. The converted value is always produced in
   TMP_FREG1 first; a register destination is then filled through the
   temporary stack slot at FLOAT_TMP_MEM_OFFSET, while a memory destination
   is stored to directly. Nothing is stored when dst is SLJIT_UNUSED.
   Returns SLJIT_SUCCESS or the error reported by the failing emitter. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		/* We can ignore the temporary data store on the stack from caching point of view. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* ppc-64: FCTIWZ for the S32 conversion, FCTIDZ for the word sized one. */
	op = GET_OPCODE(op);
	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (op == SLJIT_CONV_SW_FROM_F64) {
		if (FAST_IS_REG(dst)) {
			/* Store TMP_FREG1 as DOUBLE_DATA into the stack slot and reload the same bytes as WORD_DATA. */
			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
		}
		/* Memory destination: store TMP_FREG1 as DOUBLE_DATA straight to dst. */
		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
	}

#else
	/* ppc-32: only the FCTIWZ form is emitted. */
	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;
#endif

	/* From here on the result is written with STFIWX, which takes its address from two registers (A and B fields). */
	if (FAST_IS_REG(dst)) {
		/* Register destination: STFIWX into the stack slot (SLJIT_SP + TMP_REG1), then an INT_DATA load into dst. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
	}

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* Memory destination: from this point dstw is reused to hold the register that goes into the B field. */
	if (dst & OFFS_REG_MASK) {
		/* Base + (index << shift) form: only the low two bits of dstw are used as the shift amount. */
		dstw &= 0x3;
		if (dstw) {
			/* Non-zero shift: the shifted index register is computed into TMP_REG1. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
#endif
			dstw = TMP_REG1;
		}
		else
			dstw = OFFS_REG(dst);
	}
	else {
		if ((dst & REG_MASK) && !dstw) {
			/* Plain [base] access: the base register moves to the B field and the A field becomes zero. */
			dstw = dst & REG_MASK;
			dst = 0;
		}
		else {
			/* This works regardless of whether we have SLJIT_MEM1 or SLJIT_MEM0. */
			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
			dstw = TMP_REG1;
		}
	}

	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}
1783
/* Emits code that converts the integer in src (register, memory or
   immediate) to a floating point value and writes it to dst.
   The result is computed in dst when dst is a float register, otherwise in
   TMP_FREG1 and then stored to memory. For register destinations an FRSP
   is appended when SLJIT_F32_OP is set in op.
   ppc-64 uses FCFID on the raw integer bits; ppc-32 builds the value with
   the constant trick described in the body.
   Returns SLJIT_SUCCESS or the error reported by the failing emitter. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_IMM) {
		/* Immediate source: truncated to 32 bits first for the S32 variant, then loaded into TMP_REG1. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
		/* S32 source: sign extended into TMP_REG1 (EXTSW for a register, a signed INT_DATA load for memory). */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
		else
			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		src = TMP_REG1;
	}

	if (FAST_IS_REG(src)) {
		/* Register source: written to the stack slot as WORD_DATA and reloaded from there into TMP_FREG1 as DOUBLE_DATA. */
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
	}
	else
		/* Remaining memory source: loaded directly into TMP_FREG1 as DOUBLE_DATA. */
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));

	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));

	/* For a memory destination dst_r is TMP_FREG1, which is stored with the width selected by FLOAT_DATA(op). */
	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#else

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	/* Set to 0 when the sign bit has already been flipped at compile time (immediate source). */
	sljit_s32 invert_sign = 1;

	if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
		src = TMP_REG1;
		invert_sign = 0;
	}
	else if (!FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
		src = TMP_REG1;
	}

	/* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
	   point value, we need to subtract 2^53 + 2^31 from the constructed value. */
	/* TMP_REG2 receives the high word pattern (ADDIS with 0x4330). */
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
	if (invert_sign)
		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
	/* Write the high word and the sign-flipped input as the low word of the stack slot. */
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
	/* Low word of the constant to subtract (ADDIS with 0x8000); the high word already in the slot is reused. */
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
	/* TMP_FREG1 = constructed value; then the low word is overwritten and TMP_FREG2 = the constant. */
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));

	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));

	/* For a memory destination dst_r is TMP_FREG1, which is stored with the width selected by FLOAT_DATA(op). */
	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#endif
}
1861
1862static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1863	sljit_s32 src1, sljit_sw src1w,
1864	sljit_s32 src2, sljit_sw src2w)
1865{
1866	if (src1 & SLJIT_MEM) {
1867		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1868		src1 = TMP_FREG1;
1869	}
1870
1871	if (src2 & SLJIT_MEM) {
1872		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
1873		src2 = TMP_FREG2;
1874	}
1875
1876	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
1877}
1878
/* Emits a single operand floating point operation. This body handles
   SLJIT_MOV_F64, SLJIT_NEG_F64, SLJIT_ABS_F64 and SLJIT_CONV_F64_FROM_F32.
   NOTE(review): SELECT_FOP1_OPERATION_WITH_CHECKS is defined elsewhere; it
   presumably performs the argument checks and dispatches the other
   conversions and compares to their helpers - confirm against its definition.
   A memory source is loaded into dst_r (dst itself when it is a register,
   TMP_FREG1 otherwise) and a memory destination is written at the end.
   Returns SLJIT_SUCCESS or the error of the failing emitter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* FLOAT_DATA relies on SLJIT_F32_OP >> 6 landing on bit 0x4, which must be free in DOUBLE_DATA. */
	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* For the precision conversion the F32 bit is toggled here so that the load below uses the
	   opposite width; it is toggled back inside the switch before the store. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_CONV_F64_FROM_F32:
		op ^= SLJIT_F32_OP;
		/* With the F32 bit set after restoring, an FRSP is emitted; otherwise the case is handled as a plain move. */
		if (op & SLJIT_F32_OP) {
			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
			break;
		}
		/* Fall through. */
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
			else
				/* No register move is emitted when dst_r is TMP_FREG1: the final store reads src directly. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
		break;
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
1930
/* Emits a two operand floating point operation (SLJIT_ADD_F64,
   SLJIT_SUB_F64, SLJIT_MUL_F64 or SLJIT_DIV_F64), using the single
   precision instruction when SLJIT_F32_OP is set in op.
   Memory sources are loaded into TMP_FREG1 (src1) and TMP_FREG2 (src2);
   the result is computed in dst when it is a register, otherwise in
   TMP_FREG2 and then stored to dst.
   Returns SLJIT_SUCCESS or the error of the failing emitter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* In this function ALT_FORM1 / ALT_FORM2 in flags mark src1 / src2 as memory operands still to be loaded. */
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	/* Try the fast path first; a non-zero result from getput_arg_fast is treated as "load handled",
	   after which only compiler->error needs checking. */
	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= ALT_FORM1;
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= ALT_FORM2;
	}

	/* Slow path loads. The last two arguments of getput_arg name the memory access that follows;
	   when both sources are pending, the order is picked from the can_cache() results. */
	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & ALT_FORM1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & ALT_FORM2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	/* Operands loaded on the slow path now live in the temporary float registers. */
	if (flags & ALT_FORM1)
		src1 = TMP_FREG1;
	if (flags & ALT_FORM2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	}

	/* dst_r is TMP_FREG2 when dst is not a register: write the result to dst. */
	if (dst_r == TMP_FREG2)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
2008
2009#undef FLOAT_DATA
2010#undef SELECT_FOP
2011
2012/* --------------------------------------------------------------------- */
2013/*  Other instructions                                                   */
2014/* --------------------------------------------------------------------- */
2015
2016SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2017{
2018	CHECK_ERROR();
2019	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2020	ADJUST_LOCAL_OFFSET(dst, dstw);
2021
2022	/* For UNUSED dst. Uncommon, but possible. */
2023	if (dst == SLJIT_UNUSED)
2024		return SLJIT_SUCCESS;
2025
2026	if (FAST_IS_REG(dst))
2027		return push_inst(compiler, MFLR | D(dst));
2028
2029	/* Memory. */
2030	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
2031	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2032}
2033
2034SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
2035{
2036	CHECK_ERROR();
2037	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
2038	ADJUST_LOCAL_OFFSET(src, srcw);
2039
2040	if (FAST_IS_REG(src))
2041		FAIL_IF(push_inst(compiler, MTLR | S(src)));
2042	else {
2043		if (src & SLJIT_MEM)
2044			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2045		else if (src & SLJIT_IMM)
2046			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
2047		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
2048	}
2049	return push_inst(compiler, BLR);
2050}
2051
2052/* --------------------------------------------------------------------- */
2053/*  Conditional instructions                                             */
2054/* --------------------------------------------------------------------- */
2055
2056SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2057{
2058	struct sljit_label *label;
2059
2060	CHECK_ERROR_PTR();
2061	CHECK_PTR(check_sljit_emit_label(compiler));
2062
2063	if (compiler->last_label && compiler->last_label->size == compiler->size)
2064		return compiler->last_label;
2065
2066	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2067	PTR_FAIL_IF(!label);
2068	set_label(label, compiler);
2069	return label;
2070}
2071
2072static sljit_ins get_bo_bi_flags(sljit_s32 type)
2073{
2074	switch (type) {
2075	case SLJIT_EQUAL:
2076		return (12 << 21) | (2 << 16);
2077
2078	case SLJIT_NOT_EQUAL:
2079		return (4 << 21) | (2 << 16);
2080
2081	case SLJIT_LESS:
2082	case SLJIT_LESS_F64:
2083		return (12 << 21) | ((4 + 0) << 16);
2084
2085	case SLJIT_GREATER_EQUAL:
2086	case SLJIT_GREATER_EQUAL_F64:
2087		return (4 << 21) | ((4 + 0) << 16);
2088
2089	case SLJIT_GREATER:
2090	case SLJIT_GREATER_F64:
2091		return (12 << 21) | ((4 + 1) << 16);
2092
2093	case SLJIT_LESS_EQUAL:
2094	case SLJIT_LESS_EQUAL_F64:
2095		return (4 << 21) | ((4 + 1) << 16);
2096
2097	case SLJIT_SIG_LESS:
2098		return (12 << 21) | (0 << 16);
2099
2100	case SLJIT_SIG_GREATER_EQUAL:
2101		return (4 << 21) | (0 << 16);
2102
2103	case SLJIT_SIG_GREATER:
2104		return (12 << 21) | (1 << 16);
2105
2106	case SLJIT_SIG_LESS_EQUAL:
2107		return (4 << 21) | (1 << 16);
2108
2109	case SLJIT_OVERFLOW:
2110	case SLJIT_MUL_OVERFLOW:
2111		return (12 << 21) | (3 << 16);
2112
2113	case SLJIT_NOT_OVERFLOW:
2114	case SLJIT_MUL_NOT_OVERFLOW:
2115		return (4 << 21) | (3 << 16);
2116
2117	case SLJIT_EQUAL_F64:
2118		return (12 << 21) | ((4 + 2) << 16);
2119
2120	case SLJIT_NOT_EQUAL_F64:
2121		return (4 << 21) | ((4 + 2) << 16);
2122
2123	case SLJIT_UNORDERED_F64:
2124		return (12 << 21) | ((4 + 3) << 16);
2125
2126	case SLJIT_ORDERED_F64:
2127		return (4 << 21) | ((4 + 3) << 16);
2128
2129	default:
2130		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
2131		return (20 << 21);
2132	}
2133}
2134
2135SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2136{
2137	struct sljit_jump *jump;
2138	sljit_ins bo_bi_flags;
2139
2140	CHECK_ERROR_PTR();
2141	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2142
2143	bo_bi_flags = get_bo_bi_flags(type & 0xff);
2144	if (!bo_bi_flags)
2145		return NULL;
2146
2147	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2148	PTR_FAIL_IF(!jump);
2149	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2150	type &= 0xff;
2151
2152	/* In PPC, we don't need to touch the arguments. */
2153	if (type < SLJIT_JUMP)
2154		jump->flags |= IS_COND;
2155#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
2156	if (type >= SLJIT_CALL0)
2157		jump->flags |= IS_CALL;
2158#endif
2159
2160	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
2161	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
2162	jump->addr = compiler->size;
2163	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
2164	return jump;
2165}
2166
/* Emits an unconditional indirect jump or call to the address given by src
   (register, immediate or memory). The address is moved to the count
   register with MTCTR and a BCCTR with BO = 20 is emitted; its lowest bit
   is set for types from SLJIT_FAST_CALL upwards.
   Returns SLJIT_SUCCESS or the error of the failing emitter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	/* Only allocated for an immediate target; stays NULL otherwise. */
	struct sljit_jump *jump = NULL;
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0) {
			/* Calls get the target copied into TMP_CALL_REG (OR of src with itself). */
			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
			src_r = TMP_CALL_REG;
		}
		else
			src_r = src;
#else
		src_r = src;
#endif
	} else if (src & SLJIT_IMM) {
		/* Immediate target: recorded as a jump with a fixed address (JUMP_ADDR). The constant load is
		   emitted with value 0 here. NOTE(review): presumably patched from jump->u.target during code
		   generation - confirm there. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0)
			jump->flags |= IS_CALL;
#endif
		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
		src_r = TMP_CALL_REG;
	}
	else {
		/* Remaining case: the target address is loaded into TMP_CALL_REG with a word sized move. */
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
		src_r = TMP_CALL_REG;
	}

	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
	/* As in sljit_emit_jump, addr records the position of the branch instruction itself. */
	if (jump)
		jump->addr = compiler->size;
	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
2209
/* Get a bit from CR, all other bits are zeroed.
   Emits MFCR into dst followed by an RLWINM on dst with a rotate count of
   1 + bit and both mask fields set to 31, leaving only the lowest bit.
   Expects a `compiler` variable in scope and returns from the enclosing
   function through FAIL_IF on error. Wrapped in do { } while (0) so the
   macro expands to exactly one statement. */
#define GET_CR_BIT(bit, dst) \
	do { \
		FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
		FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); \
	} while (0)

/* Flips the lowest bit of dst (XORI with 0x1). Same calling conventions
   as GET_CR_BIT. */
#define INVERT_BIT(dst) \
	do { \
		FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); \
	} while (0)
2217
/* Turns the condition `type` into a 0 / 1 value read from the condition
   register and either moves it to dst (opcodes below SLJIT_ADD) or combines
   it with src through sljit_emit_op2 (opcodes from SLJIT_ADD upwards).
   Nothing is emitted when dst is SLJIT_UNUSED.
   Returns SLJIT_SUCCESS or the error of the failing emitter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 reg, input_flags;
	sljit_s32 flags = GET_ALL_FLAGS(op);
	/* dstw as passed by the caller, kept for the sljit_emit_op2 call at the end. */
	sljit_sw original_dstw = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	/* The bit is produced directly in dst only for a move into a register; otherwise in TMP_REG2. */
	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* For the combining form a memory source is loaded into TMP_REG1 up front. */
	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
		ADJUST_LOCAL_OFFSET(src, srcw);
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
#else
		input_flags = WORD_DATA;
#endif
		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	}

	/* The CR bit numbers used below match the BI values in get_bo_bi_flags; negated conditions read
	   the same bit and invert it afterwards. */
	switch (type & 0xff) {
	case SLJIT_EQUAL:
		GET_CR_BIT(2, reg);
		break;

	case SLJIT_NOT_EQUAL:
		GET_CR_BIT(2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		GET_CR_BIT(4 + 0, reg);
		break;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		GET_CR_BIT(4 + 0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		GET_CR_BIT(4 + 1, reg);
		break;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		GET_CR_BIT(4 + 1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_SIG_LESS:
		GET_CR_BIT(0, reg);
		break;

	case SLJIT_SIG_GREATER_EQUAL:
		GET_CR_BIT(0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_SIG_GREATER:
		GET_CR_BIT(1, reg);
		break;

	case SLJIT_SIG_LESS_EQUAL:
		GET_CR_BIT(1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		GET_CR_BIT(3, reg);
		break;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		GET_CR_BIT(3, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		break;

	case SLJIT_NOT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_UNORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		break;

	case SLJIT_ORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		INVERT_BIT(reg);
		break;

	default:
		SLJIT_ASSERT_STOP();
		break;
	}

	if (op < SLJIT_ADD) {
		/* Move form: finished if the bit was produced in dst itself, otherwise copy TMP_REG2 to dst. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (op == SLJIT_MOV)
			input_flags = WORD_DATA;
		else {
			op = SLJIT_MOV_U32;
			input_flags = INT_DATA;
		}
#else
		op = SLJIT_MOV;
		input_flags = WORD_DATA;
#endif
		if (reg != TMP_REG2)
			return SLJIT_SUCCESS;
		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
	}

	/* Combining form: dst = src <op> TMP_REG2, delegated to sljit_emit_op2.
	   NOTE(review): skip_checks presumably suppresses the argument checks of that call (TMP_REG2 is
	   an operand), and original_dstw is passed presumably because sljit_emit_op2 applies its own
	   local offset adjustment - confirm both in sljit_emit_op2. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
}
2358
2359SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2360{
2361	struct sljit_const *const_;
2362	sljit_s32 reg;
2363
2364	CHECK_ERROR_PTR();
2365	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2366	ADJUST_LOCAL_OFFSET(dst, dstw);
2367
2368	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2369	PTR_FAIL_IF(!const_);
2370	set_const(const_, compiler);
2371
2372	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
2373
2374	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2375
2376	if (dst & SLJIT_MEM)
2377		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2378	return const_;
2379}
2380