/* tcg-target.c revision 2e787a1d54ea4a34a6b75dcffe7dc9fa3aecb83f */
1/*
2 * Tiny Code Generator for QEMU
3 *
4 * Copyright (c) 2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
/* Register names indexed by TCG register number; used only for debug
   dumps, hence the NDEBUG guard.  */
#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif
35
/* Register allocation preference order.  On x86_64 the callee-saved
   registers come first and the SysV argument/return registers last, so
   values are less likely to be clobbered across calls.  */
static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX,
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
#endif
};
63
/* Registers used to pass integer call arguments, in argument order.
   64-bit: the six SysV AMD64 argument registers.  32-bit: the regparm
   registers (see tcg_target_get_call_iarg_regs_count).  */
static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
    TCG_REG_R8,
    TCG_REG_R9,
#else
    TCG_REG_EAX,
    TCG_REG_EDX,
    TCG_REG_ECX
#endif
};
78
/* Registers holding call return values (low part first; the register
   numbers are identical to RAX/RDX in 64-bit mode).  */
static const int tcg_target_call_oarg_regs[2] = {
    TCG_REG_EAX,
    TCG_REG_EDX
};
83
/* Address a translation block returns to when exiting.
   NOTE(review): presumably assigned while generating the prologue,
   which is outside this chunk — confirm.  */
static uint8_t *tb_ret_addr;
85
86static void patch_reloc(uint8_t *code_ptr, int type,
87                        tcg_target_long value, tcg_target_long addend)
88{
89    value += addend;
90    switch(type) {
91    case R_386_PC32:
92        value -= (uintptr_t)code_ptr;
93        if (value != (int32_t)value) {
94            tcg_abort();
95        }
96        *(uint32_t *)code_ptr = value;
97        break;
98    case R_386_PC8:
99        value -= (uintptr_t)code_ptr;
100        if (value != (int8_t)value) {
101            tcg_abort();
102        }
103        *(uint8_t *)code_ptr = value;
104        break;
105    default:
106        tcg_abort();
107    }
108}
109
/* maximum number of register used for input function arguments */
static inline int tcg_target_get_call_iarg_regs_count(int flags)
{
    /* x86_64 always passes the first six integer arguments in registers.  */
    if (TCG_TARGET_REG_BITS == 64) {
        return 6;
    }

    /* i386: the count depends on the calling convention in FLAGS.  */
    flags &= TCG_CALL_TYPE_MASK;
    switch(flags) {
    case TCG_CALL_TYPE_STD:
        /* Plain cdecl: all arguments on the stack.  */
        return 0;
    case TCG_CALL_TYPE_REGPARM_1:
    case TCG_CALL_TYPE_REGPARM_2:
    case TCG_CALL_TYPE_REGPARM:
        /* regparm(1..3); relies on the three REGPARM values being
           consecutive so the subtraction maps them to 1, 2, 3.  */
        return flags - TCG_CALL_TYPE_REGPARM_1 + 1;
    default:
        tcg_abort();
    }
}
129
/* parse target specific constraints */
/* Consume one constraint letter from *PCT_STR, filling in CT.
   Returns 0 on success, -1 on an unknown letter.  */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':   /* %eax only */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':   /* %ebx only */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':   /* %ecx only */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':   /* %edx only */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':   /* %esi only */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':   /* %edi only */
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        /* A register usable as a byte operand: any of the 16 on x86_64,
           but only %eax-%edx (mask 0xf) on i386.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'r':   /* any general register */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        /* Any register except those the load/store fast path needs as
           scratch / helper-argument registers.  */
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RSI);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_RDI);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
        }
        break;

    case 'e':   /* constant that is sign-extended from 32 bits */
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':   /* constant that is zero-extended from 32 bits */
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}
206
207/* test if a constant matches the constraint */
208static inline int tcg_target_const_match(tcg_target_long val,
209                                         const TCGArgConstraint *arg_ct)
210{
211    int ct = arg_ct->ct;
212    if (ct & TCG_CT_CONST) {
213        return 1;
214    }
215    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
216        return 1;
217    }
218    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
219        return 1;
220    }
221    return 0;
222}
223
/* Only the low 3 bits of a register number fit in ModRM/SIB fields;
   on x86_64 the high bit travels in the REX prefix instead.  */
#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)	((x) & 7)
#else
# define LOWREGMASK(x)	(x)
#endif

/* Flag bits mixed into the opcode value handed to tcg_out_opc; the
   emitter strips them and produces the matching prefix bytes.  */
#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_DATA16	0x200		/* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32	0x400		/* 0x67 opcode prefix */
# define P_REXW		0x800		/* Set REX.W = 1 */
# define P_REXB_R	0x1000		/* REG field as byte register */
# define P_REXB_RM	0x2000		/* R/M field as byte register */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
#endif
243
/* x86 opcode bytes used by the emitters below, optionally combined
   with the P_* prefix flags above.  Suffixes follow the Intel manual's
   operand notation (Ev/Gv/Iz/Ib...).  */
#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

/* Opcode groups: the real operation is selected by the /reg field.  */
#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)
285
/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.
   JCC_JMP is a sentinel meaning "unconditional".  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf
336
/* Map a TCG comparison condition to the x86 condition code with the
   same meaning (signed L/G vs unsigned B/A variants).  */
static const uint8_t tcg_cond_to_jcc[10] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
349
#if TCG_TARGET_REG_BITS == 64
/* Emit the prefix bytes (0x66, 0x67, REX, 0x0f) demanded by the P_*
   flags in OPC, then the opcode byte itself.  R, RM and X are the full
   register numbers destined for the ModRM reg, ModRM r/m and SIB index
   fields; only their high bits are consumed here (for REX), the low
   bits are emitted by the callers.  */
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;		/* REX.W */
    rex |= (r & 8) >> 1;		/* REX.R */
    rex |= (x & 8) >> 2;		/* REX.X */
    rex |= (rm & 8) >> 3;		/* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
/* i386 variant: only the 0x66 and 0x0f prefixes exist.  */
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on optimization may not be able to exclude them.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif
403
/* Emit OPC with a register-direct ModRM byte: mod=11, reg=R, r/m=RM.  */
static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
409
/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   We handle either RM and INDEX missing with a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift,
                                     tcg_target_long offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            /* NOTE(review): 5 = ModRM + disp32 plus the opcode byte;
               ~rm adds the size of any trailing immediate.  Prefix
               bytes do not seem to be counted here — confirm callers
               never combine this path with extra prefixes.  */
            tcg_target_long pc = (tcg_target_long)s->code_ptr + 5 + ~rm;
            tcg_target_long disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                /* mod=00, r/m=101 means rip+disp32 in 64-bit mode.  */
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                /* r/m=100 escapes to SIB; SIB base=101 with mod=00 is
                   a bare disp32.  */
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    /* Trailing displacement, sized as decided above.  */
    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
497
/* A simplification of the above with no index or shift: a plain
   base-register + displacement (or absolute, rm < 0) address mode.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, tcg_target_long offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}
504
/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    /* The GvEv arithmetic opcodes are spaced 8 apart, so the subop
       selects the operation via opcode bits 3..5.  */
    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}
514
515static inline void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg)
516{
517    if (arg != ret) {
518        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
519        tcg_out_modrm(s, opc, ret, arg);
520    }
521}
522
/* Load the constant ARG into register RET with the shortest usable
   encoding.  NB: loading zero uses XOR and so clobbers the flags.  */
static void tcg_out_movi(TCGContext *s, TCGType type,
                         int ret, tcg_target_long arg)
{
    if (arg == 0) {
        /* xor r, r */
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    } else if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        /* 32-bit mov r32, imm32 (zero-extends in 64-bit mode).  */
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
    } else if (arg == (int32_t)arg) {
        /* movq with sign-extended imm32.  */
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
    } else {
        /* Full movabs with a 64-bit immediate.  The double shift avoids
           an out-of-range shift when tcg_target_long is 32 bits.  */
        tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        tcg_out32(s, arg >> 31 >> 1);
    }
}
541
/* Push an immediate, preferring the sign-extended 8-bit form.
   Values that need more than 32 bits cannot be pushed and abort.  */
static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}
554
/* Push register REG (single-byte opcode + register number).  */
static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}
559
/* Pop into register REG.  */
static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}
564
/* Load RET (32 or 64 bits wide, per TYPE) from memory at ARG1+ARG2.  */
static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}
571
/* Store ARG (32 or 64 bits wide, per TYPE) to memory at ARG1+ARG2.  */
static inline void tcg_out_st(TCGContext *s, TCGType type, int arg,
                              int arg1, tcg_target_long arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}
578
/* Emit an immediate-count shift/rotate (SHIFT_* sub-opcode in SUBOPC)
   of REG; a count of one uses the shorter dedicated encoding.  */
static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}
592
/* Byte-swap the 32-bit value in REG (bswap r32).  */
static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}
597
/* Swap the two bytes of the 16-bit low half of REG (rolw $8).  */
static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}
602
/* Zero-extend the low byte of SRC into DEST.  */
static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    /* On i386 only %eax-%edx have addressable low bytes.  */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}
609
/* Sign-extend the low byte of SRC into DEST; REXW widens to 64 bits.  */
static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    /* On i386 only %eax-%edx have addressable low bytes.  */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}
616
/* Zero-extend the low 16 bits of SRC into DEST.  */
static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}
622
/* Sign-extend the low 16 bits of SRC into DEST; REXW widens to 64.  */
static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}
628
/* Zero-extend the low 32 bits of SRC into DEST.  */
static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}
634
/* Sign-extend the low 32 bits of SRC into DEST (movslq, 64-bit only).  */
static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}
639
/* Byte-swap the full 64-bit value in REG (bswap r64).  */
static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}
644
/* Generate r0 C= VAL, where C is an ARITH_* code optionally combined
   with P_REXW.  A non-zero CF means the operation must leave the carry
   flag correct, which forbids the INC/DEC shortcut (INC/DEC do not
   write CF).  */
static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    /* Split any prefix bits (e.g. P_REXW) off the ARITH_* code.  */
    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
       partial flags update stalls on Pentium4 and are not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        /* ADD +1 / SUB -1 increment; ADD -1 / SUB +1 decrement.  */
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    /* AND with common byte/word/dword masks is better expressed as a
       zero-extension move.  */
    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    /* General case: sign-extended imm8 if it fits, else imm32.  */
    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    /* 64-bit immediate that fits neither form: not encodable.  */
    tcg_abort();
}
705
706static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
707{
708    if (val != 0) {
709        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
710    }
711}
712
#undef small  /* for mingw build */

/* Use SMALL != 0 to force a short forward branch.  */
/* Emit a jump to LABEL_INDEX: unconditional when OPC == -1 (JCC_JMP),
   otherwise conditional on x86 condition code OPC.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        /* Resolved (backward) label: use the 2-byte short form if the
           displacement from the end of that form fits in 8 bits.  */
        val = l->u.value - (tcg_target_long)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            /* -5 / -6 adjust for the lengths of the long jmp/jcc forms.  */
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        /* Unresolved label, short form: leave a 1-byte hole and record
           an 8-bit pc-relative relocation against it.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        /* Unresolved label, long form: 4-byte hole plus 32-bit reloc.
           The addend compensates for pc pointing past the operand.  */
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
761
762static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
763                        int const_arg2, int rexw)
764{
765    if (const_arg2) {
766        if (arg2 == 0) {
767            /* test r, r */
768            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
769        } else {
770            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
771        }
772    } else {
773        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
774    }
775}
776
/* Compare two 32-bit operands and branch to LABEL_INDEX on COND.  */
static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
784
#if TCG_TARGET_REG_BITS == 64
/* Compare two 64-bit operands and branch to LABEL_INDEX on COND.  */
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle cross basic blocks temporaries */
/* 64-bit conditional branch on a 32-bit host.  ARGS holds the value
   as (lo, hi) pairs: args[0]/[1] vs args[2]/[3], condition in args[4],
   target label in args[5].  The high words are compared first; the
   generated JNE to LABEL_NEXT skips the low-word test when the high
   words already decide the result.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        /* Unequal low words fall through to label_next (not taken).  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        /* Taken if either half differs.  */
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        /* Signed compare on the high words; unsigned on the low.  */
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        /* Unsigned compares use unsigned conditions on both halves.  */
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, (tcg_target_long)s->code_ptr);
}
#endif
876
/* Set DEST to the 0/1 result of a 32-bit comparison.  */
static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    /* setcc writes only the low byte; zero-extend it afterwards.  */
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
884
#if TCG_TARGET_REG_BITS == 64
/* Set DEST to the 0/1 result of a 64-bit comparison.  */
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    /* setcc writes only the low byte; zero-extend it afterwards.  */
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
/* 64-bit setcond on a 32-bit host, built on tcg_out_brcond2.
   args[0] is the destination; args[1..5] have brcond2's layout minus
   the label, which is synthesized here.  */
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, (tcg_target_long)s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, (tcg_target_long)s->code_ptr);
    }
}
#endif
936
/* Emit a call (CALL != 0) or jump to the absolute address DEST,
   preferring the 5-byte rel32 form.  */
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
    /* -5 accounts for the length of the rel32 call/jmp itself.  */
    tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        /* Out of rel32 range (possible only in 64-bit mode): go
           indirectly through a scratch register.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}
950
/* Emit a call to the absolute address DEST.  */
static inline void tcg_out_calli(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 1, dest);
}
955
/* Emit a jump to the absolute address DEST.  */
static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
{
    tcg_out_branch(s, 0, dest);
}
960
961#if defined(CONFIG_SOFTMMU)
962
963#include "exec/softmmu_defs.h"
964
/* Softmmu slow-path load helpers, indexed by log2 of the access size
   (0 = 1 byte ... 3 = 8 bytes).  */
static void *qemu_ld_helpers[4] = {
    __ldb_mmu,
    __ldw_mmu,
    __ldl_mmu,
    __ldq_mmu,
};
971
/* Softmmu slow-path store helpers, indexed by log2 of the access size
   (0 = 1 byte ... 3 = 8 bytes).  */
static void *qemu_st_helpers[4] = {
    __stb_mmu,
    __stw_mmu,
    __stl_mmu,
    __stq_mmu,
};
978
979/* Perform the TLB load and compare.
980
981   Inputs:
   ADDRLO_IDX contains the index into ARGS of the low part of the
   address; the high part of the address is at ADDRLO_IDX+1.
984
985   MEM_INDEX and S_BITS are the memory context and log2 size of the load.
986
987   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
988   This should be offsetof addr_read or addr_write.
989
990   Outputs:
991   LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
992   positions of the displacements of forward jumps to the TLB miss case.
993
994   First argument register is loaded with the low part of the address.
995   In the TLB hit case, it has been adjusted as indicated by the TLB
996   and so is a host address.  In the TLB miss case, it continues to
997   hold a guest address.
998
999   Second argument register is clobbered.  */
1000
/* Emit the TLB probe described in the block comment above: compare the
   guest address against the TLB entry for MEM_INDEX/WHICH, branch to a
   not-yet-patched miss label on mismatch, and on a hit turn the guest
   address in the first call-argument register into a host address.  */
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                    int mem_index, int s_bits,
                                    const TCGArg *args,
                                    uint8_t **label_ptr, int which)
{
    const int addrlo = args[addrlo_idx];
    const int r0 = tcg_target_call_iarg_regs[0];
    const int r1 = tcg_target_call_iarg_regs[1];
    TCGType type = TCG_TYPE_I32;
    int rexw = 0;

    /* Operate on the full 64-bit guest address only when both host and
       guest are 64-bit; otherwise 32-bit operations suffice.  */
    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) {
        type = TCG_TYPE_I64;
        rexw = P_REXW;
    }

    tcg_out_mov(s, type, r1, addrlo);
    tcg_out_mov(s, type, r0, addrlo);

    /* r1 = TLB index: page number scaled by the TLB entry size.  */
    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    /* r0 = page-aligned address; keeping the low s_bits set makes the
       compare below also fail for accesses that straddle a page.  */
    tgen_arithi(s, ARITH_AND + rexw, r0,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    /* Mask the index into the table, already scaled to byte offsets.  */
    tgen_arithi(s, ARITH_AND + rexw, r1,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    /* r1 = &env->tlb_table[mem_index][index].addr_{read,write}  */
    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
                             offsetof(CPUOldState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r1), r0 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);

    /* Reload the unmasked address; needed by both hit and miss paths.  */
    tcg_out_mov(s, type, r0, addrlo);

    /* jne label1 -- short jump, 1-byte displacement patched later.  */
    tcg_out8(s, OPC_JCC_short + JCC_JNE);
    label_ptr[0] = s->code_ptr;
    s->code_ptr++;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* 64-bit guest on 32-bit host: also compare the address high
           word, stored in the second word of the TLB comparator.  */
        /* cmp 4(r1), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);

        /* jne label1 */
        tcg_out8(s, OPC_JCC_short + JCC_JNE);
        label_ptr[1] = s->code_ptr;
        s->code_ptr++;
    }

    /* TLB Hit.  */

    /* add addend(r1), r0 -- convert guest address to host address.  */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
                         offsetof(CPUTLBEntry, addend) - which);
}
1058#endif
1059
/* Emit the actual load from host memory at BASE+OFS into DATALO (and
   DATAHI for 64-bit data on a 32-bit host).  SIZEOP encodes log2 of the
   access size in its low 2 bits, plus 4 for a sign-extending load.
   Byte-swaps for a big-endian guest are folded in.  */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    switch (sizeop) {
    case 0:
        /* 8-bit zero-extending load; no bswap needed for one byte.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
        break;
    case 0 | 4:
        /* 8-bit sign-extending load.  */
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
        break;
    case 1:
        /* 16-bit zero-extending load.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case 1 | 4:
        /* 16-bit sign-extending load.  */
        if (bswap) {
            /* Load zero-extended, swap the bytes, then sign-extend.  */
            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
        }
        break;
    case 2:
        /* 32-bit load.  */
        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        /* 32-bit sign-extending load (64-bit host only).  */
        if (bswap) {
            /* Load, swap, then sign-extend to 64 bits.  */
            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
        }
        break;
#endif
    case 3:
        /* 64-bit load.  */
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            /* 32-bit host: load as a register pair.  When swapping,
               exchange the roles of the two halves up front so each half
               lands in its final register before the 32-bit bswaps.  */
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            /* If BASE aliases DATALO, load the half that clobbers the
               base register last.  */
            if (base != datalo) {
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
            } else {
                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}
1136
1137/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
1138   EAX. It will be useful once fixed registers globals are less
1139   common. */
/* Emit a guest memory load.  ARGS holds the data register(s) followed by
   the address register(s) and, for softmmu, the memory index.  OPC is the
   sizeop as described for tcg_out_qemu_ld_direct.  With CONFIG_SOFTMMU a
   TLB fast path is emitted inline and the miss path calls the
   qemu_ld_helpers slow path; without it the guest address is used
   directly, offset by GUEST_BASE.  */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits, arg_idx;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    /* A 64-bit load on a 32-bit host produces a register pair, shifting
       the address operands up by one slot.  */
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    /* The memory index follows the (1- or 2-word) address.  */
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc & 3;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    /* The first argument register now holds the host address.  */
    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 -- skip the slow path; displacement patched below.  */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the forward jne displacement(s) from the TLB probe.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* The first argument is already loaded with addrlo.  */
    arg_idx = 1;
    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
        /* Pass the address high word as a separate argument.  */
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx++],
                    args[addrlo_idx + 1]);
    }
    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                 mem_index);
    tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);

    /* Move/extend the helper's return value (in EAX/RAX, plus EDX for
       64-bit data on 32-bit hosts) into the destination register(s).  */
    switch(opc) {
    case 0 | 4:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 1 | 4:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case 0:
        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
        break;
    case 1:
        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
        break;
    case 2:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
#if TCG_TARGET_REG_BITS == 64
    case 2 | 4:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case 3:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* Copying EAX first would clobber the high half in EDX.  */
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* label2: patch the fast path's jump over the slow path.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            /* GUEST_BASE didn't fit the int32_t displacement: add it to
               the address in a scratch register instead.  */
            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
1254
/* Emit the actual store of DATALO (and DATAHI for 64-bit data on a
   32-bit host) to host memory at BASE+OFS.  SIZEOP is log2 of the access
   size.  Byte-swaps for a big-endian guest go through a scratch register
   so the source data registers are preserved.  */
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                                   int base, tcg_target_long ofs, int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
    const int bswap = 1;
#else
    const int bswap = 0;
#endif
    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that the second argument reg is definitely free here.  */
    int scratch = tcg_target_call_iarg_regs[1];

    switch (sizeop) {
    case 0:
        /* 8-bit store; no bswap needed for one byte.  */
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
        break;
    case 1:
        /* 16-bit store.  */
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
        break;
    case 2:
        /* 32-bit store.  */
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
        break;
    case 3:
        /* 64-bit store.  */
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
        } else if (bswap) {
            /* 32-bit host, byte-swapped: the halves swap places as well
               as bytes, so the high register goes to the low offset.  */
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
        }
        break;
    default:
        tcg_abort();
    }
}
1313
/* Emit a guest memory store.  ARGS holds the data register(s) followed by
   the address register(s) and, for softmmu, the memory index.  OPC is
   log2 of the access size.  Mirrors tcg_out_qemu_ld: inline TLB fast
   path plus a slow-path call to qemu_st_helpers, or a direct GUEST_BASE
   access without softmmu.  */
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                            int opc)
{
    int data_reg, data_reg2 = 0;
    int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
    int mem_index, s_bits;
    int stack_adjust;
    uint8_t *label_ptr[3];
#endif

    data_reg = args[0];
    addrlo_idx = 1;
    /* A 64-bit store on a 32-bit host takes a data register pair,
       shifting the address operands up by one slot.  */
    if (TCG_TARGET_REG_BITS == 32 && opc == 3) {
        data_reg2 = args[1];
        addrlo_idx = 2;
    }

#if defined(CONFIG_SOFTMMU)
    /* The memory index follows the (1- or 2-word) address.  */
    mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS)];
    s_bits = opc;

    tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    /* The first argument register now holds the host address.  */
    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
                           tcg_target_call_iarg_regs[0], 0, opc);

    /* jmp label2 -- skip the slow path; displacement patched below.  */
    tcg_out8(s, OPC_JMP_short);
    label_ptr[2] = s->code_ptr;
    s->code_ptr++;

    /* TLB Miss.  */

    /* label1: patch the forward jne displacement(s) from the TLB probe.  */
    *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
    }

    /* XXX: move that code at the end of the TB */
    /* Marshal data and mem_index for the helper call.  The guest address
       is already in the first argument register(s); on 32-bit hosts,
       arguments that don't fit in registers are pushed on the stack and
       STACK_ADJUST records how much to pop after the call.  */
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    TCG_REG_RSI, data_reg);
        tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_RDX, mem_index);
        stack_adjust = 0;
    } else if (TARGET_LONG_BITS == 32) {
        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, data_reg);
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg2);
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        } else {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_ECX, mem_index);
            stack_adjust = 0;
        }
    } else {
        if (opc == 3) {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            tcg_out_pushi(s, mem_index);
            tcg_out_push(s, data_reg2);
            tcg_out_push(s, data_reg);
            stack_adjust = 12;
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_EDX, args[addrlo_idx + 1]);
            /* Narrow the data to the access size before the call.  */
            switch(opc) {
            case 0:
                tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
                break;
            case 1:
                tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
                break;
            case 2:
                tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_ECX, data_reg);
                break;
            }
            tcg_out_pushi(s, mem_index);
            stack_adjust = 4;
        }
    }

    tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);

    if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
        /* Pop and discard.  This is 2 bytes smaller than the add.  */
        tcg_out_pop(s, TCG_REG_ECX);
    } else if (stack_adjust != 0) {
        tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
    }

    /* label2: patch the fast path's jump over the slow path.  */
    *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
#else
    {
        int32_t offset = GUEST_BASE;
        int base = args[addrlo_idx];

        if (TCG_TARGET_REG_BITS == 64) {
            /* ??? We assume all operations have left us with register
               contents that are zero extended.  So far this appears to
               be true.  If we want to enforce this, we can either do
               an explicit zero-extension here, or (if GUEST_BASE == 0)
               use the ADDR32 prefix.  For now, do nothing.  */

            /* GUEST_BASE didn't fit the int32_t displacement: add it to
               the address in a scratch register instead.  */
            if (offset != GUEST_BASE) {
                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_RDI, GUEST_BASE);
                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_RDI, base);
                base = TCG_REG_RDI, offset = 0;
            }
        }

        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
    }
#endif
}
1431
/* Central code emitter: translate one TCG opcode OPC with operands ARGS
   (CONST_ARGS flags which entries are immediates rather than registers)
   into host x86 instructions.  */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                              const TCGArg *args, const int *const_args)
{
    int c, rexw = 0;

/* Fold matching _i32/_i64 cases together; on 64-bit hosts the _i64
   case falls through after setting the REX.W prefix.  */
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch(opc) {
    case INDEX_op_exit_tb:
        /* Return args[0] in EAX and jump back to the epilogue.  */
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (tcg_target_long) tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            /* Record where the rel32 lives so chaining can patch it.  */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (tcg_target_long)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_jmp:
        if (const_args[0]) {
            tcg_out_jmp(s, args[0]);
        } else {
            /* jmp *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                             args[0], args[1], args[2]);
        break;
    OP_32_64(st16):
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                             args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
                /* lea dest, c3(a1) -- no index register.  */
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        /* Shared tail for 2-operand ALU ops: reg,imm or reg,reg form.  */
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            /* Prefer the imm8 form of IMUL when the constant fits.  */
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
        /* IDIV uses the fixed EDX:EAX pair; only the divisor varies.  */
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        /* Shared tail for shifts/rotates: by-immediate or by-CL form.  */
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    /* Guest memory accesses: the constant is the sizeop (low 2 bits =
       log2 size, +4 = sign-extend) passed to tcg_out_qemu_ld/st.  */
    case INDEX_op_qemu_ld8u:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld8s:
        tcg_out_qemu_ld(s, args, 0 | 4);
        break;
    case INDEX_op_qemu_ld16u:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_ld16s:
        tcg_out_qemu_ld(s, args, 1 | 4);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_qemu_ld32u:
#endif
    case INDEX_op_qemu_ld32:
        tcg_out_qemu_ld(s, args, 2);
        break;
    case INDEX_op_qemu_ld64:
        tcg_out_qemu_ld(s, args, 3);
        break;

    case INDEX_op_qemu_st8:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st16:
        tcg_out_qemu_st(s, args, 1);
        break;
    case INDEX_op_qemu_st32:
        tcg_out_qemu_st(s, args, 2);
        break;
    case INDEX_op_qemu_st64:
        tcg_out_qemu_st(s, args, 3);
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
    case INDEX_op_mulu2_i32:
        tcg_out_modrm(s, OPC_GRP3_Ev, EXT3_MUL, args[3]);
        break;
    case INDEX_op_add2_i32:
        /* Double-word add: low-half ADD, then high-half ADC.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
        }
        break;
    case INDEX_op_sub2_i32:
        /* Double-word subtract: low-half SUB, then high-half SBB.  */
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
        }
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_qemu_ld32s:
        tcg_out_qemu_ld(s, args, 2 | 4);
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    default:
        tcg_abort();
    }

#undef OP_32_64
}
1756
/* Per-opcode operand constraints for the register allocator.  Each entry
   lists one constraint string per operand; letters are interpreted by
   the target's constraint parser (not visible in this file).  Generic
   TCG conventions: "r" = any register, "i" = immediate, and a digit
   ("0", "1") aliases the operand to the same-numbered output operand.
   "q", "a", "d", "c", "e", "L", "Z" are target-specific classes --
   presumably byte-addressable regs, EAX, EDX, ECX, sign-extended imm,
   qemu_ld/st-reserved regs; confirm against the constraint parser.  */
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_jmp, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "q", "r" } },
    { INDEX_op_st16_i32, { "r", "r" } },
    { INDEX_op_st_i32, { "r", "r" } },

    /* add_i32 allows non-matching dest (emitted via LEA, see tcg_out_op);
       the other ALU ops require dest to match the first source.  */
    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    /* Variable shift counts must be in CL ("c").  */
    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "r", "r" } },
    { INDEX_op_st16_i64, { "r", "r" } },
    { INDEX_op_st32_i64, { "r", "r" } },
    { INDEX_op_st_i64, { "r", "r" } },

    /* 64-bit immediates use "e" since x86-64 ALU immediates are limited
       to sign-extended 32 bits.  */
    { INDEX_op_add_i64, { "r", "0", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },
#endif

    /* qemu_ld/st operand counts vary with how many host words the guest
       address needs; extra "L" operands are the address high words.  */
#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld32u, { "r", "L" } },
    { INDEX_op_qemu_ld32s, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "L" } },

    { INDEX_op_qemu_st8, { "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld8u, { "r", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L" } },
    { INDEX_op_qemu_st16, { "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
    { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },

    { INDEX_op_qemu_st8, { "cb", "L", "L" } },
    { INDEX_op_qemu_st16, { "L", "L", "L" } },
    { INDEX_op_qemu_st32, { "L", "L", "L" } },
    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
    { -1 },  /* end-of-table sentinel */
};
1899
1900static int tcg_target_callee_save_regs[] = {
1901#if TCG_TARGET_REG_BITS == 64
1902    TCG_REG_RBP,
1903    TCG_REG_RBX,
1904    TCG_REG_R12,
1905    TCG_REG_R13,
1906    TCG_REG_R14, /* Currently used for the global env. */
1907    TCG_REG_R15,
1908#else
1909    TCG_REG_EBP, /* Currently used for the global env. */
1910    TCG_REG_EBX,
1911    TCG_REG_ESI,
1912    TCG_REG_EDI,
1913#endif
1914};
1915
1916/* Generate global QEMU prologue and epilogue code */
1917static void tcg_target_qemu_prologue(TCGContext *s)
1918{
1919    int i, frame_size, push_size, stack_addend;
1920
1921    /* TB prologue */
1922
1923    /* Reserve some stack space, also for TCG temps.  */
1924    push_size = 1 + ARRAY_SIZE(tcg_target_callee_save_regs);
1925    push_size *= TCG_TARGET_REG_BITS / 8;
1926
1927    frame_size = push_size + TCG_STATIC_CALL_ARGS_SIZE +
1928        CPU_TEMP_BUF_NLONGS * sizeof(long);
1929    frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
1930        ~(TCG_TARGET_STACK_ALIGN - 1);
1931    stack_addend = frame_size - push_size;
1932    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
1933                  CPU_TEMP_BUF_NLONGS * sizeof(long));
1934
1935    /* Save all callee saved registers.  */
1936    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
1937        tcg_out_push(s, tcg_target_callee_save_regs[i]);
1938    }
1939
1940    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
1941
1942    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1943
1944    /* jmp *tb.  */
1945    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
1946
1947    /* TB epilogue */
1948    tb_ret_addr = s->code_ptr;
1949
1950    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
1951
1952    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
1953        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
1954    }
1955    tcg_out_opc(s, OPC_RET, 0, 0, 0);
1956}
1957
1958static void tcg_target_init(TCGContext *s)
1959{
1960#if !defined(CONFIG_USER_ONLY)
1961    /* fail safe */
1962    if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry))
1963        tcg_abort();
1964#endif
1965
1966    if (TCG_TARGET_REG_BITS == 64) {
1967        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
1968        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
1969    } else {
1970        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
1971    }
1972
1973    tcg_regset_clear(tcg_target_call_clobber_regs);
1974    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
1975    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
1976    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
1977    if (TCG_TARGET_REG_BITS == 64) {
1978        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
1979        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
1980        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
1981        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
1982        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
1983        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
1984    }
1985
1986    tcg_regset_clear(s->reserved_regs);
1987    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
1988
1989    tcg_add_target_add_op_defs(x86_op_defs);
1990}
1991