/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "tcg-be-ldst.h"

#ifdef _WIN32
/* For some reason, the Mingw32 headers define the 'small' macro which
   prevents this source from compiling.  */
#undef small
#endif

#ifndef NDEBUG
static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#if TCG_TARGET_REG_BITS == 64
    "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
    "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
#else
    "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
#endif
};
#endif

static const int tcg_target_reg_alloc_order[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14,
    TCG_REG_R15,
    TCG_REG_R10,
    TCG_REG_R11,
    TCG_REG_R9,
    TCG_REG_R8,
    TCG_REG_RCX,
    TCG_REG_RDX,
    TCG_REG_RSI,
    TCG_REG_RDI,
    TCG_REG_RAX,
#else
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
    TCG_REG_EBP,
    TCG_REG_ECX,
    TCG_REG_EDX,
    TCG_REG_EAX,
#endif
};

static const int tcg_target_call_iarg_regs[] = {
#if TCG_TARGET_REG_BITS == 64
#if defined(_WIN64)
    TCG_REG_RCX,
    TCG_REG_RDX,
#else
    TCG_REG_RDI,
    TCG_REG_RSI,
    TCG_REG_RDX,
    TCG_REG_RCX,
#endif
    TCG_REG_R8,
    TCG_REG_R9,
#else
    /* 32-bit mode uses a stack-based calling convention (GCC default). */
#endif
};

static const int tcg_target_call_oarg_regs[] = {
    TCG_REG_EAX,
#if TCG_TARGET_REG_BITS == 32
    TCG_REG_EDX
#endif
};

/* Registers used with L constraint, which are the first argument
   registers on x86_64, and two random call-clobbered registers on
   i386. */
#if TCG_TARGET_REG_BITS == 64
# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
#else
# define TCG_REG_L0 TCG_REG_EAX
# define TCG_REG_L1 TCG_REG_EDX
#endif

/* For 32-bit, we are going to attempt to determine at runtime whether cmov
   is available.  However, the host compiler must supply <cpuid.h>, as we're
   not going to go so far as our own inline assembly.  */
#if TCG_TARGET_REG_BITS == 64
# define have_cmov 1
#elif defined(CONFIG_CPUID_H)
#include <cpuid.h>
static bool have_cmov;
#else
# define have_cmov 0
#endif
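
/* A minimal sketch of how have_cmov could be filled in at startup using
   GCC's <cpuid.h>, where bit_CMOV is CPUID.1:EDX bit 15.  This is
   illustration only; the actual initialization is done elsewhere in the
   backend.  */
#if 0
static void detect_cmov(void)
{
    unsigned int a, b, c, d;
    if (__get_cpuid(1, &a, &b, &c, &d)) {
        have_cmov = (d & bit_CMOV) != 0;
    }
}
#endif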

static uint8_t *tb_ret_addr;

static void patch_reloc(uint8_t *code_ptr, int type,
                        intptr_t value, intptr_t addend)
{
    value += addend;
    switch(type) {
    case R_386_PC32:
        value -= (uintptr_t)code_ptr;
        if (value != (int32_t)value) {
            tcg_abort();
        }
        *(uint32_t *)code_ptr = value;
        break;
    case R_386_PC8:
        value -= (uintptr_t)code_ptr;
        if (value != (int8_t)value) {
            tcg_abort();
        }
        *(uint8_t *)code_ptr = value;
        break;
    default:
        tcg_abort();
    }
}
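
/* Worked example (values assumed for illustration): a R_386_PC32 reloc
   with code_ptr at 0x1000, target value 0x2000 and addend -4 stores the
   displacement 0x2000 - 4 - 0x1000 = 0xffc, i.e. relative to the end of
   the 4-byte field, which is what the CPU expects for pc-relative
   operands.  */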

/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
    const char *ct_str;

    ct_str = *pct_str;
    switch(ct_str[0]) {
    case 'a':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EAX);
        break;
    case 'b':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
        break;
    case 'c':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
        break;
    case 'd':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDX);
        break;
    case 'S':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_ESI);
        break;
    case 'D':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set_reg(ct->u.regs, TCG_REG_EDI);
        break;
    case 'q':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xf);
        }
        break;
    case 'Q':
        ct->ct |= TCG_CT_REG;
        tcg_regset_set32(ct->u.regs, 0, 0xf);
        break;
    case 'r':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        break;

        /* qemu_ld/st address constraint */
    case 'L':
        ct->ct |= TCG_CT_REG;
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_regset_set32(ct->u.regs, 0, 0xffff);
        } else {
            tcg_regset_set32(ct->u.regs, 0, 0xff);
        }
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
        break;

    case 'e':
        ct->ct |= TCG_CT_CONST_S32;
        break;
    case 'Z':
        ct->ct |= TCG_CT_CONST_U32;
        break;

    default:
        return -1;
    }
    ct_str++;
    *pct_str = ct_str;
    return 0;
}

/* test if a constant matches the constraint */
static inline int tcg_target_const_match(tcg_target_long val,
                                         const TCGArgConstraint *arg_ct)
{
    int ct = arg_ct->ct;
    if (ct & TCG_CT_CONST) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
        return 1;
    }
    if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
        return 1;
    }
    return 0;
}
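
/* For example, on a 64-bit host: -1 matches the 'e' constraint
   (TCG_CT_CONST_S32) but not 'Z' (TCG_CT_CONST_U32), while 0xffffffff
   matches 'Z' but not 'e', since it is not representable as a
   sign-extended 32-bit value.  */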

#if TCG_TARGET_REG_BITS == 64
# define LOWREGMASK(x)	((x) & 7)
#else
# define LOWREGMASK(x)	(x)
#endif

#define P_EXT		0x100		/* 0x0f opcode prefix */
#define P_DATA16	0x200		/* 0x66 opcode prefix */
#if TCG_TARGET_REG_BITS == 64
# define P_ADDR32	0x400		/* 0x67 opcode prefix */
# define P_REXW		0x800		/* Set REX.W = 1 */
# define P_REXB_R	0x1000		/* REG field as byte register */
# define P_REXB_RM	0x2000		/* R/M field as byte register */
# define P_GS           0x4000          /* gs segment override */
#else
# define P_ADDR32	0
# define P_REXW		0
# define P_REXB_R	0
# define P_REXB_RM	0
# define P_GS           0
#endif

#define OPC_ARITH_EvIz	(0x81)
#define OPC_ARITH_EvIb	(0x83)
#define OPC_ARITH_GvEv	(0x03)		/* ... plus (ARITH_FOO << 3) */
#define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP	(0xc8 | P_EXT)
#define OPC_CALL_Jz	(0xe8)
#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
#define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32	(0x48)
#define OPC_IMUL_GvEv	(0xaf | P_EXT)
#define OPC_IMUL_GvEvIb	(0x6b)
#define OPC_IMUL_GvEvIz	(0x69)
#define OPC_INC_r32	(0x40)
#define OPC_JCC_long	(0x80 | P_EXT)	/* ... plus condition code */
#define OPC_JCC_short	(0x70)		/* ... plus condition code */
#define OPC_JMP_long	(0xe9)
#define OPC_JMP_short	(0xeb)
#define OPC_LEA         (0x8d)
#define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
#define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
#define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
#define OPC_MOVB_EvIz   (0xc6)
#define OPC_MOVL_EvIz	(0xc7)
#define OPC_MOVL_Iv     (0xb8)
#define OPC_MOVSBL	(0xbe | P_EXT)
#define OPC_MOVSWL	(0xbf | P_EXT)
#define OPC_MOVSLQ	(0x63 | P_REXW)
#define OPC_MOVZBL	(0xb6 | P_EXT)
#define OPC_MOVZWL	(0xb7 | P_EXT)
#define OPC_POP_r32	(0x58)
#define OPC_PUSH_r32	(0x50)
#define OPC_PUSH_Iv	(0x68)
#define OPC_PUSH_Ib	(0x6a)
#define OPC_RET		(0xc3)
#define OPC_SETCC	(0x90 | P_EXT | P_REXB_RM) /* ... plus cc */
#define OPC_SHIFT_1	(0xd1)
#define OPC_SHIFT_Ib	(0xc1)
#define OPC_SHIFT_cl	(0xd3)
#define OPC_TESTL	(0x85)
#define OPC_XCHG_ax_r32	(0x90)

#define OPC_GRP3_Ev	(0xf7)
#define OPC_GRP5	(0xff)

/* Group 1 opcode extensions for 0x80-0x83.
   These are also used as modifiers for OPC_ARITH.  */
#define ARITH_ADD 0
#define ARITH_OR  1
#define ARITH_ADC 2
#define ARITH_SBB 3
#define ARITH_AND 4
#define ARITH_SUB 5
#define ARITH_XOR 6
#define ARITH_CMP 7

/* Group 2 opcode extensions for 0xc0, 0xc1, 0xd0-0xd3.  */
#define SHIFT_ROL 0
#define SHIFT_ROR 1
#define SHIFT_SHL 4
#define SHIFT_SHR 5
#define SHIFT_SAR 7

/* Group 3 opcode extensions for 0xf6, 0xf7.  To be used with OPC_GRP3.  */
#define EXT3_NOT   2
#define EXT3_NEG   3
#define EXT3_MUL   4
#define EXT3_IMUL  5
#define EXT3_DIV   6
#define EXT3_IDIV  7

/* Group 5 opcode extensions for 0xff.  To be used with OPC_GRP5.  */
#define EXT5_INC_Ev	0
#define EXT5_DEC_Ev	1
#define EXT5_CALLN_Ev	2
#define EXT5_JMPN_Ev	4

/* Condition codes to be added to OPC_JCC_{long,short}.  */
#define JCC_JMP (-1)
#define JCC_JO  0x0
#define JCC_JNO 0x1
#define JCC_JB  0x2
#define JCC_JAE 0x3
#define JCC_JE  0x4
#define JCC_JNE 0x5
#define JCC_JBE 0x6
#define JCC_JA  0x7
#define JCC_JS  0x8
#define JCC_JNS 0x9
#define JCC_JP  0xa
#define JCC_JNP 0xb
#define JCC_JL  0xc
#define JCC_JGE 0xd
#define JCC_JLE 0xe
#define JCC_JG  0xf

static const uint8_t tcg_cond_to_jcc[] = {
    [TCG_COND_EQ] = JCC_JE,
    [TCG_COND_NE] = JCC_JNE,
    [TCG_COND_LT] = JCC_JL,
    [TCG_COND_GE] = JCC_JGE,
    [TCG_COND_LE] = JCC_JLE,
    [TCG_COND_GT] = JCC_JG,
    [TCG_COND_LTU] = JCC_JB,
    [TCG_COND_GEU] = JCC_JAE,
    [TCG_COND_LEU] = JCC_JBE,
    [TCG_COND_GTU] = JCC_JA,
};
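
/* For example, TCG_COND_LTU maps to JCC_JB (0x2), so a long-form unsigned
   below branch is emitted via OPC_JCC_long as 0f 82 followed by a rel32
   displacement.  */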

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
    int rex;

    if (opc & P_GS) {
        tcg_out8(s, 0x65);
    }
    if (opc & P_DATA16) {
        /* We should never be asking for both 16 and 64-bit operation.  */
        assert((opc & P_REXW) == 0);
        tcg_out8(s, 0x66);
    }
    if (opc & P_ADDR32) {
        tcg_out8(s, 0x67);
    }

    rex = 0;
    rex |= (opc & P_REXW) >> 8;		/* REX.W */
    rex |= (r & 8) >> 1;		/* REX.R */
    rex |= (x & 8) >> 2;		/* REX.X */
    rex |= (rm & 8) >> 3;		/* REX.B */

    /* P_REXB_{R,RM} indicates that the given register is the low byte.
       For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
       as otherwise the encoding indicates %[abcd]h.  Note that the values
       that are ORed in merely indicate that the REX byte must be present;
       those bits get discarded in output.  */
    rex |= opc & (r >= 4 ? P_REXB_R : 0);
    rex |= opc & (rm >= 4 ? P_REXB_RM : 0);

    if (rex) {
        tcg_out8(s, (uint8_t)(rex | 0x40));
    }

    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
#else
static void tcg_out_opc(TCGContext *s, int opc)
{
    if (opc & P_DATA16) {
        tcg_out8(s, 0x66);
    }
    if (opc & P_EXT) {
        tcg_out8(s, 0x0f);
    }
    tcg_out8(s, opc);
}
/* Discard the register arguments to tcg_out_opc early, so as not to penalize
   the 32-bit compilation paths.  This method works with all versions of gcc,
   whereas relying on compiler optimization to eliminate the dead arguments
   may not.  */
#define tcg_out_opc(s, opc, r, rm, x)  (tcg_out_opc)(s, opc)
#endif

static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
{
    tcg_out_opc(s, opc, r, rm, 0);
    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
}
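
/* Worked example: tcg_out_modrm(s, OPC_MOVL_GvEv + P_REXW, TCG_REG_R8,
   TCG_REG_RDI) emits 4c 8b c7, i.e. REX.WR, opcode 0x8b, and ModRM
   0xc0 | (0 << 3) | 7, which is "movq %rdi, %r8" in AT&T syntax.  */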

/* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
   Either RM or INDEX may be omitted by passing a negative value.  In 64-bit
   mode for absolute addresses, ~RM is the size of the immediate operand
   that will follow the instruction.  */

static void tcg_out_modrm_sib_offset(TCGContext *s, int opc, int r, int rm,
                                     int index, int shift, intptr_t offset)
{
    int mod, len;

    if (index < 0 && rm < 0) {
        if (TCG_TARGET_REG_BITS == 64) {
            /* Try for a rip-relative addressing mode.  This has replaced
               the 32-bit-mode absolute addressing encoding.  */
            intptr_t pc = (intptr_t)s->code_ptr + 5 + ~rm;
            intptr_t disp = offset - pc;
            if (disp == (int32_t)disp) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 5);
                tcg_out32(s, disp);
                return;
            }

            /* Try for an absolute address encoding.  This requires the
               use of the MODRM+SIB encoding and is therefore larger than
               rip-relative addressing.  */
            if (offset == (int32_t)offset) {
                tcg_out_opc(s, opc, r, 0, 0);
                tcg_out8(s, (LOWREGMASK(r) << 3) | 4);
                tcg_out8(s, (4 << 3) | 5);
                tcg_out32(s, offset);
                return;
            }

            /* ??? The memory isn't directly addressable.  */
            tcg_abort();
        } else {
            /* Absolute address.  */
            tcg_out_opc(s, opc, r, 0, 0);
            tcg_out8(s, (r << 3) | 5);
            tcg_out32(s, offset);
            return;
        }
    }

    /* Find the length of the immediate addend.  Note that the encoding
       that would be used for (%ebp) indicates absolute addressing.  */
    if (rm < 0) {
        mod = 0, len = 4, rm = 5;
    } else if (offset == 0 && LOWREGMASK(rm) != TCG_REG_EBP) {
        mod = 0, len = 0;
    } else if (offset == (int8_t)offset) {
        mod = 0x40, len = 1;
    } else {
        mod = 0x80, len = 4;
    }

    /* Use a single byte MODRM format if possible.  Note that the encoding
       that would be used for %esp is the escape to the two byte form.  */
    if (index < 0 && LOWREGMASK(rm) != TCG_REG_ESP) {
        /* Single byte MODRM format.  */
        tcg_out_opc(s, opc, r, rm, 0);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
    } else {
        /* Two byte MODRM+SIB format.  */

        /* Note that the encoding that would place %esp into the index
           field indicates no index register.  In 64-bit mode, the REX.X
           bit counts, so %r12 can be used as the index.  */
        if (index < 0) {
            index = 4;
        } else {
            assert(index != TCG_REG_ESP);
        }

        tcg_out_opc(s, opc, r, rm, index);
        tcg_out8(s, mod | (LOWREGMASK(r) << 3) | 4);
        tcg_out8(s, (shift << 6) | (LOWREGMASK(index) << 3) | LOWREGMASK(rm));
    }

    if (len == 1) {
        tcg_out8(s, offset);
    } else if (len == 4) {
        tcg_out32(s, offset);
    }
}
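
/* Worked example: tcg_out_modrm_sib_offset(s, OPC_MOVL_GvEv, TCG_REG_EAX,
   TCG_REG_EBX, TCG_REG_ESI, 2, 0x10) emits 8b 44 b3 10: ModRM 0x44
   (mod=01, reg=eax, rm=SIB), SIB 0xb3 (scale=4, index=esi, base=ebx) and
   a disp8, i.e. "movl 0x10(%ebx,%esi,4), %eax".  */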

/* A simplification of the above with no index or shift.  */
static inline void tcg_out_modrm_offset(TCGContext *s, int opc, int r,
                                        int rm, intptr_t offset)
{
    tcg_out_modrm_sib_offset(s, opc, r, rm, -1, 0, offset);
}

/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
{
    /* Propagate an opcode prefix, such as P_REXW.  */
    int ext = subop & ~0x7;
    subop &= 0x7;

    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3) + ext, dest, src);
}

static inline void tcg_out_mov(TCGContext *s, TCGType type,
                               TCGReg ret, TCGReg arg)
{
    if (arg != ret) {
        int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
        tcg_out_modrm(s, opc, ret, arg);
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg)
{
    tcg_target_long diff;

    if (arg == 0) {
        tgen_arithr(s, ARITH_XOR, ret, ret);
        return;
    }
    if (arg == (uint32_t)arg || type == TCG_TYPE_I32) {
        tcg_out_opc(s, OPC_MOVL_Iv + LOWREGMASK(ret), 0, ret, 0);
        tcg_out32(s, arg);
        return;
    }
    if (arg == (int32_t)arg) {
        tcg_out_modrm(s, OPC_MOVL_EvIz + P_REXW, 0, ret);
        tcg_out32(s, arg);
        return;
    }
    /* Try a 7-byte pc-relative lea before the 10-byte movq.  */
    diff = arg - ((uintptr_t)s->code_ptr + 7);
    if (diff == (int32_t)diff) {
        tcg_out_opc(s, OPC_LEA | P_REXW, ret, 0, 0);
        tcg_out8(s, (LOWREGMASK(ret) << 3) | 5);
        tcg_out32(s, diff);
        return;
    }

    tcg_out_opc(s, OPC_MOVL_Iv + P_REXW + LOWREGMASK(ret), 0, ret, 0);
    tcg_out64(s, arg);
}
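
/* The cases above, in order of preference: 2-byte xor (3 with REX) for
   zero; 5-byte movl for values with no high bits set; 7-byte
   sign-extending movq with imm32; 7-byte pc-relative lea; and finally
   the full 10-byte movabsq with a 64-bit immediate.  */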

static inline void tcg_out_pushi(TCGContext *s, tcg_target_long val)
{
    if (val == (int8_t)val) {
        tcg_out_opc(s, OPC_PUSH_Ib, 0, 0, 0);
        tcg_out8(s, val);
    } else if (val == (int32_t)val) {
        tcg_out_opc(s, OPC_PUSH_Iv, 0, 0, 0);
        tcg_out32(s, val);
    } else {
        tcg_abort();
    }
}

static inline void tcg_out_push(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_PUSH_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_pop(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_POP_r32 + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_GvEv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, ret, arg1, arg2);
}

static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                              TCGReg arg1, intptr_t arg2)
{
    int opc = OPC_MOVL_EvGv + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, arg, arg1, arg2);
}

static inline void tcg_out_sti(TCGContext *s, TCGType type, TCGReg base,
                               tcg_target_long ofs, tcg_target_long val)
{
    int opc = OPC_MOVL_EvIz + (type == TCG_TYPE_I64 ? P_REXW : 0);
    tcg_out_modrm_offset(s, opc, 0, base, ofs);
    tcg_out32(s, val);
}

static void tcg_out_shifti(TCGContext *s, int subopc, int reg, int count)
{
    /* Propagate an opcode prefix, such as P_DATA16.  */
    int ext = subopc & ~0x7;
    subopc &= 0x7;

    if (count == 1) {
        tcg_out_modrm(s, OPC_SHIFT_1 + ext, subopc, reg);
    } else {
        tcg_out_modrm(s, OPC_SHIFT_Ib + ext, subopc, reg);
        tcg_out8(s, count);
    }
}

static inline void tcg_out_bswap32(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + LOWREGMASK(reg), 0, reg, 0);
}

static inline void tcg_out_rolw_8(TCGContext *s, int reg)
{
    tcg_out_shifti(s, SHIFT_ROL + P_DATA16, reg, 8);
}

static inline void tcg_out_ext8u(TCGContext *s, int dest, int src)
{
    /* movzbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVZBL + P_REXB_RM, dest, src);
}

static void tcg_out_ext8s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsbl */
    assert(src < 4 || TCG_TARGET_REG_BITS == 64);
    tcg_out_modrm(s, OPC_MOVSBL + P_REXB_RM + rexw, dest, src);
}

static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
{
    /* movzwl */
    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
}

static inline void tcg_out_ext16s(TCGContext *s, int dest, int src, int rexw)
{
    /* movsw[lq] */
    tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
}

static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
{
    /* 32-bit mov zero extends.  */
    tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
}

static inline void tcg_out_ext32s(TCGContext *s, int dest, int src)
{
    tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
}

static inline void tcg_out_bswap64(TCGContext *s, int reg)
{
    tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
}

static void tgen_arithi(TCGContext *s, int c, int r0,
                        tcg_target_long val, int cf)
{
    int rexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        rexw = c & -8;
        c &= 7;
    }

    /* ??? While INC is 2 bytes shorter than ADDL $1, it also induces
       partial-flags-update stalls on the Pentium 4 and is not recommended
       by current Intel optimization manuals.  */
    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
        int is_inc = (c == ARITH_ADD) ^ (val < 0);
        if (TCG_TARGET_REG_BITS == 64) {
            /* The single-byte increment encodings are re-tasked as the
               REX prefixes.  Use the MODRM encoding.  */
            tcg_out_modrm(s, OPC_GRP5 + rexw,
                          (is_inc ? EXT5_INC_Ev : EXT5_DEC_Ev), r0);
        } else {
            tcg_out8(s, (is_inc ? OPC_INC_r32 : OPC_DEC_r32) + r0);
        }
        return;
    }

    if (c == ARITH_AND) {
        if (TCG_TARGET_REG_BITS == 64) {
            if (val == 0xffffffffu) {
                tcg_out_ext32u(s, r0, r0);
                return;
            }
            if (val == (uint32_t)val) {
                /* AND with no high bits set can use a 32-bit operation.  */
                rexw = 0;
            }
        }
        if (val == 0xffu && (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
            tcg_out_ext8u(s, r0, r0);
            return;
        }
        if (val == 0xffffu) {
            tcg_out_ext16u(s, r0, r0);
            return;
        }
    }

    if (val == (int8_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIb + rexw, c, r0);
        tcg_out8(s, val);
        return;
    }
    if (rexw == 0 || val == (int32_t)val) {
        tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
        tcg_out32(s, val);
        return;
    }

    tcg_abort();
}
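
/* Worked example: tgen_arithi(s, ARITH_ADD + P_REXW, TCG_REG_RAX, 8, 0)
   takes the int8 immediate path and emits 48 83 c0 08,
   i.e. "addq $8, %rax".  */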

static void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
{
    if (val != 0) {
        tgen_arithi(s, ARITH_ADD + P_REXW, reg, val, 0);
    }
}

/* Use SMALL != 0 to force a short forward branch.  */
static void tcg_out_jxx(TCGContext *s, int opc, int label_index, int small)
{
    int32_t val, val1;
    TCGLabel *l = &s->labels[label_index];

    if (l->has_value) {
        val = l->u.value - (intptr_t)s->code_ptr;
        val1 = val - 2;
        if ((int8_t)val1 == val1) {
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_short);
            } else {
                tcg_out8(s, OPC_JCC_short + opc);
            }
            tcg_out8(s, val1);
        } else {
            if (small) {
                tcg_abort();
            }
            if (opc == -1) {
                tcg_out8(s, OPC_JMP_long);
                tcg_out32(s, val - 5);
            } else {
                tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
                tcg_out32(s, val - 6);
            }
        }
    } else if (small) {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_short);
        } else {
            tcg_out8(s, OPC_JCC_short + opc);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC8, label_index, -1);
        s->code_ptr += 1;
    } else {
        if (opc == -1) {
            tcg_out8(s, OPC_JMP_long);
        } else {
            tcg_out_opc(s, OPC_JCC_long + opc, 0, 0, 0);
        }
        tcg_out_reloc(s, s->code_ptr, R_386_PC32, label_index, -4);
        s->code_ptr += 4;
    }
}
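
/* The displacement adjustments above reflect the encoded instruction
   lengths: 2 bytes for the short forms (eb/7x rel8), 5 bytes for
   jmp rel32 (e9) and 6 bytes for the long conditional forms
   (0f 8x rel32).  */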

static void tcg_out_cmp(TCGContext *s, TCGArg arg1, TCGArg arg2,
                        int const_arg2, int rexw)
{
    if (const_arg2) {
        if (arg2 == 0) {
            /* test r, r */
            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
        } else {
            tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
        }
    } else {
        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
    }
}

static void tcg_out_brcond32(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_brcond64(TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index, int small)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_jxx(s, tcg_cond_to_jcc[cond], label_index, small);
}
#else
/* XXX: we implement it at the target level to avoid having to
   handle temporaries that live across basic blocks.  */
static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
                            const int *const_args, int small)
{
    int label_next;
    label_next = gen_new_label();
    switch(args[4]) {
    case TCG_COND_EQ:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         label_next, 1);
        tcg_out_brcond32(s, TCG_COND_EQ, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_NE:
        tcg_out_brcond32(s, TCG_COND_NE, args[0], args[2], const_args[2],
                         args[5], small);
        tcg_out_brcond32(s, TCG_COND_NE, args[1], args[3], const_args[3],
                         args[5], small);
        break;
    case TCG_COND_LT:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LE:
        tcg_out_brcond32(s, TCG_COND_LT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GT:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GE:
        tcg_out_brcond32(s, TCG_COND_GT, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LTU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_LEU:
        tcg_out_brcond32(s, TCG_COND_LTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_LEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GTU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GTU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    case TCG_COND_GEU:
        tcg_out_brcond32(s, TCG_COND_GTU, args[1], args[3], const_args[3],
                         args[5], small);
        tcg_out_jxx(s, JCC_JNE, label_next, 1);
        tcg_out_brcond32(s, TCG_COND_GEU, args[0], args[2], const_args[2],
                         args[5], small);
        break;
    default:
        tcg_abort();
    }
    tcg_out_label(s, label_next, s->code_ptr);
}
#endif

static void tcg_out_setcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, 0);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_setcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg arg1, TCGArg arg2, int const_arg2)
{
    tcg_out_cmp(s, arg1, arg2, const_arg2, P_REXW);
    tcg_out_modrm(s, OPC_SETCC | tcg_cond_to_jcc[cond], 0, dest);
    tcg_out_ext8u(s, dest, dest);
}
#else
static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
                             const int *const_args)
{
    TCGArg new_args[6];
    int label_true, label_over;

    memcpy(new_args, args+1, 5*sizeof(TCGArg));

    if (args[0] == args[1] || args[0] == args[2]
        || (!const_args[3] && args[0] == args[3])
        || (!const_args[4] && args[0] == args[4])) {
        /* When the destination overlaps with one of the argument
           registers, don't do anything tricky.  */
        label_true = gen_new_label();
        label_over = gen_new_label();

        new_args[5] = label_true;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);
        tcg_out_jxx(s, JCC_JMP, label_over, 1);
        tcg_out_label(s, label_true, s->code_ptr);

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 1);
        tcg_out_label(s, label_over, s->code_ptr);
    } else {
        /* When the destination does not overlap one of the arguments,
           clear the destination first, jump if cond false, and emit an
           increment in the true case.  This results in smaller code.  */

        tcg_out_movi(s, TCG_TYPE_I32, args[0], 0);

        label_over = gen_new_label();
        new_args[4] = tcg_invert_cond(new_args[4]);
        new_args[5] = label_over;
        tcg_out_brcond2(s, new_args, const_args+1, 1);

        tgen_arithi(s, ARITH_ADD, args[0], 1, 0);
        tcg_out_label(s, label_over, s->code_ptr);
    }
}
#endif

static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, 0);
    if (have_cmov) {
        tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
    } else {
        int over = gen_new_label();
        tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1);
        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
        tcg_out_label(s, over, s->code_ptr);
    }
}

#if TCG_TARGET_REG_BITS == 64
static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
                              TCGArg c1, TCGArg c2, int const_c2,
                              TCGArg v1)
{
    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
}
#endif

static void tcg_out_branch(TCGContext *s, int call, uintptr_t dest)
{
    intptr_t disp = dest - (intptr_t)s->code_ptr - 5;

    if (disp == (int32_t)disp) {
        tcg_out_opc(s, call ? OPC_CALL_Jz : OPC_JMP_long, 0, 0, 0);
        tcg_out32(s, disp);
    } else {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R10, dest);
        tcg_out_modrm(s, OPC_GRP5,
                      call ? EXT5_CALLN_Ev : EXT5_JMPN_Ev, TCG_REG_R10);
    }
}

static inline void tcg_out_calli(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 1, dest);
}

static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
{
    tcg_out_branch(s, 0, dest);
}

#if defined(CONFIG_SOFTMMU)
/* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 *                                     int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_ld_helpers[16] = {
    [MO_UB]   = helper_ret_ldub_mmu,
    [MO_LEUW] = helper_le_lduw_mmu,
    [MO_LEUL] = helper_le_ldul_mmu,
    [MO_LEQ]  = helper_le_ldq_mmu,
    [MO_BEUW] = helper_be_lduw_mmu,
    [MO_BEUL] = helper_be_ldul_mmu,
    [MO_BEQ]  = helper_be_ldq_mmu,
};

/* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
 *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
 */
static const void * const qemu_st_helpers[16] = {
    [MO_UB]   = helper_ret_stb_mmu,
    [MO_LEUW] = helper_le_stw_mmu,
    [MO_LEUL] = helper_le_stl_mmu,
    [MO_LEQ]  = helper_le_stq_mmu,
    [MO_BEUW] = helper_be_stw_mmu,
    [MO_BEUL] = helper_be_stl_mmu,
    [MO_BEQ]  = helper_be_stq_mmu,
};

/* Perform the TLB load and compare.

   Inputs:
   ADDRLO and ADDRHI contain the low and high part of the address.

   MEM_INDEX and S_BITS are the memory context and log2 size of the load.

   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
   This should be offsetof addr_read or addr_write.

   Outputs:
   LABEL_PTRS is filled with the positions of the displacements of the
   forward jumps to the TLB miss case: one for 32-bit addresses, two for
   64-bit addresses.

   The second argument register is loaded with the low part of the address.
   In the TLB hit case, it has been adjusted as indicated by the TLB
   and so is a host address.  In the TLB miss case, it continues to
   hold a guest address.

   The first argument register is clobbered.  */

static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                                    int mem_index, TCGMemOp s_bits,
                                    uint8_t **label_ptr, int which)
{
    const TCGReg r0 = TCG_REG_L0;
    const TCGReg r1 = TCG_REG_L1;
    TCGType ttype = TCG_TYPE_I32;
    TCGType htype = TCG_TYPE_I32;
    int trexw = 0, hrexw = 0;

    if (TCG_TARGET_REG_BITS == 64) {
        if (TARGET_LONG_BITS == 64) {
            ttype = TCG_TYPE_I64;
            trexw = P_REXW;
        }
        if (TCG_TYPE_PTR == TCG_TYPE_I64) {
            htype = TCG_TYPE_I64;
            hrexw = P_REXW;
        }
    }

    tcg_out_mov(s, htype, r0, addrlo);
    tcg_out_mov(s, ttype, r1, addrlo);

    tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);

    tgen_arithi(s, ARITH_AND + trexw, r1,
                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
    tgen_arithi(s, ARITH_AND + hrexw, r0,
                (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);

    tcg_out_modrm_sib_offset(s, OPC_LEA + hrexw, r0, TCG_AREG0, r0, 0,
                             offsetof(CPUArchState, tlb_table[mem_index][0])
                             + which);

    /* cmp 0(r0), r1 */
    tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);

    /* Prepare for both the fast path add of the tlb addend, and the slow
       path function argument setup.  There are two cases worth noting:
       For a 32-bit guest and x86_64 host, MOVL zero-extends the guest address
       before the fastpath ADDQ below.  For a 64-bit guest and x32 host, MOVQ
       copies the entire guest address for the slow path, while truncation
       for the 32-bit host happens with the fastpath ADDL below.  */
    tcg_out_mov(s, ttype, r1, addrlo);

    /* jne slow_path */
    tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
    label_ptr[0] = s->code_ptr;
    s->code_ptr += 4;

    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        /* cmp 4(r0), addrhi */
        tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);

        /* jne slow_path */
        tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
        label_ptr[1] = s->code_ptr;
        s->code_ptr += 4;
    }

    /* TLB Hit.  */

    /* add addend(r0), r1 */
    tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
                         offsetof(CPUTLBEntry, addend) - which);
}
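
/* A minimal C sketch of the lookup sequence emitted above, assuming a
   CPUTLBEntry of size 1 << CPU_TLB_ENTRY_BITS bytes; the helper name is
   made up for illustration only.  */
#if 0
static bool tlb_hit_sketch(CPUArchState *env, target_ulong addr,
                           int mem_index, TCGMemOp s_bits, size_t which)
{
    /* Byte offset of the TLB entry within tlb_table[mem_index].  */
    uintptr_t ofs = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
                    & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
    /* Page-aligned tag; low s_bits kept to catch unaligned accesses.  */
    target_ulong tag = addr & (TARGET_PAGE_MASK | ((1 << s_bits) - 1));
    uintptr_t entry = (uintptr_t)&env->tlb_table[mem_index][0] + ofs;
    return *(target_ulong *)(entry + which) == tag;
}
#endif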

/*
 * Record the context of a call to the out-of-line helper code for the slow
 * path of a load or store, so that we can later generate the correct helper
 * code.
 */
static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
                                TCGReg datalo, TCGReg datahi,
                                TCGReg addrlo, TCGReg addrhi,
                                int mem_index, uint8_t *raddr,
                                uint8_t **label_ptr)
{
    TCGLabelQemuLdst *label = new_ldst_label(s);

    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = datalo;
    label->datahi_reg = datahi;
    label->addrlo_reg = addrlo;
    label->addrhi_reg = addrhi;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr[0];
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        label->label_ptr[1] = label_ptr[1];
    }
}

/*
 * Generate code for the slow path for a load at the end of the block.
 */
static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGReg data_reg;
    uint8_t **label_ptr = &l->label_ptr[0];

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
                     l->mem_index);
        tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
                     (uintptr_t)l->raddr);
    }

    tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);

    data_reg = l->datalo_reg;
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
    case MO_SW:
        tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
        break;
#endif
    case MO_UB:
    case MO_UW:
        /* Note that the helpers have zero-extended to tcg_target_long.  */
    case MO_UL:
        tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
        break;
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
        } else if (data_reg == TCG_REG_EDX) {
            /* xchg %edx, %eax */
            tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
        } else {
            tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
            tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
        }
        break;
    default:
        tcg_abort();
    }

    /* Jump to the code corresponding to the next IR after the qemu_ld */
    tcg_out_jmp(s, (uintptr_t)l->raddr);
}
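
/* For the 32-bit case above, the helper arguments are passed on the stack
   in the order of the load-helper signature: env at 0(%esp), addrlo at
   4(%esp), addrhi (64-bit guests only), then mem_index and the return
   address.  */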

/*
 * Generate code for the slow path for a store at the end of the block.
 */
static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
    TCGMemOp opc = l->opc;
    TCGMemOp s_bits = opc & MO_SIZE;
    uint8_t **label_ptr = &l->label_ptr[0];
    TCGReg retaddr;

    /* resolve label address */
    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
    }

    if (TCG_TARGET_REG_BITS == 32) {
        int ofs = 0;

        tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
        ofs += 4;

        tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (TARGET_LONG_BITS == 64) {
            tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
        ofs += 4;

        if (s_bits == MO_64) {
            tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
            ofs += 4;
        }

        tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
        ofs += 4;

        retaddr = TCG_REG_EAX;
        tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
        tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
    } else {
        tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
        /* The second argument is already loaded with addrlo.  */
        tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                    tcg_target_call_iarg_regs[2], l->datalo_reg);
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                     l->mem_index);

        if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
            retaddr = tcg_target_call_iarg_regs[4];
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
        } else {
            retaddr = TCG_REG_RAX;
            tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
            tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, 0);
        }
    }

    /* "Tail call" to the helper, with the return address back inline.  */
    tcg_out_push(s, retaddr);
    tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
}
#elif defined(__x86_64__) && defined(__linux__)
# include <asm/prctl.h>
# include <sys/prctl.h>

int arch_prctl(int code, unsigned long addr);

static int guest_base_flags;
static inline void setup_guest_base_seg(void)
{
    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
        guest_base_flags = P_GS;
    }
}
#else
# define guest_base_flags 0
static inline void setup_guest_base_seg(void) { }
#endif /* SOFTMMU */

static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    switch (memop & MO_SSIZE) {
    case MO_UB:
        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
        break;
    case MO_SB:
        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
        break;
    case MO_UW:
        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_rolw_8(s, datalo);
        }
        break;
    case MO_SW:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
            tcg_out_rolw_8(s, datalo);
            tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
                                 datalo, base, ofs);
        }
        break;
    case MO_UL:
        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
        if (bswap) {
            tcg_out_bswap32(s, datalo);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case MO_SL:
        if (bswap) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
            tcg_out_bswap32(s, datalo);
            tcg_out_ext32s(s, datalo, datalo);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
        }
        break;
#endif
    case MO_Q:
        if (TCG_TARGET_REG_BITS == 64) {
            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
                                 datalo, base, ofs);
            if (bswap) {
                tcg_out_bswap64(s, datalo);
            }
        } else {
            if (bswap) {
                int t = datalo;
                datalo = datahi;
                datahi = t;
            }
            if (base != datalo) {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
            } else {
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datahi, base, ofs + 4);
                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
                                     datalo, base, ofs);
            }
            if (bswap) {
                tcg_out_bswap32(s, datalo);
                tcg_out_bswap32(s, datahi);
            }
        }
        break;
    default:
        tcg_abort();
    }
}

/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
   EAX.  It will be useful once fixed-register globals are less
   common. */
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    TCGMemOp s_bits;
    uint8_t *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    opc = *args++;

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_read));

    /* TLB Hit.  */
    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a load into ldst label */
    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        TCGReg base = addrlo;
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero-extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
        if (GUEST_BASE && guest_base_flags) {
            seg = guest_base_flags;
            offset = 0;
        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
            base = TCG_REG_L1;
            offset = 0;
        }

        tcg_out_qemu_ld_direct(s, datalo, datahi, base, offset, seg, opc);
    }
#endif
}

static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
                                   TCGReg base, intptr_t ofs, int seg,
                                   TCGMemOp memop)
{
    const TCGMemOp bswap = memop & MO_BSWAP;

    /* ??? Ideally we wouldn't need a scratch register.  For user-only,
       we could perform the bswap twice to restore the original value
       instead of moving to the scratch.  But as it is, the L constraint
       means that TCG_REG_L0 is definitely free here.  */
    const TCGReg scratch = TCG_REG_L0;

    switch (memop & MO_SIZE) {
    case MO_8:
        /* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
           Use the scratch register if necessary.  */
        if (TCG_TARGET_REG_BITS == 32 && datalo >= 4) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
                             datalo, base, ofs);
        break;
    case MO_16:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_rolw_8(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
                             datalo, base, ofs);
        break;
    case MO_32:
        if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            datalo = scratch;
        }
        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
        break;
    case MO_64:
        if (TCG_TARGET_REG_BITS == 64) {
            if (bswap) {
                tcg_out_mov(s, TCG_TYPE_I64, scratch, datalo);
                tcg_out_bswap64(s, scratch);
                datalo = scratch;
            }
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
                                 datalo, base, ofs);
        } else if (bswap) {
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
            tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
            tcg_out_bswap32(s, scratch);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
        }
        break;
    default:
        tcg_abort();
    }
}

static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
{
    TCGReg datalo, datahi, addrlo;
    TCGReg addrhi __attribute__((unused));
    TCGMemOp opc;
#if defined(CONFIG_SOFTMMU)
    int mem_index;
    TCGMemOp s_bits;
    uint8_t *label_ptr[2];
#endif

    datalo = *args++;
    datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
    addrlo = *args++;
    addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
    opc = *args++;

#if defined(CONFIG_SOFTMMU)
    mem_index = *args++;
    s_bits = opc & MO_SIZE;

    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
                     label_ptr, offsetof(CPUTLBEntry, addr_write));

    /* TLB Hit.  */
    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);

    /* Record the current context of a store into ldst label */
    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
                        mem_index, s->code_ptr, label_ptr);
#else
    {
        int32_t offset = GUEST_BASE;
        TCGReg base = addrlo;
        int seg = 0;

        /* ??? We assume all operations have left us with register contents
           that are zero-extended.  So far this appears to be true.  If we
           want to enforce this, we can either do an explicit zero-extension
           here, or (if GUEST_BASE == 0, or a segment register is in use)
           use the ADDR32 prefix.  For now, do nothing.  */
1588        if (GUEST_BASE && guest_base_flags) {
1589            seg = guest_base_flags;
1590            offset = 0;
1591        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
1592            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
1593            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
1594            base = TCG_REG_L1;
1595            offset = 0;
1596        }
1597
1598        tcg_out_qemu_st_direct(s, datalo, datahi, base, offset, seg, opc);
1599    }
1600#endif
1601}
1602
1603static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1604                              const TCGArg *args, const int *const_args)
1605{
1606    int c, rexw = 0;
1607
#if TCG_TARGET_REG_BITS == 64
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i64): \
            rexw = P_REXW; /* FALLTHRU */    \
        case glue(glue(INDEX_op_, x), _i32)
#else
# define OP_32_64(x) \
        case glue(glue(INDEX_op_, x), _i32)
#endif

    switch (opc) {
    case INDEX_op_exit_tb:
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, args[0]);
        tcg_out_jmp(s, (uintptr_t)tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        if (s->tb_jmp_offset) {
            /* direct jump method */
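            /* Emit a jump with a 4-byte displacement placeholder; the
               displacement is patched later, when the TB is chained to
               its successor.  */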
            tcg_out8(s, OPC_JMP_long); /* jmp im */
            s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
            tcg_out32(s, 0);
        } else {
            /* indirect jump method */
            tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
                                 (intptr_t)(s->tb_next + args[0]));
        }
        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
        break;
    case INDEX_op_call:
        if (const_args[0]) {
            tcg_out_calli(s, args[0]);
        } else {
            /* call *reg */
            tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
        }
        break;
    case INDEX_op_br:
        tcg_out_jxx(s, JCC_JMP, args[0], 0);
        break;
    case INDEX_op_movi_i32:
        tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
        break;
    OP_32_64(ld8u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld8s):
        tcg_out_modrm_offset(s, OPC_MOVSBL + rexw, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16u):
        /* Note that we can ignore REXW for the zero-extend to 64-bit.  */
        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
        break;
    OP_32_64(ld16s):
        tcg_out_modrm_offset(s, OPC_MOVSWL + rexw, args[0], args[1], args[2]);
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_ld32u_i64:
#endif
    case INDEX_op_ld_i32:
        tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        break;

    OP_32_64(st8):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
                                 0, args[1], args[2]);
            tcg_out8(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
                                 args[0], args[1], args[2]);
        }
        break;
    OP_32_64(st16):
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
                                 0, args[1], args[2]);
            tcg_out16(s, args[0]);
        } else {
            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
                                 args[0], args[1], args[2]);
        }
        break;
#if TCG_TARGET_REG_BITS == 64
    case INDEX_op_st32_i64:
#endif
    case INDEX_op_st_i32:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
        }
        break;

    OP_32_64(add):
        /* For 3-operand addition, use LEA.  */
        if (args[0] != args[1]) {
            TCGArg a0 = args[0], a1 = args[1], a2 = args[2], c3 = 0;

            if (const_args[2]) {
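                /* Fold the constant into the displacement; index -1
                   tells tcg_out_modrm_sib_offset that no index
                   register is used.  */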
                c3 = a2, a2 = -1;
            } else if (a0 == a2) {
                /* Watch out for dest = src + dest, since we've removed
                   the matching constraint on the add.  */
                tgen_arithr(s, ARITH_ADD + rexw, a0, a1);
                break;
            }

            tcg_out_modrm_sib_offset(s, OPC_LEA + rexw, a0, a1, a2, 0, c3);
            break;
        }
        c = ARITH_ADD;
        goto gen_arith;
    OP_32_64(sub):
        c = ARITH_SUB;
        goto gen_arith;
    OP_32_64(and):
        c = ARITH_AND;
        goto gen_arith;
    OP_32_64(or):
        c = ARITH_OR;
        goto gen_arith;
    OP_32_64(xor):
        c = ARITH_XOR;
        goto gen_arith;
    gen_arith:
        if (const_args[2]) {
            tgen_arithi(s, c + rexw, args[0], args[2], 0);
        } else {
            tgen_arithr(s, c + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(mul):
        if (const_args[2]) {
            int32_t val;
            val = args[2];
            if (val == (int8_t)val) {
                tcg_out_modrm(s, OPC_IMUL_GvEvIb + rexw, args[0], args[0]);
                tcg_out8(s, val);
            } else {
                tcg_out_modrm(s, OPC_IMUL_GvEvIz + rexw, args[0], args[0]);
                tcg_out32(s, val);
            }
        } else {
            tcg_out_modrm(s, OPC_IMUL_GvEv + rexw, args[0], args[2]);
        }
        break;

    OP_32_64(div2):
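        /* The x86 division instructions take the dividend in EDX:EAX
           and leave the quotient in EAX and the remainder in EDX; the
           "a"/"d" constraints in x86_op_defs pin the operands.  */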
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IDIV, args[4]);
        break;
    OP_32_64(divu2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_DIV, args[4]);
        break;

    OP_32_64(shl):
        c = SHIFT_SHL;
        goto gen_shift;
    OP_32_64(shr):
        c = SHIFT_SHR;
        goto gen_shift;
    OP_32_64(sar):
        c = SHIFT_SAR;
        goto gen_shift;
    OP_32_64(rotl):
        c = SHIFT_ROL;
        goto gen_shift;
    OP_32_64(rotr):
        c = SHIFT_ROR;
        goto gen_shift;
    gen_shift:
        if (const_args[2]) {
            tcg_out_shifti(s, c + rexw, args[0], args[2]);
        } else {
            tcg_out_modrm(s, OPC_SHIFT_cl + rexw, c, args[0]);
        }
        break;

    case INDEX_op_brcond_i32:
        tcg_out_brcond32(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i32:
        tcg_out_setcond32(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i32:
        tcg_out_movcond32(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    OP_32_64(bswap16):
        tcg_out_rolw_8(s, args[0]);
        break;
    OP_32_64(bswap32):
        tcg_out_bswap32(s, args[0]);
        break;

    OP_32_64(neg):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NEG, args[0]);
        break;
    OP_32_64(not):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_NOT, args[0]);
        break;

    OP_32_64(ext8s):
        tcg_out_ext8s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext16s):
        tcg_out_ext16s(s, args[0], args[1], rexw);
        break;
    OP_32_64(ext8u):
        tcg_out_ext8u(s, args[0], args[1]);
        break;
    OP_32_64(ext16u):
        tcg_out_ext16u(s, args[0], args[1]);
        break;

    case INDEX_op_qemu_ld_i32:
        tcg_out_qemu_ld(s, args, 0);
        break;
    case INDEX_op_qemu_ld_i64:
        tcg_out_qemu_ld(s, args, 1);
        break;
    case INDEX_op_qemu_st_i32:
        tcg_out_qemu_st(s, args, 0);
        break;
    case INDEX_op_qemu_st_i64:
        tcg_out_qemu_st(s, args, 1);
        break;

    OP_32_64(mulu2):
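        /* Single-operand MUL/IMUL multiplies by EAX and writes the
           double-width product to EDX:EAX, matching the "a"/"d"
           output constraints in x86_op_defs.  */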
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_MUL, args[3]);
        break;
    OP_32_64(muls2):
        tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_IMUL, args[3]);
        break;
    OP_32_64(add2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_ADD + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_ADD + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_ADC + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_ADC + rexw, args[1], args[5]);
        }
        break;
    OP_32_64(sub2):
        if (const_args[4]) {
            tgen_arithi(s, ARITH_SUB + rexw, args[0], args[4], 1);
        } else {
            tgen_arithr(s, ARITH_SUB + rexw, args[0], args[4]);
        }
        if (const_args[5]) {
            tgen_arithi(s, ARITH_SBB + rexw, args[1], args[5], 1);
        } else {
            tgen_arithr(s, ARITH_SBB + rexw, args[1], args[5]);
        }
        break;

#if TCG_TARGET_REG_BITS == 32
    case INDEX_op_brcond2_i32:
        tcg_out_brcond2(s, args, const_args, 0);
        break;
    case INDEX_op_setcond2_i32:
        tcg_out_setcond2(s, args, const_args);
        break;
#else /* TCG_TARGET_REG_BITS == 64 */
    case INDEX_op_movi_i64:
        tcg_out_movi(s, TCG_TYPE_I64, args[0], args[1]);
        break;
    case INDEX_op_ld32s_i64:
        tcg_out_modrm_offset(s, OPC_MOVSLQ, args[0], args[1], args[2]);
        break;
    case INDEX_op_ld_i64:
        tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        break;
    case INDEX_op_st_i64:
        if (const_args[0]) {
            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
                                 0, args[1], args[2]);
            tcg_out32(s, args[0]);
        } else {
            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
        }
        break;

    case INDEX_op_brcond_i64:
        tcg_out_brcond64(s, args[2], args[0], args[1], const_args[1],
                         args[3], 0);
        break;
    case INDEX_op_setcond_i64:
        tcg_out_setcond64(s, args[3], args[0], args[1],
                          args[2], const_args[2]);
        break;
    case INDEX_op_movcond_i64:
        tcg_out_movcond64(s, args[5], args[0], args[1],
                          args[2], const_args[2], args[3]);
        break;

    case INDEX_op_bswap64_i64:
        tcg_out_bswap64(s, args[0]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, args[0], args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, args[0], args[1]);
        break;
#endif

    OP_32_64(deposit):
        if (args[3] == 0 && args[4] == 8) {
            /* load bits 0..7 */
            tcg_out_modrm(s, OPC_MOVB_EvGv | P_REXB_R | P_REXB_RM,
                          args[2], args[0]);
        } else if (args[3] == 8 && args[4] == 8) {
            /* load bits 8..15 */
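            /* Without a REX prefix, register numbers 4..7 encode the
               high-byte registers %ah..%bh, hence args[0] + 4.  */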
            tcg_out_modrm(s, OPC_MOVB_EvGv, args[2], args[0] + 4);
        } else if (args[3] == 0 && args[4] == 16) {
            /* load bits 0..15 */
            tcg_out_modrm(s, OPC_MOVL_EvGv | P_DATA16, args[2], args[0]);
        } else {
            tcg_abort();
        }
        break;

    default:
        tcg_abort();
    }

#undef OP_32_64
}

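/* Constraint letters, as parsed by tcg_target_parse_constraint:
   "r" any register; "q" a register with a low-byte encoding;
   "Q" a register with a high-byte encoding (%eax..%ebx);
   "a"/"c"/"d" exactly %eax/%ecx/%edx; "L" a qemu_ld/st operand
   register, excluding the scratch registers L0/L1; "i" any immediate,
   "e" a sign-extended and "Z" a zero-extended 32-bit immediate;
   "0"/"1" an alias of operand 0/1.  */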
static const TCGTargetOpDef x86_op_defs[] = {
    { INDEX_op_exit_tb, { } },
    { INDEX_op_goto_tb, { } },
    { INDEX_op_call, { "ri" } },
    { INDEX_op_br, { } },
    { INDEX_op_mov_i32, { "r", "r" } },
    { INDEX_op_movi_i32, { "r" } },
    { INDEX_op_ld8u_i32, { "r", "r" } },
    { INDEX_op_ld8s_i32, { "r", "r" } },
    { INDEX_op_ld16u_i32, { "r", "r" } },
    { INDEX_op_ld16s_i32, { "r", "r" } },
    { INDEX_op_ld_i32, { "r", "r" } },
    { INDEX_op_st8_i32, { "qi", "r" } },
    { INDEX_op_st16_i32, { "ri", "r" } },
    { INDEX_op_st_i32, { "ri", "r" } },

    { INDEX_op_add_i32, { "r", "r", "ri" } },
    { INDEX_op_sub_i32, { "r", "0", "ri" } },
    { INDEX_op_mul_i32, { "r", "0", "ri" } },
    { INDEX_op_div2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i32, { "a", "d", "0", "1", "r" } },
    { INDEX_op_and_i32, { "r", "0", "ri" } },
    { INDEX_op_or_i32, { "r", "0", "ri" } },
    { INDEX_op_xor_i32, { "r", "0", "ri" } },

    { INDEX_op_shl_i32, { "r", "0", "ci" } },
    { INDEX_op_shr_i32, { "r", "0", "ci" } },
    { INDEX_op_sar_i32, { "r", "0", "ci" } },
    { INDEX_op_rotl_i32, { "r", "0", "ci" } },
    { INDEX_op_rotr_i32, { "r", "0", "ci" } },

    { INDEX_op_brcond_i32, { "r", "ri" } },

    { INDEX_op_bswap16_i32, { "r", "0" } },
    { INDEX_op_bswap32_i32, { "r", "0" } },

    { INDEX_op_neg_i32, { "r", "0" } },

    { INDEX_op_not_i32, { "r", "0" } },

    { INDEX_op_ext8s_i32, { "r", "q" } },
    { INDEX_op_ext16s_i32, { "r", "r" } },
    { INDEX_op_ext8u_i32, { "r", "q" } },
    { INDEX_op_ext16u_i32, { "r", "r" } },

    { INDEX_op_setcond_i32, { "q", "r", "ri" } },

    { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
#if TCG_TARGET_HAS_movcond_i32
    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
#endif

    { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i32, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i32, { "r", "r", "0", "1", "ri", "ri" } },
    { INDEX_op_sub2_i32, { "r", "r", "0", "1", "ri", "ri" } },

#if TCG_TARGET_REG_BITS == 32
    { INDEX_op_brcond2_i32, { "r", "r", "ri", "ri" } },
    { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
#else
    { INDEX_op_mov_i64, { "r", "r" } },
    { INDEX_op_movi_i64, { "r" } },
    { INDEX_op_ld8u_i64, { "r", "r" } },
    { INDEX_op_ld8s_i64, { "r", "r" } },
    { INDEX_op_ld16u_i64, { "r", "r" } },
    { INDEX_op_ld16s_i64, { "r", "r" } },
    { INDEX_op_ld32u_i64, { "r", "r" } },
    { INDEX_op_ld32s_i64, { "r", "r" } },
    { INDEX_op_ld_i64, { "r", "r" } },
    { INDEX_op_st8_i64, { "ri", "r" } },
    { INDEX_op_st16_i64, { "ri", "r" } },
    { INDEX_op_st32_i64, { "ri", "r" } },
    { INDEX_op_st_i64, { "re", "r" } },

    { INDEX_op_add_i64, { "r", "r", "re" } },
    { INDEX_op_mul_i64, { "r", "0", "re" } },
    { INDEX_op_div2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_divu2_i64, { "a", "d", "0", "1", "r" } },
    { INDEX_op_sub_i64, { "r", "0", "re" } },
    { INDEX_op_and_i64, { "r", "0", "reZ" } },
    { INDEX_op_or_i64, { "r", "0", "re" } },
    { INDEX_op_xor_i64, { "r", "0", "re" } },

    { INDEX_op_shl_i64, { "r", "0", "ci" } },
    { INDEX_op_shr_i64, { "r", "0", "ci" } },
    { INDEX_op_sar_i64, { "r", "0", "ci" } },
    { INDEX_op_rotl_i64, { "r", "0", "ci" } },
    { INDEX_op_rotr_i64, { "r", "0", "ci" } },

    { INDEX_op_brcond_i64, { "r", "re" } },
    { INDEX_op_setcond_i64, { "r", "r", "re" } },

    { INDEX_op_bswap16_i64, { "r", "0" } },
    { INDEX_op_bswap32_i64, { "r", "0" } },
    { INDEX_op_bswap64_i64, { "r", "0" } },
    { INDEX_op_neg_i64, { "r", "0" } },
    { INDEX_op_not_i64, { "r", "0" } },

    { INDEX_op_ext8s_i64, { "r", "r" } },
    { INDEX_op_ext16s_i64, { "r", "r" } },
    { INDEX_op_ext32s_i64, { "r", "r" } },
    { INDEX_op_ext8u_i64, { "r", "r" } },
    { INDEX_op_ext16u_i64, { "r", "r" } },
    { INDEX_op_ext32u_i64, { "r", "r" } },

    { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },

    { INDEX_op_mulu2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_muls2_i64, { "a", "d", "a", "r" } },
    { INDEX_op_add2_i64, { "r", "r", "0", "1", "re", "re" } },
    { INDEX_op_sub2_i64, { "r", "r", "0", "1", "re", "re" } },
#endif

#if TCG_TARGET_REG_BITS == 64
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L" } },
#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
    { INDEX_op_qemu_ld_i32, { "r", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
#else
    { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
    { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
    { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
    { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
    { -1 },
};

static const int tcg_target_callee_save_regs[] = {
#if TCG_TARGET_REG_BITS == 64
    TCG_REG_RBP,
    TCG_REG_RBX,
#if defined(_WIN64)
    TCG_REG_RDI,
    TCG_REG_RSI,
#endif
    TCG_REG_R12,
    TCG_REG_R13,
    TCG_REG_R14, /* Currently used for the global env. */
    TCG_REG_R15,
#else
    TCG_REG_EBP, /* Currently used for the global env. */
    TCG_REG_EBX,
    TCG_REG_ESI,
    TCG_REG_EDI,
#endif
};

/* Compute frame size via macros, to share between tcg_target_qemu_prologue
   and tcg_register_jit.  */

#define PUSH_SIZE \
    ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
     * (TCG_TARGET_REG_BITS / 8))

#define FRAME_SIZE \
    ((PUSH_SIZE \
      + TCG_STATIC_CALL_ARGS_SIZE \
      + CPU_TEMP_BUF_NLONGS * sizeof(long) \
      + TCG_TARGET_STACK_ALIGN - 1) \
     & ~(TCG_TARGET_STACK_ALIGN - 1))
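
/* As a worked example, assuming the usual values TCG_STATIC_CALL_ARGS_SIZE
   == 128, CPU_TEMP_BUF_NLONGS == 128 and TCG_TARGET_STACK_ALIGN == 16: a
   64-bit non-Windows host pushes 6 registers, so PUSH_SIZE is
   (1 + 6) * 8 = 56 bytes including the return address, and FRAME_SIZE is
   (56 + 128 + 1024 + 15) & ~15 = 1216 bytes.  */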

/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
    int i, stack_addend;

    /* TB prologue */

    /* Reserve some stack space, also for TCG temps.  */
    stack_addend = FRAME_SIZE - PUSH_SIZE;
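    /* The register pushes below, together with the return address
       already on the stack, account for PUSH_SIZE bytes of the frame;
       only the remainder is subtracted from the stack pointer.  */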
    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                  CPU_TEMP_BUF_NLONGS * sizeof(long));

    /* Save all callee saved registers.  */
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
        tcg_out_push(s, tcg_target_callee_save_regs[i]);
    }

#if TCG_TARGET_REG_BITS == 32
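    /* On i386 the two arguments (env, tb) arrive on the stack: after
       the pushes above, env is (nregs + 1) words above %esp; once the
       frame is allocated, tb is (nregs + 2) words plus stack_addend.  */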
    tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
               (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
                         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
                         + stack_addend);
#else
    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
    /* jmp *tb.  */
    tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif

    /* TB epilogue */
    tb_ret_addr = s->code_ptr;

    tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);

    for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
        tcg_out_pop(s, tcg_target_callee_save_regs[i]);
    }
    tcg_out_opc(s, OPC_RET, 0, 0, 0);

#if !defined(CONFIG_SOFTMMU)
    /* Try to set up a segment register to point to GUEST_BASE.  */
    if (GUEST_BASE) {
        setup_guest_base_seg();
    }
#endif
}

static void tcg_target_init(TCGContext *s)
{
    /* For 32-bit, 99% certainty that we're running on hardware that supports
       cmov, but we still need to check.  In case cmov is not available, we'll
       use a small forward branch.  */
#ifndef have_cmov
    {
        unsigned a, b, c, d;
        have_cmov = (__get_cpuid(1, &a, &b, &c, &d) && (d & bit_CMOV));
    }
#endif

    if (TCG_TARGET_REG_BITS == 64) {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff);
    } else {
        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
    }

    tcg_regset_clear(tcg_target_call_clobber_regs);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EAX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_EDX);
    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_ECX);
    if (TCG_TARGET_REG_BITS == 64) {
#if !defined(_WIN64)
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RDI);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RSI);
#endif
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R8);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R9);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R10);
        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R11);
    }

    tcg_regset_clear(s->reserved_regs);
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);

    tcg_add_target_add_op_defs(x86_op_defs);
}

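/* Unwind info (one CIE and one FDE) that tcg_register_jit hands to the
   JIT debug interface so a debugger can unwind through generated code.  */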
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[14];
} DebugFrame;

/* We're expecting a 2 byte uleb128 encoded value.  */
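/* Each uleb128 byte holds 7 value bits plus a continuation bit, so two
   bytes encode at most 14 bits; fde_def_cfa below emits FRAME_SIZE as
   exactly two such bytes.  */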
QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));

#if !defined(__ELF__)
    /* Host machine without ELF. */
#elif TCG_TARGET_REG_BITS == 64
#define ELF_HOST_MACHINE EM_X86_64
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,             /* sleb128 -8 */
    .cie.return_column = 16,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                          /* DW_CFA_def_cfa %rsp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x90, 1,                        /* DW_CFA_offset, %rip, -8 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x86, 2,                        /* DW_CFA_offset, %rbp, -16 */
        0x83, 3,                        /* DW_CFA_offset, %rbx, -24 */
        0x8c, 4,                        /* DW_CFA_offset, %r12, -32 */
        0x8d, 5,                        /* DW_CFA_offset, %r13, -40 */
        0x8e, 6,                        /* DW_CFA_offset, %r14, -48 */
        0x8f, 7,                        /* DW_CFA_offset, %r15, -56 */
    }
};
#else
#define ELF_HOST_MACHINE EM_386
static DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
    .cie.id = -1,
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x7c,             /* sleb128 -4 */
    .cie.return_column = 8,

    /* Total FDE size does not include the "len" member.  */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),

    .fde_def_cfa = {
        12, 4,                          /* DW_CFA_def_cfa %esp, ... */
        (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
        (FRAME_SIZE >> 7)
    },
    .fde_reg_ofs = {
        0x88, 1,                        /* DW_CFA_offset, %eip, -4 */
        /* The following ordering must match tcg_target_callee_save_regs.  */
        0x85, 2,                        /* DW_CFA_offset, %ebp, -8 */
        0x83, 3,                        /* DW_CFA_offset, %ebx, -12 */
        0x86, 4,                        /* DW_CFA_offset, %esi, -16 */
        0x87, 5,                        /* DW_CFA_offset, %edi, -20 */
    }
};
#endif

#if defined(ELF_HOST_MACHINE)
void tcg_register_jit(void *buf, size_t buf_size)
{
    debug_frame.fde.func_start = (uintptr_t)buf;
    debug_frame.fde.func_len = buf_size;

    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif