x86_lir.h revision fe94578b63380f464c3abd5c156b7b31d068db6c
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
#define ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_

#include "dex/compiler_internals.h"

namespace art {

/*
 * Runtime register conventions. We consider x86, x86-64 and x32 (32-bit mode x86-64). Each ABI
 * has different conventions, and we capture those here. Changing something that is callee save and
 * making it caller save places a burden on up-calls to save/restore the callee save register;
 * however, there are few registers that are callee save in the ABI. Changing something that is
 * caller save and making it callee save places a burden on down-calls to save/restore the callee
 * save register. For these reasons we aim to match native conventions for caller and callee save.
 * On x86 only the first 4 registers can be used for byte operations, so they are preferred for
 * temporary scratch registers.
 *
 * General Purpose Register:
 *  Native: x86    | x86-64 / x32 | ART x86                                         | ART x86-64
 *  r0/eax: caller | caller       | caller, Method*, scratch, return value          | caller, scratch, return value
 *  r1/ecx: caller | caller, arg4 | caller, arg1, scratch                           | caller, arg3, scratch
 *  r2/edx: caller | caller, arg3 | caller, arg2, scratch, high half of long return | caller, arg2, scratch
 *  r3/ebx: callEE | callEE       | callER, arg3, scratch                           | callee, promotable
 *  r4/esp: stack pointer
 *  r5/ebp: callee | callee       | callee, promotable                              | callee, promotable
 *  r6/esi: callEE | callER, arg2 | callee, promotable                              | caller, arg1, scratch
 *  r7/edi: callEE | callER, arg1 | callee, promotable                              | caller, Method*, scratch
 *  ---  x86-64/x32 registers
 *  Native: x86-64 / x32      | ART
 *  r8:     caller save, arg5 | caller, arg4, scratch
 *  r9:     caller save, arg6 | caller, arg5, scratch
 *  r10:    caller save       | caller, scratch
 *  r11:    caller save       | caller, scratch
 *  r12:    callee save       | callee, available for register promotion (promotable)
 *  r13:    callee save       | callee, available for register promotion (promotable)
 *  r14:    callee save       | callee, available for register promotion (promotable)
 *  r15:    callee save       | callee, available for register promotion (promotable)
 *
 * There is no rSELF; instead, on x86 the fs: segment base points at Thread::Current(), whereas on
 * x86-64/x32 gs: holds it.
 *
 * For floating point we don't support CPUs without SSE2 (i.e. we require a Pentium 4 or newer):
 *  Native: x86  | x86-64 / x32 | ART x86                    | ART x86-64
 *  XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
 *  XMM1: caller | caller, arg2 | caller, scratch            | caller, arg2, scratch
 *  XMM2: caller | caller, arg3 | caller, scratch            | caller, arg3, scratch
 *  XMM3: caller | caller, arg4 | caller, scratch            | caller, arg4, scratch
 *  XMM4: caller | caller, arg5 | caller, scratch            | caller, arg5, scratch
 *  XMM5: caller | caller, arg6 | caller, scratch            | caller, arg6, scratch
 *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
 *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
 *  ---  x86-64/x32 registers
 *  XMM8 .. 15: caller save available as scratch registers for ART.
 *
 * X87 is a necessary evil outside of ART code for x86:
 *  ST0:  x86 float/double native return value, caller save
 *  ST1 .. ST7: caller save
 *
 *  Stack frame diagram (stack grows down, higher addresses at top):
 *
 * +------------------------+
 * | IN[ins-1]              |  {Note: resides in caller's frame}
 * |       .                |
 * | IN[0]                  |
 * | caller's Method*       |
 * +========================+  {Note: start of callee's frame}
 * | return address         |  {pushed by call}
 * | spill region           |  {variable sized}
 * +------------------------+
 * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
 * +------------------------+
 * | V[locals-1]            |
 * | V[locals-2]            |
 * |      .                 |
 * |      .                 |
 * | V[1]                   |
 * | V[0]                   |
 * +------------------------+
 * |  0 to 3 words padding  |
 * +------------------------+
 * | OUT[outs-1]            |
 * | OUT[outs-2]            |
 * |       .                |
 * | OUT[0]                 |
 * | cur_method*            | <<== sp w/ 16-byte alignment
 * +========================+
 */
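
// For example (reading the ART x86 column of the table above): an ART-to-ART call on
// x86 passes the callee's Method* in EAX and up to three arguments in ECX, EDX and
// EBX, and a long result comes back in the EDX:EAX pair. This note is illustration
// only; the rX86_ARG* / rX86_RET* declarations further down (defined in the x86
// target code) are the authoritative mapping.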

enum X86ResourceEncodingPos {
  kX86GPReg0   = 0,
  kX86RegSP    = 4,
  kX86FPReg0   = 16,  // xmm0 .. xmm7/xmm15.
  kX86FPRegEnd = 32,
  kX86FPStack  = 33,
  kX86RegEnd   = kX86FPStack,
};

#define ENCODE_X86_REG_SP           (1ULL << kX86RegSP)
#define ENCODE_X86_FP_STACK         (1ULL << kX86FPStack)
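
// Illustrative sketch (an assumption about usage, not defined in this header): these
// bit positions are OR-ed into LIR use/def resource masks, so an instruction that
// implicitly pushes to the stack would mark the stack pointer in both masks:
//
//   uint64_t use_mask = 0, def_mask = 0;
//   use_mask |= ENCODE_X86_REG_SP;
//   def_mask |= ENCODE_X86_REG_SP;
//
// The real mask construction lives in the x86 target codegen, not in this header.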

// FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
enum X86NativeRegisterPool {
  r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
  rAX            = r0,
  r1             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
  rCX            = r1,
  r2             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
  rDX            = r2,
  r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
  rBX            = r3,
  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
  rX86_SP_32     = r4sp_32,
  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
  rX86_SP_64     = r4sp_64,
  r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
  rBP            = r5,
  r5sib_no_base  = r5,
  r6             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
  rSI            = r6,
  r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
  rDI            = r7,
#ifndef TARGET_REX_SUPPORT
  // fake return address register for core spill mask.
  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
#else
  r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
  r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
  r10            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
  r11            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
  r12            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
  r13            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
  r14            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
  r15            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
  // fake return address register for core spill mask.
  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
#endif

  // xmm registers, single precision view.
  fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
  fr1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
  fr2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
  fr3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3,
  fr4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4,
  fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
  fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
  fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,

  // xmm registers, double precision aliases.
  dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
  dr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
  dr2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
  dr3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3,
  dr4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4,
  dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
  dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
  dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,

  // xmm registers, 128-bit vector aliases.
  xr0  = RegStorage::k128BitSolo | 0,
  xr1  = RegStorage::k128BitSolo | 1,
  xr2  = RegStorage::k128BitSolo | 2,
  xr3  = RegStorage::k128BitSolo | 3,
  xr4  = RegStorage::k128BitSolo | 4,
  xr5  = RegStorage::k128BitSolo | 5,
  xr6  = RegStorage::k128BitSolo | 6,
  xr7  = RegStorage::k128BitSolo | 7,

  // TODO: as needed, add 256, 512 and 1024-bit xmm views.
};
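
// Note on composition (illustrative): each enumerator above ORs a RegStorage
// shape/width (k32BitSolo, k64BitSolo or k128BitSolo) and, where applicable, a
// register class with the hardware register number. For example fr0, dr0 and xr0
// all name physical xmm0 and differ only in the width of the view (single
// precision, double precision, full 128-bit vector).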

constexpr RegStorage rs_r0(RegStorage::kValid | r0);
constexpr RegStorage rs_rAX = rs_r0;
constexpr RegStorage rs_r1(RegStorage::kValid | r1);
constexpr RegStorage rs_rCX = rs_r1;
constexpr RegStorage rs_r2(RegStorage::kValid | r2);
constexpr RegStorage rs_rDX = rs_r2;
constexpr RegStorage rs_r3(RegStorage::kValid | r3);
constexpr RegStorage rs_rBX = rs_r3;
constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
extern RegStorage rs_rX86_SP;
constexpr RegStorage rs_r5(RegStorage::kValid | r5);
constexpr RegStorage rs_rBP = rs_r5;
constexpr RegStorage rs_r6(RegStorage::kValid | r6);
constexpr RegStorage rs_rSI = rs_r6;
constexpr RegStorage rs_r7(RegStorage::kValid | r7);
constexpr RegStorage rs_rDI = rs_r7;
constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);

constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);

constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);

constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
constexpr RegStorage rs_xr2(RegStorage::kValid | xr2);
constexpr RegStorage rs_xr3(RegStorage::kValid | xr3);
constexpr RegStorage rs_xr4(RegStorage::kValid | xr4);
constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);

extern X86NativeRegisterPool rX86_ARG0;
extern X86NativeRegisterPool rX86_ARG1;
extern X86NativeRegisterPool rX86_ARG2;
extern X86NativeRegisterPool rX86_ARG3;
extern X86NativeRegisterPool rX86_FARG0;
extern X86NativeRegisterPool rX86_FARG1;
extern X86NativeRegisterPool rX86_FARG2;
extern X86NativeRegisterPool rX86_FARG3;
extern X86NativeRegisterPool rX86_RET0;
extern X86NativeRegisterPool rX86_RET1;
extern X86NativeRegisterPool rX86_INVOKE_TGT;
extern X86NativeRegisterPool rX86_COUNT;

extern RegStorage rs_rX86_ARG0;
extern RegStorage rs_rX86_ARG1;
extern RegStorage rs_rX86_ARG2;
extern RegStorage rs_rX86_ARG3;
extern RegStorage rs_rX86_FARG0;
extern RegStorage rs_rX86_FARG1;
extern RegStorage rs_rX86_FARG2;
extern RegStorage rs_rX86_FARG3;
extern RegStorage rs_rX86_RET0;
extern RegStorage rs_rX86_RET1;
extern RegStorage rs_rX86_INVOKE_TGT;
extern RegStorage rs_rX86_COUNT;

// RegisterLocation templates for return values (r_V0, or r_V0/r_V1).
const RegLocation x86_loc_c_return
    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
     RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG};
const RegLocation x86_loc_c_return_wide
    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
     RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
const RegLocation x86_loc_c_return_float
    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
     RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG};
const RegLocation x86_loc_c_return_double
    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
     RegStorage(RegStorage::k64BitSolo, dr0), INVALID_SREG, INVALID_SREG};
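
// For example, a method returning a Java long on x86 has its result described by
// x86_loc_c_return_wide above: a 64-bit pair held in edx:eax, matching the
// "high half of long return" note in the register table at the top of this file.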

/*
 * The following enum defines the list of supported X86 instructions by the
 * assembler. Their corresponding EncodingMap positions will be defined in
 * Assemble.cc.
 */
enum X86OpCode {
  kX86First = 0,
  kX8632BitData = kX86First,  // data [31..0].
  kX86Bkpt,
  kX86Nop,
  // Define groups of binary operations
  // MR - Memory Register  - opcode [base + disp], reg
  //             - lir operands - 0: base, 1: disp, 2: reg
  // AR - Array Register   - opcode [base + index * scale + disp], reg
  //             - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
  // TR - Thread Register  - opcode fs:[disp], reg - where fs: is equal to Thread::Current()
  //             - lir operands - 0: disp, 1: reg
  // RR - Register Register  - opcode reg1, reg2
  //             - lir operands - 0: reg1, 1: reg2
  // RM - Register Memory  - opcode reg, [base + disp]
  //             - lir operands - 0: reg, 1: base, 2: disp
  // RA - Register Array   - opcode reg, [base + index * scale + disp]
  //             - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
  // RT - Register Thread  - opcode reg, fs:[disp] - where fs: is equal to Thread::Current()
  //             - lir operands - 0: reg, 1: disp
  // RI - Register Immediate - opcode reg, #immediate
  //             - lir operands - 0: reg, 1: immediate
  // MI - Memory Immediate   - opcode [base + disp], #immediate
  //             - lir operands - 0: base, 1: disp, 2: immediate
  // AI - Array Immediate  - opcode [base + index * scale + disp], #immediate
  //             - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
  // TI - Thread Immediate  - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
  //             - lir operands - 0: disp, 1: imm
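  // Example (illustrative): "add [ebx + 8], eax" uses the 32-bit MR form,
  // kX86Add32MR, with lir operands {0: base = rBX, 1: disp = 8, 2: reg = rAX}.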
#define BinaryOpCode(opcode) \
  opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \
  opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \
  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \
  opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \
  opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
  opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
  opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR,  \
  opcode ## 32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \
  opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
  opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
  BinaryOpCode(kX86Add),
  BinaryOpCode(kX86Or),
  BinaryOpCode(kX86Adc),
  BinaryOpCode(kX86Sbb),
  BinaryOpCode(kX86And),
  BinaryOpCode(kX86Sub),
  BinaryOpCode(kX86Xor),
  BinaryOpCode(kX86Cmp),
#undef BinaryOpCode
  kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI,
  kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI,
  kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8,
  kX86Mov8MR, kX86Mov8AR, kX86Mov8TR,
  kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT,
  kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI,
  kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
  kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
  kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
  kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR,
  kX86Mov32RR, kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT,
  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI,
  kX86Lea32RM,
  kX86Lea32RA,
  // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
  //             - lir operands - 0: reg1, 1: reg2, 2: CC
  kX86Cmov32RRC,
  // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp]
  //             - lir operands - 0: reg1, 1: base, 2: disp 3: CC
  kX86Cmov32RMC,

  // RC - Register CL - opcode reg, CL
  //          - lir operands - 0: reg, 1: CL
  // MC - Memory CL   - opcode [base + disp], CL
  //          - lir operands - 0: base, 1: disp, 2: CL
  // AC - Array CL  - opcode [base + index * scale + disp], CL
  //          - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL
#define BinaryShiftOpCode(opcode) \
  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \
  opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \
  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \
  opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \
  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \
  opcode ## 32RC, opcode ## 32MC, opcode ## 32AC
  BinaryShiftOpCode(kX86Rol),
  BinaryShiftOpCode(kX86Ror),
  BinaryShiftOpCode(kX86Rcl),
  BinaryShiftOpCode(kX86Rcr),
  BinaryShiftOpCode(kX86Sal),
  BinaryShiftOpCode(kX86Shr),
  BinaryShiftOpCode(kX86Sar),
#undef BinaryShiftOpCode
  kX86Cmc,
  kX86Shld32RRI,
  kX86Shld32MRI,
  kX86Shrd32RRI,
  kX86Shrd32MRI,
#define UnaryOpcode(opcode, reg, mem, array) \
  opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
  opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
  opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array
  UnaryOpcode(kX86Test, RI, MI, AI),
  kX86Test32RR,
  UnaryOpcode(kX86Not, R, M, A),
  UnaryOpcode(kX86Neg, R, M, A),
  UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
  UnaryOpcode(kX86Imul, DaR, DaM, DaA),
  UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
  UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
  kx86Cdq32Da,
  kX86Bswap32R,
  kX86Push32R, kX86Pop32R,
#undef UnaryOpcode
#define Binary0fOpCode(opcode) \
  opcode ## RR, opcode ## RM, opcode ## RA
  Binary0fOpCode(kX86Movsd),
  kX86MovsdMR,
  kX86MovsdAR,
  Binary0fOpCode(kX86Movss),
  kX86MovssMR,
  kX86MovssAR,
  Binary0fOpCode(kX86Cvtsi2sd),  // int to double
  Binary0fOpCode(kX86Cvtsi2ss),  // int to float
  Binary0fOpCode(kX86Cvttsd2si),  // truncating double to int
  Binary0fOpCode(kX86Cvttss2si),  // truncating float to int
  Binary0fOpCode(kX86Cvtsd2si),  // rounding double to int
  Binary0fOpCode(kX86Cvtss2si),  // rounding float to int
  Binary0fOpCode(kX86Ucomisd),  // unordered double compare
  Binary0fOpCode(kX86Ucomiss),  // unordered float compare
  Binary0fOpCode(kX86Comisd),   // double compare
  Binary0fOpCode(kX86Comiss),   // float compare
  Binary0fOpCode(kX86Orps),     // or of floating point registers
  Binary0fOpCode(kX86Xorps),    // xor of floating point registers
  Binary0fOpCode(kX86Addsd),    // double add
  Binary0fOpCode(kX86Addss),    // float add
  Binary0fOpCode(kX86Mulsd),    // double multiply
  Binary0fOpCode(kX86Mulss),    // float multiply
  Binary0fOpCode(kX86Cvtsd2ss),  // double to float
  Binary0fOpCode(kX86Cvtss2sd),  // float to double
  Binary0fOpCode(kX86Subsd),    // double subtract
  Binary0fOpCode(kX86Subss),    // float subtract
  Binary0fOpCode(kX86Divsd),    // double divide
  Binary0fOpCode(kX86Divss),    // float divide
  Binary0fOpCode(kX86Punpckldq),  // Interleave low-order double words
  Binary0fOpCode(kX86Sqrtsd),   // square root
  Binary0fOpCode(kX86Pmulld),   // parallel integer multiply 32 bits x 4
  Binary0fOpCode(kX86Pmullw),   // parallel integer multiply 16 bits x 8
  Binary0fOpCode(kX86Mulps),    // parallel FP multiply 32 bits x 4
  Binary0fOpCode(kX86Mulpd),    // parallel FP multiply 64 bits x 2
  Binary0fOpCode(kX86Paddb),    // parallel integer addition 8 bits x 16
  Binary0fOpCode(kX86Paddw),    // parallel integer addition 16 bits x 8
  Binary0fOpCode(kX86Paddd),    // parallel integer addition 32 bits x 4
  Binary0fOpCode(kX86Addps),    // parallel FP addition 32 bits x 4
  Binary0fOpCode(kX86Addpd),    // parallel FP addition 64 bits x 2
  Binary0fOpCode(kX86Psubb),    // parallel integer subtraction 8 bits x 16
  Binary0fOpCode(kX86Psubw),    // parallel integer subtraction 16 bits x 8
  Binary0fOpCode(kX86Psubd),    // parallel integer subtraction 32 bits x 4
  Binary0fOpCode(kX86Subps),    // parallel FP subtraction 32 bits x 4
  Binary0fOpCode(kX86Subpd),    // parallel FP subtraction 64 bits x 2
  Binary0fOpCode(kX86Pand),     // parallel AND 128 bits x 1
  Binary0fOpCode(kX86Por),      // parallel OR 128 bits x 1
  Binary0fOpCode(kX86Pxor),     // parallel XOR 128 bits x 1
  Binary0fOpCode(kX86Phaddw),   // parallel horizontal addition 16 bits x 8
  Binary0fOpCode(kX86Phaddd),   // parallel horizontal addition 32 bits x 4
  kX86PextrbRRI,                // Extract 8 bits from XMM into GPR
  kX86PextrwRRI,                // Extract 16 bits from XMM into GPR
  kX86PextrdRRI,                // Extract 32 bits from XMM into GPR
  kX86PshuflwRRI,               // Shuffle 16 bits in lower 64 bits of XMM.
  kX86PshufdRRI,                // Shuffle 32 bits in XMM.
  kX86PsrawRI,                  // signed (arithmetic) right shift of xmm register, 16 bits x 8
  kX86PsradRI,                  // signed (arithmetic) right shift of xmm register, 32 bits x 4
  kX86PsrlwRI,                  // logical right shift of xmm register, 16 bits x 8
  kX86PsrldRI,                  // logical right shift of xmm register, 32 bits x 4
  kX86PsrlqRI,                  // logical right shift of xmm register, 64 bits x 2
  kX86PsllwRI,                  // logical left shift of xmm register, 16 bits x 8
  kX86PslldRI,                  // logical left shift of xmm register, 32 bits x 4
  kX86PsllqRI,                  // logical left shift of xmm register, 64 bits x 2
  kX86Fild32M,                  // push 32-bit integer on x87 stack
  kX86Fild64M,                  // push 64-bit integer on x87 stack
  kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
  kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
  Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
  kX86Mova128MR, kX86Mova128AR,  // store 128 bit aligned from xmm1 to m128
  Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
  kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
  Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
  kX86MovapsMR, kX86MovapsAR,   // store aligned packed single FP values from xmm1 to m128
  kX86MovlpsRM, kX86MovlpsRA,   // load packed single FP values from m64 to low quadword of xmm
  kX86MovlpsMR, kX86MovlpsAR,   // store packed single FP values from low quadword of xmm to m64
  kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
  kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
  Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
  kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
  kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
  kX86Mfence,                   // memory barrier
  Binary0fOpCode(kX86Imul16),   // 16bit multiply
  Binary0fOpCode(kX86Imul32),   // 32bit multiply
  kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
  kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
  kX86LockCmpxchg8bM, kX86LockCmpxchg8bA,  // locked compare and exchange
  kX86XchgMR,  // exchange memory with register (automatically locked)
  Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
  Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
  Binary0fOpCode(kX86Movsx8),   // sign-extend 8-bit value
  Binary0fOpCode(kX86Movsx16),  // sign-extend 16-bit value
#undef Binary0fOpCode
  kX86Jcc8, kX86Jcc32,  // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
  kX86Jmp8, kX86Jmp32,  // jmp rel8/32; lir operands - 0: rel, target assigned
  kX86JmpR,             // jmp reg; lir operands - 0: reg
  kX86Jecxz8,           // jecxz rel8; jump relative if ECX is zero.
  kX86JmpT,             // jmp fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp

  kX86CallR,            // call reg; lir operands - 0: reg
  kX86CallM,            // call [base + disp]; lir operands - 0: base, 1: disp
  kX86CallA,            // call [base + index * scale + disp]
                        // lir operands - 0: base, 1: index, 2: scale, 3: disp
  kX86CallT,            // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
  kX86CallI,            // call <relative> - 0: disp; Used for core.oat linking only
  kX86Ret,              // ret; no lir operands
  kX86StartOfMethod,    // call 0; pop reg; sub reg, # - generate start of method into reg
                        // lir operands - 0: reg
  kX86PcRelLoadRA,      // mov reg, [base + index * scale + PC relative displacement]
                        // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
  kX86PcRelAdr,         // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
  kX86RepneScasw,       // repne scasw
  kX86Last
};

/* Instruction assembly field_loc kind */
enum X86EncodingKind {
  kData,                                   // Special case for raw data.
  kNop,                                    // Special case for variable length nop.
  kNullary,                                // Opcode that takes no arguments.
  kPrefix2Nullary,                         // Opcode that takes no arguments, but 2 prefixes.
  kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
  kReg, kReg64, kMem, kArray,              // R, M and A instruction kinds.
  kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg,          // MR, AR and TR instruction kinds.
  kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread,  // RR, RM, RA and RT instruction kinds.
  kRegRegStore,                            // RR following the store modrm reg-reg encoding rather than the load.
  kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
  kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
  kMovRegImm,                              // Shorter form move RI.
  kRegRegImmRev,                           // RRI with first reg in r/m
  kMemRegImm,                              // MRI instruction kinds.
  kShiftRegImm, kShiftMemImm, kShiftArrayImm,  // Shift opcode with immediate.
  kShiftRegCl, kShiftMemCl, kShiftArrayCl,     // Shift opcode with register CL.
  kRegRegReg, kRegRegMem, kRegRegArray,    // RRR, RRM, RRA instruction kinds.
  kRegCond, kMemCond, kArrayCond,          // R, M, A instruction kinds followed by a condition.
  kRegRegCond,                             // RR instruction kind followed by a condition.
  kRegMemCond,                             // RM instruction kind followed by a condition.
  kJmp, kJcc, kCall,                       // Branch instruction kinds.
  kPcRel,                                  // Operation with displacement that is PC relative
  kMacro,                                  // An instruction composing multiple others
  kUnimplemented                           // Encoding used when an instruction isn't yet implemented.
};
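
// The kinds mirror the operand-order suffixes used in X86OpCode names: e.g.
// kX86Add32MR is assembled as a kMemReg instruction and kX86Add32RI as kRegImm
// (the authoritative opcode-to-kind assignment is the EncodingMap table in
// Assemble.cc).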

/* Struct used to define the EncodingMap positions for each X86 opcode */
struct X86EncodingMap {
  X86OpCode opcode;      // e.g. kX86Add32RI.
  X86EncodingKind kind;  // Used to discriminate how the skeleton below is interpreted.
  uint64_t flags;
  struct {
    uint8_t prefix1;       // non-zero => a prefix byte.
    uint8_t prefix2;       // non-zero => a second prefix byte.
    uint8_t opcode;        // 1 byte opcode.
    uint8_t extra_opcode1;  // possible extra opcode byte.
    uint8_t extra_opcode2;  // possible second extra opcode byte.
    // 3-bit opcode that gets encoded in the register bits of the modrm byte; its use is determined
    // by the encoding kind.
    uint8_t modrm_opcode;
    uint8_t ax_opcode;  // non-zero => shorter encoding for AX as a destination.
    uint8_t immediate_bytes;  // number of bytes of immediate.
  } skeleton;
  const char* name;
  const char* fmt;
};
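
// A hypothetical entry, to show how the fields fit together (illustrative only;
// the real, authoritative table is EncodingMap in Assemble.cc, and the flags and
// format string below are placeholders rather than the actual values):
//
//   { kX86Add32MR, kMemReg, /* use/def and side-effect flags */ 0,
//     { 0, 0, 0x01, 0, 0, 0, 0, 0 }, "Add32MR", "[!0r+!1d],!2r" },
//
// i.e. no prefixes, primary opcode 0x01 (add r/m32, r32), no modrm /digit opcode,
// no shorter AX form and no immediate bytes.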


// FIXME: mem barrier type - what do we do for x86?
#define kSY 0
#define kST 0

// Offsets of the low and high halves of a 64-bit value.
#define LOWORD_OFFSET 0
#define HIWORD_OFFSET 4
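
// E.g. for a 64-bit value spilled at [base + disp], the low word lives at
// [base + disp + LOWORD_OFFSET] and the high word at [base + disp + HIWORD_OFFSET].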

// Segment override instruction prefix used for quick TLS access to Thread::Current().
#define THREAD_PREFIX 0x64
#define THREAD_PREFIX_GS 0x65
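
// E.g. a thread-relative form such as kX86Mov32TR is emitted with the 0x64 fs:
// segment-override prefix on x86 (0x65 for gs: on x86-64/x32), so that fs:[disp]
// or gs:[disp] resolves against Thread::Current() as described at the top of this
// file.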

// 64 Bit Operand Size
#define REX_W 0x48
// Extension of the ModR/M reg field

#define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
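
// Sanity checks on the boundaries of the predicates above; they answer whether a
// constant fits in a sign-extended 8- or 16-bit field (e.g. whether a shorter
// *RI8 immediate form could be used).
static_assert(IS_SIMM8(127) && !IS_SIMM8(128), "simm8 upper bound");
static_assert(IS_SIMM8(-128) && !IS_SIMM8(-129), "simm8 lower bound");
static_assert(IS_SIMM16(32767) && !IS_SIMM16(32768), "simm16 upper bound");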

extern X86EncodingMap EncodingMap[kX86Last];
extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);

}  // namespace art

#endif  // ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_