x86_lir.h revision 1c55703526827b5fc63f5d4b8477f36574649342
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
18#define ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
19
20#include "dex/compiler_internals.h"
21
22namespace art {
23
/*
 * Runtime register conventions. We consider x86, x86-64 and x32 (32bit mode x86-64). The ABI
 * has different conventions and we capture those here. Changing something that is callee save and
 * making it caller save places a burden on up-calls to save/restore the callee save register,
 * however, there are few registers that are callee save in the ABI. Changing something that is
 * caller save and making it callee save places a burden on down-calls to save/restore the callee
 * save register. For these reasons we aim to match native conventions for caller and callee save.
 * On x86 only the first 4 registers can be used for byte operations, for this reason they are
 * preferred for temporary scratch registers.
 *
 * General Purpose Register:
 *  Native: x86    | x86-64 / x32 | ART x86                                         | ART x86-64
 *  r0/eax: caller | caller       | caller, Method*, scratch, return value          | caller, scratch, return value
 *  r1/ecx: caller | caller, arg4 | caller, arg1, scratch                           | caller, arg3, scratch
 *  r2/edx: caller | caller, arg3 | caller, arg2, scratch, high half of long return | caller, arg2, scratch
 *  r3/ebx: callEE | callEE       | callER, arg3, scratch                           | callee, promotable
 *  r4/esp: stack pointer
 *  r5/ebp: callee | callee       | callee, promotable                              | callee, promotable
 *  r6/esi: callEE | callER, arg2 | callee, promotable                              | caller, arg1, scratch
 *  r7/edi: callEE | callER, arg1 | callee, promotable                              | caller, Method*, scratch
 *  ---  x86-64/x32 registers
 *  Native: x86-64 / x32      | ART
 *  r8:     caller save, arg5 | caller, arg4, scratch
 *  r9:     caller save, arg6 | caller, arg5, scratch
 *  r10:    caller save       | caller, scratch
 *  r11:    caller save       | caller, scratch
 *  r12:    callee save       | callee, available for register promotion (promotable)
 *  r13:    callee save       | callee, available for register promotion (promotable)
 *  r14:    callee save       | callee, available for register promotion (promotable)
 *  r15:    callee save       | callee, available for register promotion (promotable)
 *
 * There is no rSELF, instead on x86 fs: has a base address of Thread::Current, whereas on
 * x86-64/x32 gs: holds it.
 *
 * For floating point we require SSE2, i.e. only CPUs newer than the PIII are supported:
 *  Native: x86  | x86-64 / x32 | ART x86                    | ART x86-64
 *  XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
 *  XMM1: caller | caller, arg2 | caller, scratch            | caller, arg2, scratch
 *  XMM2: caller | caller, arg3 | caller, scratch            | caller, arg3, scratch
 *  XMM3: caller | caller, arg4 | caller, scratch            | caller, arg4, scratch
 *  XMM4: caller | caller, arg5 | caller, scratch            | caller, arg5, scratch
 *  XMM5: caller | caller, arg6 | caller, scratch            | caller, arg6, scratch
 *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
 *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
 *  ---  x86-64/x32 registers
 *  XMM8 .. 15: caller save available as scratch registers for ART.
 *
 * X87 is a necessary evil outside of ART code for x86:
 *  ST0:  x86 float/double native return value, caller save
 *  ST1 .. ST7: caller save
 *
 *  Stack frame diagram (stack grows down, higher addresses at top):
 *
 * +------------------------+
 * | IN[ins-1]              |  {Note: resides in caller's frame}
 * |       .                |
 * | IN[0]                  |
 * | caller's Method*       |
 * +========================+  {Note: start of callee's frame}
 * | return address         |  {pushed by call}
 * | spill region           |  {variable sized}
 * +------------------------+
 * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
 * +------------------------+
 * | V[locals-1]            |
 * | V[locals-2]            |
 * |      .                 |
 * |      .                 |
 * | V[1]                   |
 * | V[0]                   |
 * +------------------------+
 * |  0 to 3 words padding  |
 * +------------------------+
 * | OUT[outs-1]            |
 * | OUT[outs-2]            |
 * |       .                |
 * | OUT[0]                 |
 * | cur_method*            | <<== sp w/ 16-byte alignment
 * +========================+
 */
104
// Positions assigned to the x86 register classes in the resource encoding.
// NOTE(review): the name suggests these index bits of a resource mask; confirm against the
// code that consumes kX86RegEnd.
enum X86ResourceEncodingPos {
  kX86GPReg0   = 0,   // First general purpose register.
  kX86RegSP    = 4,   // Same number as r4 (the stack pointer) in X86NativeRegisterPool.
  kX86FPReg0   = 16,  // xmm0 .. xmm7/xmm15.
  kX86FPRegEnd = 32,  // kX86FPReg0 + 16, i.e. one past the slot for xmm15.
  kX86FPStack  = 33,  // Presumably the x87 FP stack treated as one resource; verify at use.
  kX86RegEnd   = kX86FPStack,  // Last position in use; alias of kX86FPStack.
};
113
114// FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
115enum X86NativeRegisterPool {
116  r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
117  r0q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 0,
118  rAX            = r0,
119  r1             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
120  r1q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 1,
121  rCX            = r1,
122  r2             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
123  r2q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 2,
124  rDX            = r2,
125  r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
126  r3q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 3,
127  rBX            = r3,
128  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
129  rX86_SP_32     = r4sp_32,
130  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
131  rX86_SP_64     = r4sp_64,
132  r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
133  r5q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 5,
134  rBP            = r5,
135  r5sib_no_base  = r5,
136  r6             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
137  r6q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 6,
138  rSI            = r6,
139  r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
140  r7q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 7,
141  rDI            = r7,
142  r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
143  r8q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 8,
144  r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
145  r9q            = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 9,
146  r10            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
147  r10q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 10,
148  r11            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
149  r11q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 11,
150  r12            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
151  r12q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 12,
152  r13            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
153  r13q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 13,
154  r14            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
155  r14q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 14,
156  r15            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
157  r15q           = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 15,
158  // fake return address register for core spill mask.
159  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
160
161  // xmm registers, single precision view.
162  fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
163  fr1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
164  fr2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
165  fr3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3,
166  fr4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4,
167  fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
168  fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
169  fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,
170  fr8  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8,
171  fr9  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9,
172  fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10,
173  fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11,
174  fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12,
175  fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13,
176  fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14,
177  fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15,
178
179  // xmm registers, double precision aliases.
180  dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
181  dr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
182  dr2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
183  dr3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3,
184  dr4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4,
185  dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
186  dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
187  dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,
188  dr8  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8,
189  dr9  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9,
190  dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10,
191  dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11,
192  dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12,
193  dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13,
194  dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14,
195  dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15,
196
197  // xmm registers, quad precision aliases
198  xr0  = RegStorage::k128BitSolo | 0,
199  xr1  = RegStorage::k128BitSolo | 1,
200  xr2  = RegStorage::k128BitSolo | 2,
201  xr3  = RegStorage::k128BitSolo | 3,
202  xr4  = RegStorage::k128BitSolo | 4,
203  xr5  = RegStorage::k128BitSolo | 5,
204  xr6  = RegStorage::k128BitSolo | 6,
205  xr7  = RegStorage::k128BitSolo | 7,
206  xr8  = RegStorage::k128BitSolo | 8,
207  xr9  = RegStorage::k128BitSolo | 9,
208  xr10 = RegStorage::k128BitSolo | 10,
209  xr11 = RegStorage::k128BitSolo | 11,
210  xr12 = RegStorage::k128BitSolo | 12,
211  xr13 = RegStorage::k128BitSolo | 13,
212  xr14 = RegStorage::k128BitSolo | 14,
213  xr15 = RegStorage::k128BitSolo | 15,
214
215  // TODO: as needed, add 256, 512 and 1024-bit xmm views.
216};
217
218constexpr RegStorage rs_r0(RegStorage::kValid | r0);
219constexpr RegStorage rs_r0q(RegStorage::kValid | r0q);
220constexpr RegStorage rs_rAX = rs_r0;
221constexpr RegStorage rs_r1(RegStorage::kValid | r1);
222constexpr RegStorage rs_r1q(RegStorage::kValid | r1q);
223constexpr RegStorage rs_rCX = rs_r1;
224constexpr RegStorage rs_r2(RegStorage::kValid | r2);
225constexpr RegStorage rs_r2q(RegStorage::kValid | r2q);
226constexpr RegStorage rs_rDX = rs_r2;
227constexpr RegStorage rs_r3(RegStorage::kValid | r3);
228constexpr RegStorage rs_r3q(RegStorage::kValid | r3q);
229constexpr RegStorage rs_rBX = rs_r3;
230constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
231constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
232extern RegStorage rs_rX86_SP;
233constexpr RegStorage rs_r5(RegStorage::kValid | r5);
234constexpr RegStorage rs_r5q(RegStorage::kValid | r5q);
235constexpr RegStorage rs_rBP = rs_r5;
236constexpr RegStorage rs_r6(RegStorage::kValid | r6);
237constexpr RegStorage rs_r6q(RegStorage::kValid | r6q);
238constexpr RegStorage rs_rSI = rs_r6;
239constexpr RegStorage rs_r7(RegStorage::kValid | r7);
240constexpr RegStorage rs_r7q(RegStorage::kValid | r7q);
241constexpr RegStorage rs_rDI = rs_r7;
242constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);
243constexpr RegStorage rs_r8(RegStorage::kValid | r8);
244constexpr RegStorage rs_r8q(RegStorage::kValid | r8q);
245constexpr RegStorage rs_r9(RegStorage::kValid | r9);
246constexpr RegStorage rs_r9q(RegStorage::kValid | r9q);
247constexpr RegStorage rs_r10(RegStorage::kValid | r10);
248constexpr RegStorage rs_r10q(RegStorage::kValid | r10q);
249constexpr RegStorage rs_r11(RegStorage::kValid | r11);
250constexpr RegStorage rs_r11q(RegStorage::kValid | r11q);
251constexpr RegStorage rs_r12(RegStorage::kValid | r12);
252constexpr RegStorage rs_r12q(RegStorage::kValid | r12q);
253constexpr RegStorage rs_r13(RegStorage::kValid | r13);
254constexpr RegStorage rs_r13q(RegStorage::kValid | r13q);
255constexpr RegStorage rs_r14(RegStorage::kValid | r14);
256constexpr RegStorage rs_r14q(RegStorage::kValid | r14q);
257constexpr RegStorage rs_r15(RegStorage::kValid | r15);
258constexpr RegStorage rs_r15q(RegStorage::kValid | r15q);
259
260constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
261constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
262constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
263constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
264constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
265constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
266constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
267constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);
268constexpr RegStorage rs_fr8(RegStorage::kValid | fr8);
269constexpr RegStorage rs_fr9(RegStorage::kValid | fr9);
270constexpr RegStorage rs_fr10(RegStorage::kValid | fr10);
271constexpr RegStorage rs_fr11(RegStorage::kValid | fr11);
272constexpr RegStorage rs_fr12(RegStorage::kValid | fr12);
273constexpr RegStorage rs_fr13(RegStorage::kValid | fr13);
274constexpr RegStorage rs_fr14(RegStorage::kValid | fr14);
275constexpr RegStorage rs_fr15(RegStorage::kValid | fr15);
276
277constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
278constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
279constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
280constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
281constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
282constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
283constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
284constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);
285constexpr RegStorage rs_dr8(RegStorage::kValid | dr8);
286constexpr RegStorage rs_dr9(RegStorage::kValid | dr9);
287constexpr RegStorage rs_dr10(RegStorage::kValid | dr10);
288constexpr RegStorage rs_dr11(RegStorage::kValid | dr11);
289constexpr RegStorage rs_dr12(RegStorage::kValid | dr12);
290constexpr RegStorage rs_dr13(RegStorage::kValid | dr13);
291constexpr RegStorage rs_dr14(RegStorage::kValid | dr14);
292constexpr RegStorage rs_dr15(RegStorage::kValid | dr15);
293
294constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
295constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
296constexpr RegStorage rs_xr2(RegStorage::kValid | xr2);
297constexpr RegStorage rs_xr3(RegStorage::kValid | xr3);
298constexpr RegStorage rs_xr4(RegStorage::kValid | xr4);
299constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
300constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
301constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);
302constexpr RegStorage rs_xr8(RegStorage::kValid | xr8);
303constexpr RegStorage rs_xr9(RegStorage::kValid | xr9);
304constexpr RegStorage rs_xr10(RegStorage::kValid | xr10);
305constexpr RegStorage rs_xr11(RegStorage::kValid | xr11);
306constexpr RegStorage rs_xr12(RegStorage::kValid | xr12);
307constexpr RegStorage rs_xr13(RegStorage::kValid | xr13);
308constexpr RegStorage rs_xr14(RegStorage::kValid | xr14);
309constexpr RegStorage rs_xr15(RegStorage::kValid | xr15);
310
311extern X86NativeRegisterPool rX86_ARG0;
312extern X86NativeRegisterPool rX86_ARG1;
313extern X86NativeRegisterPool rX86_ARG2;
314extern X86NativeRegisterPool rX86_ARG3;
315extern X86NativeRegisterPool rX86_ARG4;
316extern X86NativeRegisterPool rX86_ARG5;
317extern X86NativeRegisterPool rX86_FARG0;
318extern X86NativeRegisterPool rX86_FARG1;
319extern X86NativeRegisterPool rX86_FARG2;
320extern X86NativeRegisterPool rX86_FARG3;
321extern X86NativeRegisterPool rX86_FARG4;
322extern X86NativeRegisterPool rX86_FARG5;
323extern X86NativeRegisterPool rX86_FARG6;
324extern X86NativeRegisterPool rX86_FARG7;
325extern X86NativeRegisterPool rX86_RET0;
326extern X86NativeRegisterPool rX86_RET1;
327extern X86NativeRegisterPool rX86_INVOKE_TGT;
328extern X86NativeRegisterPool rX86_COUNT;
329
330extern RegStorage rs_rX86_ARG0;
331extern RegStorage rs_rX86_ARG1;
332extern RegStorage rs_rX86_ARG2;
333extern RegStorage rs_rX86_ARG3;
334extern RegStorage rs_rX86_ARG4;
335extern RegStorage rs_rX86_ARG5;
336extern RegStorage rs_rX86_FARG0;
337extern RegStorage rs_rX86_FARG1;
338extern RegStorage rs_rX86_FARG2;
339extern RegStorage rs_rX86_FARG3;
340extern RegStorage rs_rX86_FARG4;
341extern RegStorage rs_rX86_FARG5;
342extern RegStorage rs_rX86_FARG6;
343extern RegStorage rs_rX86_FARG7;
344extern RegStorage rs_rX86_RET0;
345extern RegStorage rs_rX86_RET1;
346extern RegStorage rs_rX86_INVOKE_TGT;
347extern RegStorage rs_rX86_COUNT;
348
349// RegisterLocation templates return values (r_V0, or r_V0/r_V1).
350const RegLocation x86_loc_c_return
351    {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1,
352     RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG};
353const RegLocation x86_loc_c_return_wide
354    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
355     RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG};
356const RegLocation x86_64_loc_c_return_wide
357    {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1,
358     RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG};
359const RegLocation x86_loc_c_return_float
360    {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1,
361     RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG};
362const RegLocation x86_loc_c_return_double
363    {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1,
364     RegStorage(RegStorage::k64BitSolo, dr0), INVALID_SREG, INVALID_SREG};
365
366/*
367 * The following enum defines the list of supported X86 instructions by the
368 * assembler. Their corresponding EncodingMap positions will be defined in
369 * Assemble.cc.
370 */
371enum X86OpCode {
372  kX86First = 0,
373  kX8632BitData = kX86First,  // data [31..0].
374  kX86Bkpt,
375  kX86Nop,
376  // Define groups of binary operations
377  // MR - Memory Register  - opcode [base + disp], reg
378  //             - lir operands - 0: base, 1: disp, 2: reg
379  // AR - Array Register   - opcode [base + index * scale + disp], reg
380  //             - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
381  // TR - Thread Register  - opcode fs:[disp], reg - where fs: is equal to Thread::Current()
382  //             - lir operands - 0: disp, 1: reg
383  // RR - Register Register  - opcode reg1, reg2
384  //             - lir operands - 0: reg1, 1: reg2
385  // RM - Register Memory  - opcode reg, [base + disp]
386  //             - lir operands - 0: reg, 1: base, 2: disp
387  // RA - Register Array   - opcode reg, [base + index * scale + disp]
388  //             - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
389  // RT - Register Thread  - opcode reg, fs:[disp] - where fs: is equal to Thread::Current()
390  //             - lir operands - 0: reg, 1: disp
391  // RI - Register Immediate - opcode reg, #immediate
392  //             - lir operands - 0: reg, 1: immediate
393  // MI - Memory Immediate   - opcode [base + disp], #immediate
394  //             - lir operands - 0: base, 1: disp, 2: immediate
395  // AI - Array Immediate  - opcode [base + index * scale + disp], #immediate
396  //             - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate
397  // TI - Thread Immediate  - opcode fs:[disp], imm - where fs: is equal to Thread::Current()
398  //             - lir operands - 0: disp, 1: imm
399#define BinaryOpCode(opcode) \
400  opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \
401  opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \
402  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \
403  opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \
404  opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
405  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
406  opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
407  opcode ## 32MR, opcode ## 32AR, opcode ## 32TR,  \
408  opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
409  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
410  opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8, \
411  opcode ## 64MR, opcode ## 64AR, opcode ## 64TR,  \
412  opcode ## 64RR, opcode ## 64RM, opcode ## 64RA, opcode ## 64RT, \
413  opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, opcode ## 64TI, \
414  opcode ## 64RI8, opcode ## 64MI8, opcode ## 64AI8, opcode ## 64TI8
415  BinaryOpCode(kX86Add),
416  BinaryOpCode(kX86Or),
417  BinaryOpCode(kX86Adc),
418  BinaryOpCode(kX86Sbb),
419  BinaryOpCode(kX86And),
420  BinaryOpCode(kX86Sub),
421  BinaryOpCode(kX86Xor),
422  BinaryOpCode(kX86Cmp),
423#undef BinaryOpCode
424  kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI,
425  kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI,
426  kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8,
427  kX86Imul64RRI, kX86Imul64RMI, kX86Imul64RAI,
428  kX86Imul64RRI8, kX86Imul64RMI8, kX86Imul64RAI8,
429  kX86Mov8MR, kX86Mov8AR, kX86Mov8TR,
430  kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT,
431  kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI,
432  kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
433  kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
434  kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
435  kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
436  kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
437  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
438  kX86Lea32RM,
439  kX86Lea32RA,
440  kX86Mov64MR, kX86Mov64AR, kX86Mov64TR,
441  kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT,
442  kX86Mov64RI, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI,
443  kX86Lea64RM,
444  kX86Lea64RA,
445  // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
446  //             - lir operands - 0: reg1, 1: reg2, 2: CC
447  kX86Cmov32RRC,
448  kX86Cmov64RRC,
449  // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp]
450  //             - lir operands - 0: reg1, 1: base, 2: disp 3: CC
451  kX86Cmov32RMC,
452  kX86Cmov64RMC,
453
454  // RC - Register CL - opcode reg, CL
455  //          - lir operands - 0: reg, 1: CL
456  // MC - Memory CL   - opcode [base + disp], CL
457  //          - lir operands - 0: base, 1: disp, 2: CL
458  // AC - Array CL  - opcode [base + index * scale + disp], CL
459  //          - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL
460#define BinaryShiftOpCode(opcode) \
461  opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \
462  opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \
463  opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \
464  opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \
465  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \
466  opcode ## 32RC, opcode ## 32MC, opcode ## 32AC, \
467  opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, \
468  opcode ## 64RC, opcode ## 64MC, opcode ## 64AC
469  BinaryShiftOpCode(kX86Rol),
470  BinaryShiftOpCode(kX86Ror),
471  BinaryShiftOpCode(kX86Rcl),
472  BinaryShiftOpCode(kX86Rcr),
473  BinaryShiftOpCode(kX86Sal),
474  BinaryShiftOpCode(kX86Shr),
475  BinaryShiftOpCode(kX86Sar),
476#undef BinaryShiftOpcode
477  kX86Cmc,
478  kX86Shld32RRI,
479  kX86Shld32MRI,
480  kX86Shrd32RRI,
481  kX86Shrd32MRI,
482  kX86Shld64RRI,
483  kX86Shld64MRI,
484  kX86Shrd64RRI,
485  kX86Shrd64MRI,
486#define UnaryOpcode(opcode, reg, mem, array) \
487  opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \
488  opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \
489  opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array, \
490  opcode ## 64 ## reg, opcode ## 64 ## mem, opcode ## 64 ## array
491  UnaryOpcode(kX86Test, RI, MI, AI),
492  kX86Test32RR,
493  kX86Test64RR,
494  UnaryOpcode(kX86Not, R, M, A),
495  UnaryOpcode(kX86Neg, R, M, A),
496  UnaryOpcode(kX86Mul,  DaR, DaM, DaA),
497  UnaryOpcode(kX86Imul, DaR, DaM, DaA),
498  UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
499  UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
500  kx86Cdq32Da,
501  kx86Cqo64Da,
502  kX86Bswap32R,
503  kX86Push32R, kX86Pop32R,
504#undef UnaryOpcode
505#define Binary0fOpCode(opcode) \
506  opcode ## RR, opcode ## RM, opcode ## RA
507  Binary0fOpCode(kX86Movsd),
508  kX86MovsdMR,
509  kX86MovsdAR,
510  Binary0fOpCode(kX86Movss),
511  kX86MovssMR,
512  kX86MovssAR,
513  Binary0fOpCode(kX86Cvtsi2sd),  // int to double
514  Binary0fOpCode(kX86Cvtsi2ss),  // int to float
515  Binary0fOpCode(kX86Cvtsqi2sd),  // long to double
516  Binary0fOpCode(kX86Cvtsqi2ss),  // long to float
517  Binary0fOpCode(kX86Cvttsd2si),  // truncating double to int
518  Binary0fOpCode(kX86Cvttss2si),  // truncating float to int
519  Binary0fOpCode(kX86Cvttsd2sqi),  // truncating double to long
520  Binary0fOpCode(kX86Cvttss2sqi),  // truncating float to long
521  Binary0fOpCode(kX86Cvtsd2si),  // rounding double to int
522  Binary0fOpCode(kX86Cvtss2si),  // rounding float to int
523  Binary0fOpCode(kX86Ucomisd),  // unordered double compare
524  Binary0fOpCode(kX86Ucomiss),  // unordered float compare
525  Binary0fOpCode(kX86Comisd),   // double compare
526  Binary0fOpCode(kX86Comiss),   // float compare
527  Binary0fOpCode(kX86Orps),     // or of floating point registers
528  Binary0fOpCode(kX86Xorps),    // xor of floating point registers
529  Binary0fOpCode(kX86Addsd),    // double add
530  Binary0fOpCode(kX86Addss),    // float add
531  Binary0fOpCode(kX86Mulsd),    // double multiply
532  Binary0fOpCode(kX86Mulss),    // float multiply
533  Binary0fOpCode(kX86Cvtsd2ss),  // double to float
534  Binary0fOpCode(kX86Cvtss2sd),  // float to double
535  Binary0fOpCode(kX86Subsd),    // double subtract
536  Binary0fOpCode(kX86Subss),    // float subtract
537  Binary0fOpCode(kX86Divsd),    // double divide
538  Binary0fOpCode(kX86Divss),    // float divide
539  Binary0fOpCode(kX86Punpckldq),  // Interleave low-order double words
540  Binary0fOpCode(kX86Sqrtsd),   // square root
541  Binary0fOpCode(kX86Pmulld),   // parallel integer multiply 32 bits x 4
542  Binary0fOpCode(kX86Pmullw),   // parallel integer multiply 16 bits x 8
543  Binary0fOpCode(kX86Mulps),    // parallel FP multiply 32 bits x 4
544  Binary0fOpCode(kX86Mulpd),    // parallel FP multiply 64 bits x 2
545  Binary0fOpCode(kX86Paddb),    // parallel integer addition 8 bits x 16
546  Binary0fOpCode(kX86Paddw),    // parallel integer addition 16 bits x 8
547  Binary0fOpCode(kX86Paddd),    // parallel integer addition 32 bits x 4
548  Binary0fOpCode(kX86Addps),    // parallel FP addition 32 bits x 4
549  Binary0fOpCode(kX86Addpd),    // parallel FP addition 64 bits x 2
550  Binary0fOpCode(kX86Psubb),    // parallel integer subtraction 8 bits x 16
551  Binary0fOpCode(kX86Psubw),    // parallel integer subtraction 16 bits x 8
552  Binary0fOpCode(kX86Psubd),    // parallel integer subtraction 32 bits x 4
553  Binary0fOpCode(kX86Subps),    // parallel FP subtraction 32 bits x 4
554  Binary0fOpCode(kX86Subpd),    // parallel FP subtraction 64 bits x 2
555  Binary0fOpCode(kX86Pand),     // parallel AND 128 bits x 1
556  Binary0fOpCode(kX86Por),      // parallel OR 128 bits x 1
557  Binary0fOpCode(kX86Pxor),     // parallel XOR 128 bits x 1
558  Binary0fOpCode(kX86Phaddw),   // parallel horizontal addition 16 bits x 8
559  Binary0fOpCode(kX86Phaddd),   // parallel horizontal addition 32 bits x 4
560  kX86PextrbRRI,                // Extract 8 bits from XMM into GPR
561  kX86PextrwRRI,                // Extract 16 bits from XMM into GPR
562  kX86PextrdRRI,                // Extract 32 bits from XMM into GPR
563  kX86PshuflwRRI,               // Shuffle 16 bits in lower 64 bits of XMM.
564  kX86PshufdRRI,                // Shuffle 32 bits in XMM.
565  kX86PsrawRI,                  // signed right shift of floating point registers 16 bits x 8
566  kX86PsradRI,                  // signed right shift of floating point registers 32 bits x 4
567  kX86PsrlwRI,                  // logical right shift of floating point registers 16 bits x 8
568  kX86PsrldRI,                  // logical right shift of floating point registers 32 bits x 4
569  kX86PsrlqRI,                  // logical right shift of floating point registers 64 bits x 2
570  kX86PsllwRI,                  // left shift of floating point registers 16 bits x 8
571  kX86PslldRI,                  // left shift of floating point registers 32 bits x 4
572  kX86PsllqRI,                  // left shift of floating point registers 64 bits x 2
573  kX86Fild32M,                  // push 32-bit integer on x87 stack
574  kX86Fild64M,                  // push 64-bit integer on x87 stack
575  kX86Fld32M,                   // push float on x87 stack
576  kX86Fld64M,                   // push double on x87 stack
577  kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
578  kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
579  kX86Fst32M,                   // do 32-bit store
580  kX86Fst64M,                   // do 64-bit store
581  kX86Fprem,                    // remainder from dividing of two floating point values
582  kX86Fucompp,                  // compare floating point values and pop x87 fp stack twice
583  kX86Fstsw16R,                 // store FPU status word
584  Binary0fOpCode(kX86Mova128),  // move 128 bits aligned
585  kX86Mova128MR, kX86Mova128AR,  // store 128 bit aligned from xmm1 to m128
586  Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
587  kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
588  Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
589  kX86MovapsMR, kX86MovapsAR,   // store aligned packed single FP values from xmm1 to m128
590  kX86MovlpsRM, kX86MovlpsRA,   // load packed single FP values from m64 to low quadword of xmm
591  kX86MovlpsMR, kX86MovlpsAR,   // store packed single FP values from low quadword of xmm to m64
592  kX86MovhpsRM, kX86MovhpsRA,   // load packed single FP values from m64 to high quadword of xmm
593  kX86MovhpsMR, kX86MovhpsAR,   // store packed single FP values from high quadword of xmm to m64
594  Binary0fOpCode(kX86Movdxr),   // move into xmm from gpr
595  Binary0fOpCode(kX86Movqxr),   // move into xmm from 64 bit gpr
596  kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR,  // move into 64 bit reg from xmm
597  kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR,  // move into reg from xmm
598  kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA,  // move 32 bit to 64 bit with sign extension
599  kX86Set8R, kX86Set8M, kX86Set8A,  // set byte depending on condition operand
600  kX86Mfence,                   // memory barrier
601  Binary0fOpCode(kX86Imul16),   // 16bit multiply
602  Binary0fOpCode(kX86Imul32),   // 32bit multiply
603  Binary0fOpCode(kX86Imul64),   // 64bit multiply
604  kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
605  kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
606  kX86LockCmpxchg64M, kX86LockCmpxchg64A,  // locked compare and exchange
607  kX86XchgMR,  // exchange memory with register (automatically locked)
608  Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
609  Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
610  Binary0fOpCode(kX86Movsx8),   // sign-extend 8-bit value
611  Binary0fOpCode(kX86Movsx16),  // sign-extend 16-bit value
612  Binary0fOpCode(kX86Movzx8q),   // zero-extend 8-bit value to quad word
613  Binary0fOpCode(kX86Movzx16q),  // zero-extend 16-bit value to quad word
614  Binary0fOpCode(kX86Movsx8q),   // sign-extend 8-bit value to quad word
615  Binary0fOpCode(kX86Movsx16q),  // sign-extend 16-bit value to quad word
616#undef Binary0fOpCode
617  kX86Jcc8, kX86Jcc32,  // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned
618  kX86Jmp8, kX86Jmp32,  // jmp rel8/32; lir operands - 0: rel, target assigned
619  kX86JmpR,             // jmp reg; lir operands - 0: reg
620  kX86Jecxz8,           // jcexz rel8; jump relative if ECX is zero.
621  kX86JmpT,             // jmp fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
622
623  kX86CallR,            // call reg; lir operands - 0: reg
624  kX86CallM,            // call [base + disp]; lir operands - 0: base, 1: disp
625  kX86CallA,            // call [base + index * scale + disp]
626                        // lir operands - 0: base, 1: index, 2: scale, 3: disp
627  kX86CallT,            // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp
628  kX86CallI,            // call <relative> - 0: disp; Used for core.oat linking only
629  kX86Ret,              // ret; no lir operands
630  kX86StartOfMethod,    // call 0; pop reg; sub reg, # - generate start of method into reg
631                        // lir operands - 0: reg
632  kX86PcRelLoadRA,      // mov reg, [base + index * scale + PC relative displacement]
633                        // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table
634  kX86PcRelAdr,         // mov reg, PC relative displacement; lir operands - 0: reg, 1: table
635  kX86RepneScasw,       // repne scasw
636  kX86Last
637};
638
/*
 * Encoding kind of an instruction, i.e. the operand shape it is assembled with. In the kind
 * abbreviations below R is a register, M is memory, A is an array, T is a thread and I is an
 * immediate operand.
 */
enum X86EncodingKind {
  kData,            // Raw data (special case).
  kNop,             // Variable length nop (special case).
  kNullary,         // Opcode without arguments.
  kRegOpcode,       // Shorter form of the R kind (opcode+rd).

  // R, M and A kinds.
  kReg,
  kMem,
  kArray,

  // MR, AR and TR kinds.
  kMemReg,
  kArrayReg,
  kThreadReg,

  // RR, RM, RA and RT kinds.
  kRegReg,
  kRegMem,
  kRegArray,
  kRegThread,

  kRegRegStore,     // RR using the store modrm reg-reg encoding instead of the load one.

  // RI, MI, AI and TI kinds.
  kRegImm,
  kMemImm,
  kArrayImm,
  kThreadImm,

  // RRI, RMI and RAI kinds.
  kRegRegImm,
  kRegMemImm,
  kRegArrayImm,

  kMovRegImm,       // Shorter form of move RI.
  kRegRegImmStore,  // RRI using the store modrm reg-reg encoding instead of the load one.
  kMemRegImm,       // MRI kind.

  // Shift opcode with an immediate.
  kShiftRegImm,
  kShiftMemImm,
  kShiftArrayImm,

  // Shift opcode with register CL.
  kShiftRegCl,
  kShiftMemCl,
  kShiftArrayCl,

  // Commented out: kRegRegReg, kRegRegMem, kRegRegArray (RRR, RRM and RRA kinds).

  // R, M and A kinds followed by a condition.
  kRegCond,
  kMemCond,
  kArrayCond,

  kRegRegCond,      // RR kind followed by a condition.
  kRegMemCond,      // RM kind followed by a condition.

  // Branch kinds.
  kJmp,
  kJcc,
  kCall,

  kPcRel,           // Operation whose displacement is PC relative.
  kMacro,           // An instruction composed of multiple others.
  kUnimplemented    // Used when an instruction isn't implemented yet.
};
665
666/* Struct used to define the EncodingMap positions for each X86 opcode */
667struct X86EncodingMap {
668  X86OpCode opcode;      // e.g. kOpAddRI
669  // The broad category the instruction conforms to, such as kRegReg. Identifies which LIR operands
670  // hold meaning for the opcode.
671  X86EncodingKind kind;
672  uint64_t flags;
673  struct {
674  uint8_t prefix1;       // Non-zero => a prefix byte.
675  uint8_t prefix2;       // Non-zero => a second prefix byte.
676  uint8_t opcode;        // 1 byte opcode.
677  uint8_t extra_opcode1;  // Possible extra opcode byte.
678  uint8_t extra_opcode2;  // Possible second extra opcode byte.
679  // 3-bit opcode that gets encoded in the register bits of the modrm byte, use determined by the
680  // encoding kind.
681  uint8_t modrm_opcode;
682  uint8_t ax_opcode;  // Non-zero => shorter encoding for AX as a destination.
683  uint8_t immediate_bytes;  // Number of bytes of immediate.
684  // Does the instruction address a byte register? In 32-bit mode the registers ah, bh, ch and dh
685  // are not used. In 64-bit mode the REX prefix is used to normalize and allow any byte register
686  // to be addressed.
687  bool r8_form;
688  } skeleton;
689  const char *name;
690  const char* fmt;
691};
692
693
// FIXME: memory barrier types - it is still undecided what these should be on x86, so both are
// defined as 0 for now.
#define kST 0
#define kSY 0
697
// Where each half of a 64bit value lives relative to the start of that value.
#define LOWORD_OFFSET 0  // Low half.
#define HIWORD_OFFSET 4  // High half.
701
// Segment override instruction prefix bytes, used for quick TLS access to Thread::Current().
#define THREAD_PREFIX 0x64     // fs: segment override.
#define THREAD_PREFIX_GS 0x65  // gs: segment override.
705
// REX prefix bytes. REX on its own selects the extended register set; every variant below is
// REX with one of its low four bits set.
#define REX 0x40
// W bit: 64 bit operand size.
#define REX_W (REX | 0x08)
// R bit: extension of the ModR/M reg field.
#define REX_R (REX | 0x04)
// X bit: extension of the SIB index field.
#define REX_X (REX | 0x02)
// B bit: extension of the ModR/M r/m field, SIB base field, or Opcode reg field.
#define REX_B (REX | 0x01)

// Mask that keeps the least significant 3 bits of a register number r0..r15.
#define kRegNumMask32 0x07
// Marks a base or reg as not used.
#define NO_REG 0
720
// Range checks: true when v is representable as a signed 8, 16 or 32 bit immediate.
// Each macro evaluates its argument twice, so avoid arguments with side effects.
#define IS_SIMM8(v) (((v) >= -128) && ((v) <= 127))
#define IS_SIMM16(v) (((v) >= -32768) && ((v) <= 32767))
#define IS_SIMM32(v) (((v) >= INT64_C(-2147483648)) && ((v) <= INT64_C(2147483647)))
724
725extern X86EncodingMap EncodingMap[kX86Last];
726extern X86ConditionCode X86ConditionEncoding(ConditionCode cond);
727
728}  // namespace art
729
730#endif  // ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
731