// x86_lir.h revision fe94578b63380f464c3abd5c156b7b31d068db6c
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_
#define ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_

#include "dex/compiler_internals.h"

namespace art {

/*
 * Runtime register conventions. We consider both x86, x86-64 and x32 (32bit mode x86-64). The ABI
 * has different conventions and we capture those here. Changing something that is callee save and
 * making it caller save places a burden on up-calls to save/restore the callee save register,
 * however, there are few registers that are callee save in the ABI. Changing something that is
 * caller save and making it callee save places a burden on down-calls to save/restore the callee
 * save register. For these reasons we aim to match native conventions for caller and callee save.
 * On x86 only the first 4 registers can be used for byte operations, for this reason they are
 * preferred for temporary scratch registers.
 *
 * General Purpose Register:
 *  Native: x86    | x86-64 / x32 | ART x86                                      | ART x86-64
 *  r0/eax: caller | caller       | caller, Method*, scratch, return value       | caller, scratch, return value
 *  r1/ecx: caller | caller, arg4 | caller, arg1, scratch                        | caller, arg3, scratch
 *  r2/edx: caller | caller, arg3 | caller, arg2, scratch, high half of long ret | caller, arg2, scratch
 *  r3/ebx: callEE | callEE       | callER, arg3, scratch                        | callee, promotable
 *  r4/esp: stack pointer
 *  r5/ebp: callee | callee       | callee, promotable                           | callee, promotable
 *  r6/esi: callEE | callER, arg2 | callee, promotable                           | caller, arg1, scratch
 *  r7/edi: callEE | callER, arg1 | callee, promotable                           | caller, Method*, scratch
 *  ---  x86-64/x32 registers
 *  Native: x86-64 / x32      | ART
 *  r8:     caller save, arg5 | caller, arg4, scratch
 *  r9:     caller save, arg6 | caller, arg5, scratch
 *  r10:    caller save       | caller, scratch
 *  r11:    caller save       | caller, scratch
 *  r12:    callee save       | callee, available for register promotion (promotable)
 *  r13:    callee save       | callee, available for register promotion (promotable)
 *  r14:    callee save       | callee, available for register promotion (promotable)
 *  r15:    callee save       | callee, available for register promotion (promotable)
 *
 * There is no rSELF, instead on x86 fs: has a base address of Thread::Current, whereas on
 * x86-64/x32 gs: holds it.
 *
 * For floating point we don't support CPUs without SSE2 support (ie newer than PIII):
 *  Native: x86  | x86-64 / x32 | ART x86                    | ART x86-64
 *  XMM0: caller | caller, arg1 | caller, float return value | caller, arg1, float return value
 *  XMM1: caller | caller, arg2 | caller, scratch            | caller, arg2, scratch
 *  XMM2: caller | caller, arg3 | caller, scratch            | caller, arg3, scratch
 *  XMM3: caller | caller, arg4 | caller, scratch            | caller, arg4, scratch
 *  XMM4: caller | caller, arg5 | caller, scratch            | caller, arg5, scratch
 *  XMM5: caller | caller, arg6 | caller, scratch            | caller, arg6, scratch
 *  XMM6: caller | caller, arg7 | caller, scratch            | caller, arg7, scratch
 *  XMM7: caller | caller, arg8 | caller, scratch            | caller, arg8, scratch
 *  ---  x86-64/x32 registers
 *  XMM8 .. 15: caller save available as scratch registers for ART.
 *
 * X87 is a necessary evil outside of ART code for x86:
 *  ST0:  x86 float/double native return value, caller save
 *  ST1 .. ST7: caller save
 *
 *  Stack frame diagram (stack grows down, higher addresses at top):
 *
 * +------------------------+
 * | IN[ins-1]              |  {Note: resides in caller's frame}
 * |       .                |
 * | IN[0]                  |
 * | caller's Method*       |
 * +========================+  {Note: start of callee's frame}
 * | return address         |  {pushed by call}
 * | spill region           |  {variable sized}
 * +------------------------+
 * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long}
 * +------------------------+
 * | V[locals-1]            |
 * | V[locals-2]            |
 * |      .                 |
 * |      .                 |
 * | V[1]                   |
 * | V[0]                   |
 * +------------------------+
 * |  0 to 3 words padding  |
 * +------------------------+
 * | OUT[outs-1]            |
 * | OUT[outs-2]            |
 * |       .                |
 * | OUT[0]                 |
 * | cur_method*            | <<== sp w/ 16-byte alignment
 * +========================+
 */

// Bit positions used when building resource (use/def) masks for x86 registers.
enum X86ResourceEncodingPos {
  kX86GPReg0   = 0,             // First general-purpose register bit.
  kX86RegSP    = 4,             // Stack pointer gets its own position.
  kX86FPReg0   = 16,            // xmm0 .. xmm7/xmm15.
  kX86FPRegEnd = 32,
  kX86FPStack  = 33,            // The x87 FP stack as a single resource.
  kX86RegEnd   = kX86FPStack,
};

#define ENCODE_X86_REG_SP           (1ULL << kX86RegSP)
#define ENCODE_X86_FP_STACK         (1ULL << kX86FPStack)

// FIXME: for 64-bit, perhaps add an X86_64NativeRegisterPool enum?
// Raw register codes: the low bits are the hardware register number, OR'd with
// RegStorage shape/kind bits so the code is self-describing.
enum X86NativeRegisterPool {
  r0             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 0,
  rAX            = r0,
  r1             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 1,
  rCX            = r1,
  r2             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 2,
  rDX            = r2,
  r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
  rBX            = r3,
  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
  rX86_SP_32     = r4sp_32,
  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
  rX86_SP_64     = r4sp_64,
  r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
  rBP            = r5,
  r5sib_no_base  = r5,          // In a SIB byte, base==5 means "no base" (disp32 only).
  r6             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 6,
  rSI            = r6,
  r7             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 7,
  rDI            = r7,
#ifndef TARGET_REX_SUPPORT
  // fake return address register for core spill mask.
  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
#else
  r8             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 8,
  r9             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 9,
  r10            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 10,
  r11            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 11,
  r12            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 12,
  r13            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 13,
  r14            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 14,
  r15            = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 15,
  // fake return address register for core spill mask.
  rRET           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 16,
#endif

  // xmm registers, single precision view.
  fr0  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 0,
  fr1  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 1,
  fr2  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 2,
  fr3  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 3,
  fr4  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 4,
  fr5  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5,
  fr6  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6,
  fr7  = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7,

  // xmm registers, double precision aliases.
  dr0  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0,
  dr1  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 1,
  dr2  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 2,
  dr3  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 3,
  dr4  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 4,
  dr5  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5,
  dr6  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6,
  dr7  = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7,

  // xmm registers aliases.
  xr0  = RegStorage::k128BitSolo | 0,
  xr1  = RegStorage::k128BitSolo | 1,
  xr2  = RegStorage::k128BitSolo | 2,
  xr3  = RegStorage::k128BitSolo | 3,
  xr4  = RegStorage::k128BitSolo | 4,
  xr5  = RegStorage::k128BitSolo | 5,
  xr6  = RegStorage::k128BitSolo | 6,
  xr7  = RegStorage::k128BitSolo | 7,

  // TODO: as needed, add 256, 512 and 1024-bit xmm views.
};

// RegStorage wrappers: raw register codes above plus the kValid bit.
constexpr RegStorage rs_r0(RegStorage::kValid | r0);
constexpr RegStorage rs_rAX = rs_r0;
constexpr RegStorage rs_r1(RegStorage::kValid | r1);
constexpr RegStorage rs_rCX = rs_r1;
constexpr RegStorage rs_r2(RegStorage::kValid | r2);
constexpr RegStorage rs_rDX = rs_r2;
constexpr RegStorage rs_r3(RegStorage::kValid | r3);
constexpr RegStorage rs_rBX = rs_r3;
constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
// Runtime-selected stack pointer view (32- or 64-bit); defined in the target .cc.
extern RegStorage rs_rX86_SP;
constexpr RegStorage rs_r5(RegStorage::kValid | r5);
constexpr RegStorage rs_rBP = rs_r5;
constexpr RegStorage rs_r6(RegStorage::kValid | r6);
constexpr RegStorage rs_rSI = rs_r6;
constexpr RegStorage rs_r7(RegStorage::kValid | r7);
constexpr RegStorage rs_rDI = rs_r7;
constexpr RegStorage rs_rRET(RegStorage::kValid | rRET);

constexpr RegStorage rs_fr0(RegStorage::kValid | fr0);
constexpr RegStorage rs_fr1(RegStorage::kValid | fr1);
constexpr RegStorage rs_fr2(RegStorage::kValid | fr2);
constexpr RegStorage rs_fr3(RegStorage::kValid | fr3);
constexpr RegStorage rs_fr4(RegStorage::kValid | fr4);
constexpr RegStorage rs_fr5(RegStorage::kValid | fr5);
constexpr RegStorage rs_fr6(RegStorage::kValid | fr6);
constexpr RegStorage rs_fr7(RegStorage::kValid | fr7);

constexpr RegStorage rs_dr0(RegStorage::kValid | dr0);
constexpr RegStorage rs_dr1(RegStorage::kValid | dr1);
constexpr RegStorage rs_dr2(RegStorage::kValid | dr2);
constexpr RegStorage rs_dr3(RegStorage::kValid | dr3);
constexpr RegStorage rs_dr4(RegStorage::kValid | dr4);
constexpr RegStorage rs_dr5(RegStorage::kValid | dr5);
constexpr RegStorage rs_dr6(RegStorage::kValid | dr6);
constexpr RegStorage rs_dr7(RegStorage::kValid | dr7);

constexpr RegStorage rs_xr0(RegStorage::kValid | xr0);
constexpr RegStorage rs_xr1(RegStorage::kValid | xr1);
constexpr RegStorage rs_xr2(RegStorage::kValid | xr2);
constexpr RegStorage rs_xr3(RegStorage::kValid | xr3);
constexpr RegStorage rs_xr4(RegStorage::kValid | xr4);
constexpr RegStorage rs_xr5(RegStorage::kValid | xr5);
constexpr RegStorage rs_xr6(RegStorage::kValid | xr6);
constexpr RegStorage rs_xr7(RegStorage::kValid | xr7);

// ABI-dependent register assignments, bound at target initialization
// (they differ between x86 and x86-64; see the target .cc).
extern X86NativeRegisterPool rX86_ARG0;
extern X86NativeRegisterPool rX86_ARG1;
extern X86NativeRegisterPool rX86_ARG2;
extern X86NativeRegisterPool rX86_ARG3;
extern X86NativeRegisterPool rX86_FARG0;
extern X86NativeRegisterPool rX86_FARG1;
extern X86NativeRegisterPool rX86_FARG2;
extern X86NativeRegisterPool rX86_FARG3;
extern X86NativeRegisterPool rX86_RET0;
extern X86NativeRegisterPool rX86_RET1;
extern X86NativeRegisterPool rX86_INVOKE_TGT;
extern X86NativeRegisterPool rX86_COUNT;

extern RegStorage rs_rX86_ARG0;
extern RegStorage rs_rX86_ARG1;
extern RegStorage rs_rX86_ARG2;
extern RegStorage rs_rX86_ARG3;
extern RegStorage rs_rX86_FARG0;
extern RegStorage rs_rX86_FARG1;
extern RegStorage rs_rX86_FARG2;
extern RegStorage rs_rX86_FARG3;
extern RegStorage rs_rX86_RET0;
extern RegStorage rs_rX86_RET1;
extern RegStorage rs_rX86_INVOKE_TGT;
extern RegStorage rs_rX86_COUNT;

// RegisterLocation templates return values (r_V0, or r_V0/r_V1).
260const RegLocation x86_loc_c_return 261 {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, 262 RegStorage(RegStorage::k32BitSolo, rAX), INVALID_SREG, INVALID_SREG}; 263const RegLocation x86_loc_c_return_wide 264 {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, 265 RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG}; 266const RegLocation x86_loc_c_return_float 267 {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, 268 RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG}; 269const RegLocation x86_loc_c_return_double 270 {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, 271 RegStorage(RegStorage::k64BitSolo, dr0), INVALID_SREG, INVALID_SREG}; 272 273/* 274 * The following enum defines the list of supported X86 instructions by the 275 * assembler. Their corresponding EncodingMap positions will be defined in 276 * Assemble.cc. 277 */ 278enum X86OpCode { 279 kX86First = 0, 280 kX8632BitData = kX86First, // data [31..0]. 281 kX86Bkpt, 282 kX86Nop, 283 // Define groups of binary operations 284 // MR - Memory Register - opcode [base + disp], reg 285 // - lir operands - 0: base, 1: disp, 2: reg 286 // AR - Array Register - opcode [base + index * scale + disp], reg 287 // - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg 288 // TR - Thread Register - opcode fs:[disp], reg - where fs: is equal to Thread::Current() 289 // - lir operands - 0: disp, 1: reg 290 // RR - Register Register - opcode reg1, reg2 291 // - lir operands - 0: reg1, 1: reg2 292 // RM - Register Memory - opcode reg, [base + disp] 293 // - lir operands - 0: reg, 1: base, 2: disp 294 // RA - Register Array - opcode reg, [base + index * scale + disp] 295 // - lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp 296 // RT - Register Thread - opcode reg, fs:[disp] - where fs: is equal to Thread::Current() 297 // - lir operands - 0: reg, 1: disp 298 // RI - Register Immediate - opcode reg, #immediate 299 // - lir operands - 0: reg, 1: immediate 300 // MI - Memory Immediate - opcode [base + disp], 
#immediate 301 // - lir operands - 0: base, 1: disp, 2: immediate 302 // AI - Array Immediate - opcode [base + index * scale + disp], #immediate 303 // - lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate 304 // TI - Thread Immediate - opcode fs:[disp], imm - where fs: is equal to Thread::Current() 305 // - lir operands - 0: disp, 1: imm 306#define BinaryOpCode(opcode) \ 307 opcode ## 8MR, opcode ## 8AR, opcode ## 8TR, \ 308 opcode ## 8RR, opcode ## 8RM, opcode ## 8RA, opcode ## 8RT, \ 309 opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, opcode ## 8TI, \ 310 opcode ## 16MR, opcode ## 16AR, opcode ## 16TR, \ 311 opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \ 312 opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \ 313 opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \ 314 opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR, \ 315 opcode ## 32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \ 316 opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \ 317 opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8 318 BinaryOpCode(kX86Add), 319 BinaryOpCode(kX86Or), 320 BinaryOpCode(kX86Adc), 321 BinaryOpCode(kX86Sbb), 322 BinaryOpCode(kX86And), 323 BinaryOpCode(kX86Sub), 324 BinaryOpCode(kX86Xor), 325 BinaryOpCode(kX86Cmp), 326#undef BinaryOpCode 327 kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI, 328 kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI, 329 kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8, 330 kX86Mov8MR, kX86Mov8AR, kX86Mov8TR, 331 kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT, 332 kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI, 333 kX86Mov16MR, kX86Mov16AR, kX86Mov16TR, 334 kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT, 335 kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI, 336 kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR, 337 kX86Mov32RR, 
kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT, 338 kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI, 339 kX86Lea32RM, 340 kX86Lea32RA, 341 // RRC - Register Register ConditionCode - cond_opcode reg1, reg2 342 // - lir operands - 0: reg1, 1: reg2, 2: CC 343 kX86Cmov32RRC, 344 // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp] 345 // - lir operands - 0: reg1, 1: base, 2: disp 3: CC 346 kX86Cmov32RMC, 347 348 // RC - Register CL - opcode reg, CL 349 // - lir operands - 0: reg, 1: CL 350 // MC - Memory CL - opcode [base + disp], CL 351 // - lir operands - 0: base, 1: disp, 2: CL 352 // AC - Array CL - opcode [base + index * scale + disp], CL 353 // - lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: CL 354#define BinaryShiftOpCode(opcode) \ 355 opcode ## 8RI, opcode ## 8MI, opcode ## 8AI, \ 356 opcode ## 8RC, opcode ## 8MC, opcode ## 8AC, \ 357 opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \ 358 opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \ 359 opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \ 360 opcode ## 32RC, opcode ## 32MC, opcode ## 32AC 361 BinaryShiftOpCode(kX86Rol), 362 BinaryShiftOpCode(kX86Ror), 363 BinaryShiftOpCode(kX86Rcl), 364 BinaryShiftOpCode(kX86Rcr), 365 BinaryShiftOpCode(kX86Sal), 366 BinaryShiftOpCode(kX86Shr), 367 BinaryShiftOpCode(kX86Sar), 368#undef BinaryShiftOpcode 369 kX86Cmc, 370 kX86Shld32RRI, 371 kX86Shld32MRI, 372 kX86Shrd32RRI, 373 kX86Shrd32MRI, 374#define UnaryOpcode(opcode, reg, mem, array) \ 375 opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \ 376 opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \ 377 opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array 378 UnaryOpcode(kX86Test, RI, MI, AI), 379 kX86Test32RR, 380 UnaryOpcode(kX86Not, R, M, A), 381 UnaryOpcode(kX86Neg, R, M, A), 382 UnaryOpcode(kX86Mul, DaR, DaM, DaA), 383 UnaryOpcode(kX86Imul, DaR, DaM, DaA), 384 UnaryOpcode(kX86Divmod, DaR, DaM, DaA), 385 
UnaryOpcode(kX86Idivmod, DaR, DaM, DaA), 386 kx86Cdq32Da, 387 kX86Bswap32R, 388 kX86Push32R, kX86Pop32R, 389#undef UnaryOpcode 390#define Binary0fOpCode(opcode) \ 391 opcode ## RR, opcode ## RM, opcode ## RA 392 Binary0fOpCode(kX86Movsd), 393 kX86MovsdMR, 394 kX86MovsdAR, 395 Binary0fOpCode(kX86Movss), 396 kX86MovssMR, 397 kX86MovssAR, 398 Binary0fOpCode(kX86Cvtsi2sd), // int to double 399 Binary0fOpCode(kX86Cvtsi2ss), // int to float 400 Binary0fOpCode(kX86Cvttsd2si), // truncating double to int 401 Binary0fOpCode(kX86Cvttss2si), // truncating float to int 402 Binary0fOpCode(kX86Cvtsd2si), // rounding double to int 403 Binary0fOpCode(kX86Cvtss2si), // rounding float to int 404 Binary0fOpCode(kX86Ucomisd), // unordered double compare 405 Binary0fOpCode(kX86Ucomiss), // unordered float compare 406 Binary0fOpCode(kX86Comisd), // double compare 407 Binary0fOpCode(kX86Comiss), // float compare 408 Binary0fOpCode(kX86Orps), // or of floating point registers 409 Binary0fOpCode(kX86Xorps), // xor of floating point registers 410 Binary0fOpCode(kX86Addsd), // double add 411 Binary0fOpCode(kX86Addss), // float add 412 Binary0fOpCode(kX86Mulsd), // double multiply 413 Binary0fOpCode(kX86Mulss), // float multiply 414 Binary0fOpCode(kX86Cvtsd2ss), // double to float 415 Binary0fOpCode(kX86Cvtss2sd), // float to double 416 Binary0fOpCode(kX86Subsd), // double subtract 417 Binary0fOpCode(kX86Subss), // float subtract 418 Binary0fOpCode(kX86Divsd), // double divide 419 Binary0fOpCode(kX86Divss), // float divide 420 Binary0fOpCode(kX86Punpckldq), // Interleave low-order double words 421 Binary0fOpCode(kX86Sqrtsd), // square root 422 Binary0fOpCode(kX86Pmulld), // parallel integer multiply 32 bits x 4 423 Binary0fOpCode(kX86Pmullw), // parallel integer multiply 16 bits x 8 424 Binary0fOpCode(kX86Mulps), // parallel FP multiply 32 bits x 4 425 Binary0fOpCode(kX86Mulpd), // parallel FP multiply 64 bits x 2 426 Binary0fOpCode(kX86Paddb), // parallel integer addition 8 bits x 16 427 
Binary0fOpCode(kX86Paddw), // parallel integer addition 16 bits x 8 428 Binary0fOpCode(kX86Paddd), // parallel integer addition 32 bits x 4 429 Binary0fOpCode(kX86Addps), // parallel FP addition 32 bits x 4 430 Binary0fOpCode(kX86Addpd), // parallel FP addition 64 bits x 2 431 Binary0fOpCode(kX86Psubb), // parallel integer subtraction 8 bits x 16 432 Binary0fOpCode(kX86Psubw), // parallel integer subtraction 16 bits x 8 433 Binary0fOpCode(kX86Psubd), // parallel integer subtraction 32 bits x 4 434 Binary0fOpCode(kX86Subps), // parallel FP subtraction 32 bits x 4 435 Binary0fOpCode(kX86Subpd), // parallel FP subtraction 64 bits x 2 436 Binary0fOpCode(kX86Pand), // parallel AND 128 bits x 1 437 Binary0fOpCode(kX86Por), // parallel OR 128 bits x 1 438 Binary0fOpCode(kX86Pxor), // parallel XOR 128 bits x 1 439 Binary0fOpCode(kX86Phaddw), // parallel horizontal addition 16 bits x 8 440 Binary0fOpCode(kX86Phaddd), // parallel horizontal addition 32 bits x 4 441 kX86PextrbRRI, // Extract 8 bits from XMM into GPR 442 kX86PextrwRRI, // Extract 16 bits from XMM into GPR 443 kX86PextrdRRI, // Extract 32 bits from XMM into GPR 444 kX86PshuflwRRI, // Shuffle 16 bits in lower 64 bits of XMM. 445 kX86PshufdRRI, // Shuffle 32 bits in XMM. 
446 kX86PsrawRI, // signed right shift of floating point registers 16 bits x 8 447 kX86PsradRI, // signed right shift of floating point registers 32 bits x 4 448 kX86PsrlwRI, // logical right shift of floating point registers 16 bits x 8 449 kX86PsrldRI, // logical right shift of floating point registers 32 bits x 4 450 kX86PsrlqRI, // logical right shift of floating point registers 64 bits x 2 451 kX86PsllwRI, // left shift of floating point registers 16 bits x 8 452 kX86PslldRI, // left shift of floating point registers 32 bits x 4 453 kX86PsllqRI, // left shift of floating point registers 64 bits x 2 454 kX86Fild32M, // push 32-bit integer on x87 stack 455 kX86Fild64M, // push 64-bit integer on x87 stack 456 kX86Fstp32M, // pop top x87 fp stack and do 32-bit store 457 kX86Fstp64M, // pop top x87 fp stack and do 64-bit store 458 Binary0fOpCode(kX86Mova128), // move 128 bits aligned 459 kX86Mova128MR, kX86Mova128AR, // store 128 bit aligned from xmm1 to m128 460 Binary0fOpCode(kX86Movups), // load unaligned packed single FP values from xmm2/m128 to xmm1 461 kX86MovupsMR, kX86MovupsAR, // store unaligned packed single FP values from xmm1 to m128 462 Binary0fOpCode(kX86Movaps), // load aligned packed single FP values from xmm2/m128 to xmm1 463 kX86MovapsMR, kX86MovapsAR, // store aligned packed single FP values from xmm1 to m128 464 kX86MovlpsRM, kX86MovlpsRA, // load packed single FP values from m64 to low quadword of xmm 465 kX86MovlpsMR, kX86MovlpsAR, // store packed single FP values from low quadword of xmm to m64 466 kX86MovhpsRM, kX86MovhpsRA, // load packed single FP values from m64 to high quadword of xmm 467 kX86MovhpsMR, kX86MovhpsAR, // store packed single FP values from high quadword of xmm to m64 468 Binary0fOpCode(kX86Movdxr), // move into xmm from gpr 469 kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm 470 kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand 471 kX86Mfence, // memory barrier 472 
Binary0fOpCode(kX86Imul16), // 16bit multiply 473 Binary0fOpCode(kX86Imul32), // 32bit multiply 474 kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange 475 kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange 476 kX86LockCmpxchg8bM, kX86LockCmpxchg8bA, // locked compare and exchange 477 kX86XchgMR, // exchange memory with register (automatically locked) 478 Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value 479 Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value 480 Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value 481 Binary0fOpCode(kX86Movsx16), // sign-extend 16-bit value 482#undef Binary0fOpCode 483 kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned 484 kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned 485 kX86JmpR, // jmp reg; lir operands - 0: reg 486 kX86Jecxz8, // jcexz rel8; jump relative if ECX is zero. 487 kX86JmpT, // jmp fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp 488 489 kX86CallR, // call reg; lir operands - 0: reg 490 kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp 491 kX86CallA, // call [base + index * scale + disp] 492 // lir operands - 0: base, 1: index, 2: scale, 3: disp 493 kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp 494 kX86CallI, // call <relative> - 0: disp; Used for core.oat linking only 495 kX86Ret, // ret; no lir operands 496 kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg 497 // lir operands - 0: reg 498 kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement] 499 // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table 500 kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table 501 kX86RepneScasw, // repne scasw 502 kX86Last 503}; 504 505/* Instruction assembly field_loc kind */ 506enum X86EncodingKind { 507 kData, // Special case for raw data. 
508 kNop, // Special case for variable length nop. 509 kNullary, // Opcode that takes no arguments. 510 kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. 511 kRegOpcode, // Shorter form of R instruction kind (opcode+rd) 512 kReg, kReg64, kMem, kArray, // R, M and A instruction kinds. 513 kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg, // MR, AR and TR instruction kinds. 514 kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread, // RR, RM, RA and RT instruction kinds. 515 kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load. 516 kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. 517 kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. 518 kMovRegImm, // Shorter form move RI. 519 kRegRegImmRev, // RRI with first reg in r/m 520 kMemRegImm, // MRI instruction kinds. 521 kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate. 522 kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL. 523 kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. 524 kRegCond, kMemCond, kArrayCond, // R, M, A instruction kinds following by a condition. 525 kRegRegCond, // RR instruction kind followed by a condition. 526 kRegMemCond, // RM instruction kind followed by a condition. 527 kJmp, kJcc, kCall, // Branch instruction kinds. 528 kPcRel, // Operation with displacement that is PC relative 529 kMacro, // An instruction composing multiple others 530 kUnimplemented // Encoding used when an instruction isn't yet implemented. 531}; 532 533/* Struct used to define the EncodingMap positions for each X86 opcode */ 534struct X86EncodingMap { 535 X86OpCode opcode; // e.g. 
kOpAddRI 536 X86EncodingKind kind; // Used to discriminate in the union below 537 uint64_t flags; 538 struct { 539 uint8_t prefix1; // non-zero => a prefix byte 540 uint8_t prefix2; // non-zero => a second prefix byte 541 uint8_t opcode; // 1 byte opcode 542 uint8_t extra_opcode1; // possible extra opcode byte 543 uint8_t extra_opcode2; // possible second extra opcode byte 544 // 3bit opcode that gets encoded in the register bits of the modrm byte, use determined by the 545 // encoding kind 546 uint8_t modrm_opcode; 547 uint8_t ax_opcode; // non-zero => shorter encoding for AX as a destination 548 uint8_t immediate_bytes; // number of bytes of immediate 549 } skeleton; 550 const char *name; 551 const char* fmt; 552}; 553 554 555// FIXME: mem barrier type - what do we do for x86? 556#define kSY 0 557#define kST 0 558 559// Offsets of high and low halves of a 64bit value. 560#define LOWORD_OFFSET 0 561#define HIWORD_OFFSET 4 562 563// Segment override instruction prefix used for quick TLS access to Thread::Current(). 564#define THREAD_PREFIX 0x64 565#define THREAD_PREFIX_GS 0x65 566 567// 64 Bit Operand Size 568#define REX_W 0x48 569// Extension of the ModR/M reg field 570 571#define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127)) 572#define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767)) 573 574extern X86EncodingMap EncodingMap[kX86Last]; 575extern X86ConditionCode X86ConditionEncoding(ConditionCode cond); 576 577} // namespace art 578 579#endif // ART_COMPILER_DEX_QUICK_X86_X86_LIR_H_ 580