target_x86.cc revision ffddfdf6fec0b9d98a692e27242eecb15af5ead2
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static const RegStorage core_regs_arr_32[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
};
static const RegStorage core_regs_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
#endif
};
static const RegStorage core_regs_arr_64q[] = {
    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
#ifdef TARGET_REX_SUPPORT
    rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
#endif
};
static const RegStorage sp_regs_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static const RegStorage sp_regs_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_regs_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static const RegStorage dp_regs_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};
static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static const RegStorage core_temps_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
#ifdef TARGET_REX_SUPPORT
    rs_r8, rs_r9, rs_r10, rs_r11
#endif
};
static const RegStorage core_temps_arr_64q[] = {
    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
#ifdef TARGET_REX_SUPPORT
    rs_r8q, rs_r9q, rs_r10q, rs_r11q
#endif
};
static const RegStorage sp_temps_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static const RegStorage sp_temps_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
#ifdef TARGET_REX_SUPPORT
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
#endif
};
static const RegStorage dp_temps_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static const RegStorage dp_temps_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
#ifdef TARGET_REX_SUPPORT
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
#endif
};

static const RegStorage xp_temps_arr_32[] = {
    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
};
static const RegStorage xp_temps_arr_64[] = {
    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
#ifdef TARGET_REX_SUPPORT
    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
#endif
};

static const std::vector<RegStorage> empty_pool;
static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q,
    core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0]));
static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q,
    reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0]));
static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q,
    core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0]));
static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));

static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32,
    xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0]));
static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64,
    xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0]));

RegStorage rs_rX86_SP;

X86NativeRegisterPool rX86_ARG0;
X86NativeRegisterPool rX86_ARG1;
X86NativeRegisterPool rX86_ARG2;
X86NativeRegisterPool rX86_ARG3;
X86NativeRegisterPool rX86_FARG0;
X86NativeRegisterPool rX86_FARG1;
X86NativeRegisterPool rX86_FARG2;
X86NativeRegisterPool rX86_FARG3;
X86NativeRegisterPool rX86_RET0;
X86NativeRegisterPool rX86_RET1;
X86NativeRegisterPool rX86_INVOKE_TGT;
X86NativeRegisterPool rX86_COUNT;

RegStorage rs_rX86_ARG0;
RegStorage rs_rX86_ARG1;
RegStorage rs_rX86_ARG2;
RegStorage rs_rX86_ARG3;
RegStorage rs_rX86_FARG0;
RegStorage rs_rX86_FARG1;
RegStorage rs_rX86_FARG2;
RegStorage rs_rX86_FARG3;
RegStorage rs_rX86_RET0;
RegStorage rs_rX86_RET1;
RegStorage rs_rX86_INVOKE_TGT;
RegStorage rs_rX86_COUNT;

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
  }
  return res_reg;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // TODO: This is not 64-bit compliant and depends on new internal ABI.
  switch (arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}

/*
 * Decode the register id.
 */
uint64_t X86Mir2Lir::GetRegMaskCommon(RegStorage reg) {
  uint64_t seed;
  int shift;
  int reg_id;

  reg_id = reg.GetRegNum();
  /* Double registers in x86 are just a single FP register. */
  seed = 1;
  /* FP register starts at bit position 16. */
  shift = (reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0;
  /* Expand the double register id into single offset. */
  shift += reg_id;
  return (seed << shift);
}

uint64_t X86Mir2Lir::GetPCUseDefEncoding() {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc.  Might be
   * able to clean up some of the x86/Arm_Mips differences.
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return 0ULL;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
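  // For example, an encoding that implicitly writes EDX:EAX (such as a
  // one-operand multiply) would carry REG_DEFA | REG_DEFD in its flags; the
  // checks below fold those fixed registers into the LIR's use/def masks so
  // the scheduler sees the implicit operands.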
  if (flags & REG_USE_SP) {
    lir->u.m.use_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEF_SP) {
    lir->u.m.def_mask |= ENCODE_X86_REG_SP;
  }

  if (flags & REG_DEFA) {
    SetupRegMask(&lir->u.m.def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(&lir->u.m.def_mask, rs_rDX.GetReg());
  }
  if (flags & REG_USEA) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(&lir->u.m.use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(&lir->u.m.use_mask, rs_rBX.GetReg());
  }

  // Fixup for a hard-to-describe instruction: uses rAX, rCX, rDI; sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(&lir->u.m.use_mask, rs_rAX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rCX.GetReg());
    SetupRegMask(&lir->u.m.use_mask, rs_rDI.GetReg());
    SetupRegMask(&lir->u.m.def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    lir->u.m.use_mask |= ENCODE_X86_FP_STACK;
    lir->u.m.def_mask |= ENCODE_X86_FP_STACK;
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a string no longer than size.
 * See format key in Assemble.cc.
 */
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
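        // For example, a fmt of "cmp !0r,!1r" with operands {rAX, rCX} would
        // expand to "cmp rax,rcx": '!' introduces an operand index, and the
        // following character selects a decoder case below ('r' = register).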
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), arraysize(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), arraysize(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, uint64_t mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask == ENCODE_ALL) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask & (1ULL << i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask & ENCODE_CCODE) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask & ENCODE_DALVIK_REG)) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask & ENCODE_LITERAL) {
      strcat(buf, "lit ");
    }

    if (mask & ENCODE_HEAP_REF) {
      strcat(buf, "heap ");
    }
    if (mask & ENCODE_MUST_NOT_ALIAS) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // Adjustment for LR spilling; x86 has no LR, so nothing to do here.
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

/*
 * Mark a callee-save fp register as promoted.  Note that
 * vpush/vpop uses contiguous register lists so we must
 * include any holes in the mask.  Associate holes with
 * Dalvik register INVALID_VREG (0xFFFFU).
 */
void X86Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedSingle";
}

void X86Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
  UNIMPLEMENTED(FATAL) << "MarkPreservedDouble";
}

RegStorage X86Mir2Lir::AllocateByteRegister() {
  return AllocTypedTemp(false, kCoreReg);
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg8bM:
    case kX86LockCmpxchg8bA:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide a full barrier.
      return true;
    default:
      break;
  }

  // Be conservative if we cannot prove that a full barrier is provided.
  return false;
}

bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with using the last LIR as the barrier.  If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  bool ret = false;
  /*
   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need a memory fence.
   * All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model.
   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   */
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (!ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
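    // Setting the def mask to ENCODE_ALL below makes every subsequent LIR
    // appear dependent on this instruction, so the scheduler cannot move
    // memory operations across it even when no mfence was emitted.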
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = ENCODE_ALL;
  }
  return ret;
#else
  return false;
#endif
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  if (Gen64Bit()) {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64,
                                          dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/,
                                          core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64);
  } else {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
                                          dp_regs_32, reserved_regs_32, empty_pool,
                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
  }

  // Target-specific adjustments.

  // Add in XMM registers.
  const std::vector<RegStorage>* xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32;
  for (RegStorage reg : *xp_temps) {
    RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
    reginfo_map_.Put(reg.GetReg(), info);
    info->SetIsTemp(true);
  }

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
    RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
    // 128-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(xp_reg_info, xp_reg_info->Master());

    // Redirect 32-bit vector's master storage to 128-bit vector.
    info->SetMaster(xp_reg_info);

    RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // Redirect 64-bit vector's master storage to 128-bit vector.
    dp_reg_info->SetMaster(xp_reg_info);
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
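  // Skipping the first allocation slots keeps the return registers (rAX/rDX
  // for core values, xmm0 for fp) free in methods that return before the
  // round-robin allocator wraps around.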
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register.
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena,
                       bool gen64bit)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit),
      const_vectors_(nullptr) {
  store_method_addr_used_ = false;
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
                   << " is wrong: expecting " << i << ", seeing "
                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
  if (Gen64Bit()) {
    rs_rX86_SP = rs_rX86_SP_64;

    rs_rX86_ARG0 = rs_rDI;
    rs_rX86_ARG1 = rs_rSI;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rCX;
    rX86_ARG0 = rDI;
    rX86_ARG1 = rSI;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rCX;
    // TODO: ARG4(r8), ARG5(r9), floating point args.
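    // This matches the first four integer argument registers of the x86-64
    // System V calling convention (rdi, rsi, rdx, rcx); r8/r9 would follow
    // once TARGET_REX_SUPPORT is enabled.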
  } else {
    rs_rX86_SP = rs_rX86_SP_32;

    rs_rX86_ARG0 = rs_rAX;
    rs_rX86_ARG1 = rs_rCX;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rBX;
    rX86_ARG0 = rAX;
    rX86_ARG1 = rCX;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rBX;
  }
  rs_rX86_FARG0 = rs_rAX;
  rs_rX86_FARG1 = rs_rCX;
  rs_rX86_FARG2 = rs_rDX;
  rs_rX86_FARG3 = rs_rBX;
  rs_rX86_RET0 = rs_rAX;
  rs_rX86_RET1 = rs_rDX;
  rs_rX86_INVOKE_TGT = rs_rAX;
  rs_rX86_COUNT = rs_rCX;
  rX86_FARG0 = rAX;
  rX86_FARG1 = rCX;
  rX86_FARG2 = rDX;
  rX86_FARG3 = rBX;
  rX86_RET0 = rAX;
  rX86_RET1 = rDX;
  rX86_INVOKE_TGT = rAX;
  rX86_COUNT = rCX;
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena, false);
}

Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                             ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena, true);
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86.
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    LIR* store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : " ")
            << (loc.defined ? " D" : " ")
            << (loc.is_const ? " c" : " ")
            << (loc.fp ? " F" : " ")
            << (loc.core ? " C" : " ")
            << (loc.ref ? " r" : " ")
            << (loc.high_word ? " h" : " ")
            << (loc.home ? " H" : " ")
" H" : " ") 798 << ", low: " << static_cast<int>(loc.reg.GetLowReg()) 799 << ", high: " << static_cast<int>(loc.reg.GetHighReg()) 800 << ", s_reg: " << loc.s_reg_low 801 << ", orig: " << loc.orig_sreg; 802} 803 804void X86Mir2Lir::Materialize() { 805 // A good place to put the analysis before starting. 806 AnalyzeMIR(); 807 808 // Now continue with regular code generation. 809 Mir2Lir::Materialize(); 810} 811 812void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type, 813 SpecialTargetRegister symbolic_reg) { 814 /* 815 * For x86, just generate a 32 bit move immediate instruction, that will be filled 816 * in at 'link time'. For now, put a unique value based on target to ensure that 817 * code deduplication works. 818 */ 819 int target_method_idx = target_method.dex_method_index; 820 const DexFile* target_dex_file = target_method.dex_file; 821 const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx); 822 uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id); 823 824 // Generate the move instruction with the unique pointer and save index, dex_file, and type. 825 LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(), 826 static_cast<int>(target_method_id_ptr), target_method_idx, 827 WrapPointer(const_cast<DexFile*>(target_dex_file)), type); 828 AppendLIR(move); 829 method_address_insns_.Insert(move); 830} 831 832void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) { 833 /* 834 * For x86, just generate a 32 bit move immediate instruction, that will be filled 835 * in at 'link time'. For now, put a unique value based on target to ensure that 836 * code deduplication works. 837 */ 838 const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx); 839 uintptr_t ptr = reinterpret_cast<uintptr_t>(&id); 840 841 // Generate the move instruction with the unique pointer and save index and type. 842 LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(), 843 static_cast<int>(ptr), type_idx); 844 AppendLIR(move); 845 class_type_address_insns_.Insert(move); 846} 847 848LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) { 849 /* 850 * For x86, just generate a 32 bit call relative instruction, that will be filled 851 * in at 'link time'. For now, put a unique value based on target to ensure that 852 * code deduplication works. 853 */ 854 int target_method_idx = target_method.dex_method_index; 855 const DexFile* target_dex_file = target_method.dex_file; 856 const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx); 857 uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id); 858 859 // Generate the call instruction with the unique pointer and save index, dex_file, and type. 860 LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr), 861 target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type); 862 AppendLIR(call); 863 call_method_insns_.Insert(call); 864 return call; 865} 866 867/* 868 * @brief Enter a 32 bit quantity into a buffer 869 * @param buf buffer. 870 * @param data Data value. 
 */
static void PushWord(std::vector<uint8_t>& buf, int32_t data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Align to a 16 byte boundary.  We have implicit knowledge that the start of the method is
  // on a 4 byte boundary.  How can I check this if it changes (other than that aligned loads
  // will fail at runtime)?
  if (const_vectors_ != nullptr) {
    int align_size = (16 - 4) - (code_buffer_.size() & 0xF);
    if (align_size < 0) {
      align_size += 16;
    }

    while (align_size > 0) {
      code_buffer_.push_back(0);
      align_size--;
    }
    for (LIR* p = const_vectors_; p != nullptr; p = p->next) {
      PushWord(code_buffer_, p->operands[0]);
      PushWord(code_buffer_, p->operands[1]);
      PushWord(code_buffer_, p->operands[2]);
      PushWord(code_buffer_, p->operands[3]);
    }
  }

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast String.indexOf(I) and indexOf(II).  Inline check for the simple case of
 * char <= 0xFFFF; otherwise bail to the standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers.

  // EAX: 16 bit character being searched.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the (II) flavor of indexOf.

  uint32_t char_value =
      rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(false);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-null?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search into rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR* length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use the special instruction JECXZ.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle the "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // The special case: we will use EDI further, so let's put the start
          // index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from the stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Put the start index on the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack) {
      // Load the start index from the stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // Yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR* all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR* not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out of line code returns here.
  if (slowpath_branch != nullptr) {
    LIR* return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Enter an 'advance LOC' into the FDE buffer.
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
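    // (DW_CFA_advance_loc4: opcode 0x04 followed by a four-byte operand; the
    // cases above are DW_CFA_advance_loc, _loc1 and _loc2 respectively.)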
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}


std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
  cfi_info->push_back(0x7C);

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4).
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 NOPs to align to a 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t* ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t* p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // The stack bump was followed by a RET instruction.
      LIR* post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Padding to a multiple of 4.
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
    case kMirOpConstVector:
      GenConst128(bb, mir);
      break;
    case kMirOpMoveVector:
      GenMoveVector(bb, mir);
      break;
    case kMirOpPackedMultiply:
      GenMultiplyVector(bb, mir);
      break;
    case kMirOpPackedAddition:
      GenAddVector(bb, mir);
      break;
    case kMirOpPackedSubtract:
      GenSubtractVector(bb, mir);
      break;
    case kMirOpPackedShiftLeft:
      GenShiftLeftVector(bb, mir);
      break;
    case kMirOpPackedSignedShiftRight:
      GenSignedShiftRightVector(bb, mir);
      break;
    case kMirOpPackedUnsignedShiftRight:
      GenUnsignedShiftRightVector(bb, mir);
      break;
    case kMirOpPackedAnd:
      GenAndVector(bb, mir);
      break;
    case kMirOpPackedOr:
      GenOrVector(bb, mir);
      break;
    case kMirOpPackedXor:
      GenXorVector(bb, mir);
      break;
    case kMirOpPackedAddReduce:
      GenAddReduceVector(bb, mir);
      break;
    case kMirOpPackedReduce:
      GenReduceVector(bb, mir);
      break;
    case kMirOpPackedSet:
      GenSetVector(bb, mir);
      break;
    default:
      break;
  }
}

void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
  int type_size = mir->dalvikInsn.vA;
  // We support 128 bit vectors.
  DCHECK_EQ(type_size & 0xFFFF, 128);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  uint32_t* args = mir->dalvikInsn.arg;
  int reg = rs_dest.GetReg();
  // Check for the all-zero case.
  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
    NewLIR2(kX86XorpsRR, reg, reg);
    return;
  }
  // Okay, load it from the constant vector area.
  LIR* data_target = ScanVectorLiteral(mir);
  if (data_target == nullptr) {
    data_target = AddVectorLiteral(mir);
  }

  // Address the start of the method.
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  rl_method = LoadValue(rl_method, kCoreReg);

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // a 4 byte offset.  We will fix this up in the assembler later to have the
  // right value.
  LIR* load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
  load->flags.fixup = kFixupLoad;
  load->target = data_target;
  SetMemRefType(load, true, kLiteral);
}

void X86Mir2Lir::GenMoveVector(BasicBlock* bb, MIR* mir) {
  // We only support 128 bit registers.
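  // The low 16 bits of vA hold the vector width in bits; the packed-arithmetic
  // opcodes below additionally keep the element OpSize in the high 16 bits
  // (decoded as vA >> 16).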
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
}

void X86Mir2Lir::GenMultiplyVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PmulldRR;
      break;
    case kSignedHalf:
      opcode = kX86PmullwRR;
      break;
    case kSingle:
      opcode = kX86MulpsRR;
      break;
    case kDouble:
      opcode = kX86MulpdRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector multiply " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenAddVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PadddRR;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PaddwRR;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kX86PaddbRR;
      break;
    case kSingle:
      opcode = kX86AddpsRR;
      break;
    case kDouble:
      opcode = kX86AddpdRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector addition " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenSubtractVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PsubdRR;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsubwRR;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kX86PsubbRR;
      break;
    case kSingle:
      opcode = kX86SubpsRR;
      break;
    case kDouble:
      opcode = kX86SubpdRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector subtraction " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenShiftLeftVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PslldRI;
      break;
    case k64:
      opcode = kX86PsllqRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsllwRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector shift left " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock* bb, MIR* mir) {
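  // Note: unlike the left and unsigned-right shifts, there is no k64 case
  // here; SSE2 provides psllq/psrlq but no packed 64-bit arithmetic right
  // shift.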
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PsradRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsrawRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PsrldRI;
      break;
    case k64:
      opcode = kX86PsrlqRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsrlwRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenAndVector(BasicBlock* bb, MIR* mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenOrVector(BasicBlock* bb, MIR* mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenXorVector(BasicBlock* bb, MIR* mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenAddReduceVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PhadddRR;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PhaddwRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenReduceVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
  int index = mir->dalvikInsn.arg[0];
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PextrdRRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PextrwRRI;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kX86PextrbRRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector reduce " << opsize;
      break;
  }
  // We need to extract to a GPR.
  RegStorage temp = AllocTemp();
  NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index);

  // Assume that the destination VR is in the def for the mir.
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  RegLocation rl_temp =
      {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG};
  StoreValue(rl_dest, rl_temp);
}

void X86Mir2Lir::GenSetVector(BasicBlock* bb, MIR* mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  int op_low = 0, op_high = 0;
  switch (opsize) {
    case k32:
      op_low = kX86PshufdRRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      // Handles low quadword.
      op_low = kX86PshuflwRRI;
      // Handles upper quadword.
      op_high = kX86PshufdRRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector set " << opsize;
      break;
  }

  // Load the value from the VR into a GPR.
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  rl_src = LoadValue(rl_src, kCoreReg);

  // Load the value into the XMM register.
  NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());

  // Now shuffle the value across the destination.
  NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);

  // And then repeat as needed.
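  // For 16-bit elements, pshuflw only replicates within the low 64 bits, so a
  // second pshufd with immediate 0 then copies that low doubleword across the
  // full 128-bit register.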
  if (op_high != 0) {
    NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
  }
}


LIR* X86Mir2Lir::ScanVectorLiteral(MIR* mir) {
  int* args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
  for (LIR* p = const_vectors_; p != nullptr; p = p->next) {
    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
        args[2] == p->operands[2] && args[3] == p->operands[3]) {
      return p;
    }
  }
  return nullptr;
}

LIR* X86Mir2Lir::AddVectorLiteral(MIR* mir) {
  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
  int* args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
  new_value->operands[0] = args[0];
  new_value->operands[1] = args[1];
  new_value->operands[2] = args[2];
  new_value->operands[3] = args[3];
  new_value->next = const_vectors_;
  if (const_vectors_ == nullptr) {
    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
  }
  estimated_native_code_size_ += 16;  // Space for one vector.
  const_vectors_ = new_value;
  return new_value;
}

}  // namespace art