target_x86.cc revision b5860fb459f1ed71f39d8a87b45bee6727d79fe8
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>
#include <inttypes.h>

#include "codegen_x86.h"
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "mirror/array.h"
#include "mirror/string.h"
#include "x86_lir.h"

namespace art {

static constexpr RegStorage core_regs_arr_32[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
};
static constexpr RegStorage core_regs_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
    rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
};
static constexpr RegStorage core_regs_arr_64q[] = {
    rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q,
    rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q
};
static constexpr RegStorage sp_regs_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static constexpr RegStorage sp_regs_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
};
static constexpr RegStorage dp_regs_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static constexpr RegStorage dp_regs_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
};
static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
static constexpr RegStorage core_temps_arr_64[] = {
    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
    rs_r8, rs_r9, rs_r10, rs_r11
};
static constexpr RegStorage core_temps_arr_64q[] = {
    rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
    rs_r8q, rs_r9q, rs_r10q, rs_r11q
};
static constexpr RegStorage sp_temps_arr_32[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
};
static constexpr RegStorage sp_temps_arr_64[] = {
    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
};
static constexpr RegStorage dp_temps_arr_32[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
};
static constexpr RegStorage dp_temps_arr_64[] = {
    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
};

static constexpr RegStorage xp_temps_arr_32[] = {
    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
};
static constexpr RegStorage xp_temps_arr_64[] = {
    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6,
    rs_xr7,
    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
};

static constexpr ArrayRef<const RegStorage> empty_pool;
static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32);
static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64);
static constexpr ArrayRef<const RegStorage> core_regs_64q(core_regs_arr_64q);
static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32);
static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64);
static constexpr ArrayRef<const RegStorage> core_temps_64q(core_temps_arr_64q);
static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32);
static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64);
static constexpr ArrayRef<const RegStorage> dp_temps_32(dp_temps_arr_32);
static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64);

static constexpr ArrayRef<const RegStorage> xp_temps_32(xp_temps_arr_32);
static constexpr ArrayRef<const RegStorage> xp_temps_64(xp_temps_arr_64);

RegStorage rs_rX86_SP;

X86NativeRegisterPool rX86_ARG0;
X86NativeRegisterPool rX86_ARG1;
X86NativeRegisterPool rX86_ARG2;
X86NativeRegisterPool rX86_ARG3;
X86NativeRegisterPool rX86_ARG4;
X86NativeRegisterPool rX86_ARG5;
X86NativeRegisterPool rX86_FARG0;
X86NativeRegisterPool rX86_FARG1;
X86NativeRegisterPool rX86_FARG2;
X86NativeRegisterPool rX86_FARG3;
X86NativeRegisterPool rX86_FARG4;
X86NativeRegisterPool rX86_FARG5;
X86NativeRegisterPool rX86_FARG6;
X86NativeRegisterPool rX86_FARG7;
X86NativeRegisterPool rX86_RET0;
X86NativeRegisterPool rX86_RET1;
X86NativeRegisterPool rX86_INVOKE_TGT;
X86NativeRegisterPool rX86_COUNT;

RegStorage rs_rX86_ARG0;
RegStorage rs_rX86_ARG1;
RegStorage rs_rX86_ARG2;
RegStorage rs_rX86_ARG3;
RegStorage rs_rX86_ARG4;
RegStorage rs_rX86_ARG5;
RegStorage rs_rX86_FARG0;
RegStorage rs_rX86_FARG1;
RegStorage rs_rX86_FARG2;
RegStorage rs_rX86_FARG3;
RegStorage rs_rX86_FARG4;
RegStorage rs_rX86_FARG5;
RegStorage rs_rX86_FARG6;
RegStorage rs_rX86_FARG7;
RegStorage rs_rX86_RET0;
RegStorage rs_rX86_RET1;
RegStorage rs_rX86_INVOKE_TGT;
RegStorage rs_rX86_COUNT;

RegLocation X86Mir2Lir::LocCReturn() {
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnRef() {
  // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported.
  return x86_loc_c_return;
}

RegLocation X86Mir2Lir::LocCReturnWide() {
  return cu_->target64 ? x86_64_loc_c_return_wide : x86_loc_c_return_wide;
}

RegLocation X86Mir2Lir::LocCReturnFloat() {
  return x86_loc_c_return_float;
}

RegLocation X86Mir2Lir::LocCReturnDouble() {
  return x86_loc_c_return_double;
}

// Return a target-dependent special register.
RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) {
  RegStorage res_reg = RegStorage::InvalidReg();
  switch (reg) {
    case kSelf: res_reg = RegStorage::InvalidReg(); break;
    case kSuspend: res_reg = RegStorage::InvalidReg(); break;
    case kLr: res_reg = RegStorage::InvalidReg(); break;
    case kPc: res_reg = RegStorage::InvalidReg(); break;
    case kSp: res_reg = rs_rX86_SP; break;
    case kArg0: res_reg = rs_rX86_ARG0; break;
    case kArg1: res_reg = rs_rX86_ARG1; break;
    case kArg2: res_reg = rs_rX86_ARG2; break;
    case kArg3: res_reg = rs_rX86_ARG3; break;
    case kArg4: res_reg = rs_rX86_ARG4; break;
    case kArg5: res_reg = rs_rX86_ARG5; break;
    case kFArg0: res_reg = rs_rX86_FARG0; break;
    case kFArg1: res_reg = rs_rX86_FARG1; break;
    case kFArg2: res_reg = rs_rX86_FARG2; break;
    case kFArg3: res_reg = rs_rX86_FARG3; break;
    case kFArg4: res_reg = rs_rX86_FARG4; break;
    case kFArg5: res_reg = rs_rX86_FARG5; break;
    case kFArg6: res_reg = rs_rX86_FARG6; break;
    case kFArg7: res_reg = rs_rX86_FARG7; break;
    case kRet0: res_reg = rs_rX86_RET0; break;
    case kRet1: res_reg = rs_rX86_RET1; break;
    case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
    case kHiddenArg: res_reg = rs_rAX; break;
    case kHiddenFpArg: DCHECK(!cu_->target64); res_reg = rs_fr0; break;
    case kCount: res_reg = rs_rX86_COUNT; break;
    default: res_reg = RegStorage::InvalidReg();
  }
  return res_reg;
}

/*
 * Decode the register id.
 */
ResourceMask X86Mir2Lir::GetRegMaskCommon(const RegStorage& reg) const {
  /* Double registers in x86 are just a single FP register. This is always just a single bit. */
  return ResourceMask::Bit(
      /* FP register starts at bit position 16 */
      ((reg.IsFloat() || reg.StorageSize() > 8) ? kX86FPReg0 : 0) + reg.GetRegNum());
}

ResourceMask X86Mir2Lir::GetPCUseDefEncoding() const {
  /*
   * FIXME: might make sense to use a virtual resource encoding bit for pc. Might be
   * able to clean up some of the x86/Arm_Mips differences
   */
  LOG(FATAL) << "Unexpected call to GetPCUseDefEncoding for x86";
  return kEncodeNone;
}

void X86Mir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags,
                                          ResourceMask* use_mask, ResourceMask* def_mask) {
  DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
  DCHECK(!lir->flags.use_def_invalid);

  // X86-specific resource map setup here.
  if (flags & REG_USE_SP) {
    use_mask->SetBit(kX86RegSP);
  }

  if (flags & REG_DEF_SP) {
    def_mask->SetBit(kX86RegSP);
  }

  if (flags & REG_DEFA) {
    SetupRegMask(def_mask, rs_rAX.GetReg());
  }

  if (flags & REG_DEFD) {
    SetupRegMask(def_mask, rs_rDX.GetReg());
  }
  if (flags & REG_USEA) {
    SetupRegMask(use_mask, rs_rAX.GetReg());
  }

  if (flags & REG_USEC) {
    SetupRegMask(use_mask, rs_rCX.GetReg());
  }

  if (flags & REG_USED) {
    SetupRegMask(use_mask, rs_rDX.GetReg());
  }

  if (flags & REG_USEB) {
    SetupRegMask(use_mask, rs_rBX.GetReg());
  }

  // Fixup hard to describe instruction: Uses rAX, rCX, rDI; sets rDI.
  if (lir->opcode == kX86RepneScasw) {
    SetupRegMask(use_mask, rs_rAX.GetReg());
    SetupRegMask(use_mask, rs_rCX.GetReg());
    SetupRegMask(use_mask, rs_rDI.GetReg());
    SetupRegMask(def_mask, rs_rDI.GetReg());
  }

  if (flags & USE_FP_STACK) {
    use_mask->SetBit(kX86FPStack);
    def_mask->SetBit(kX86FPStack);
  }
}

/* For dumping instructions */
static const char* x86RegName[] = {
  "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
};

static const char* x86CondName[] = {
  "O",
  "NO",
  "B/NAE/C",
  "NB/AE/NC",
  "Z/EQ",
  "NZ/NE",
  "BE/NA",
  "NBE/A",
  "S",
  "NS",
  "P/PE",
  "NP/PO",
  "L/NGE",
  "NL/GE",
  "LE/NG",
  "NLE/G"
};

/*
 * Interpret a format string and build a string no longer than size.
 * See format key in Assemble.cc.
 */
std::string X86Mir2Lir::BuildInsnString(const char *fmt, LIR *lir, unsigned char* base_addr) {
  std::string buf;
  size_t i = 0;
  size_t fmt_len = strlen(fmt);
  while (i < fmt_len) {
    if (fmt[i] != '!') {
      buf += fmt[i];
      i++;
    } else {
      i++;
      DCHECK_LT(i, fmt_len);
      char operand_number_ch = fmt[i];
      i++;
      if (operand_number_ch == '!') {
        buf += "!";
      } else {
        int operand_number = operand_number_ch - '0';
        DCHECK_LT(operand_number, 6);  // Expect up to 6 LIR operands.
        DCHECK_LT(i, fmt_len);
        int operand = lir->operands[operand_number];
        switch (fmt[i]) {
          case 'c':
            DCHECK_LT(static_cast<size_t>(operand), sizeof(x86CondName));
            buf += x86CondName[operand];
            break;
          case 'd':
            buf += StringPrintf("%d", operand);
            break;
          case 'p': {
            EmbeddedData *tab_rec = reinterpret_cast<EmbeddedData*>(UnwrapPointer(operand));
            buf += StringPrintf("0x%08x", tab_rec->offset);
            break;
          }
          case 'r':
            if (RegStorage::IsFloat(operand)) {
              int fp_reg = RegStorage::RegNum(operand);
              buf += StringPrintf("xmm%d", fp_reg);
            } else {
              int reg_num = RegStorage::RegNum(operand);
              DCHECK_LT(static_cast<size_t>(reg_num), sizeof(x86RegName));
              buf += x86RegName[reg_num];
            }
            break;
          case 't':
            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
                                lir->target);
            break;
          default:
            buf += StringPrintf("DecodeError '%c'", fmt[i]);
            break;
        }
        i++;
      }
    }
  }
  return buf;
}

void X86Mir2Lir::DumpResourceMask(LIR *x86LIR, const ResourceMask& mask, const char *prefix) {
  char buf[256];
  buf[0] = 0;

  if (mask.Equals(kEncodeAll)) {
    strcpy(buf, "all");
  } else {
    char num[8];
    int i;

    for (i = 0; i < kX86RegEnd; i++) {
      if (mask.HasBit(i)) {
        snprintf(num, arraysize(num), "%d ", i);
        strcat(buf, num);
      }
    }

    if (mask.HasBit(ResourceMask::kCCode)) {
      strcat(buf, "cc ");
    }
    /* Memory bits */
    if (x86LIR && (mask.HasBit(ResourceMask::kDalvikReg))) {
      snprintf(buf + strlen(buf), arraysize(buf) - strlen(buf), "dr%d%s",
               DECODE_ALIAS_INFO_REG(x86LIR->flags.alias_info),
               (DECODE_ALIAS_INFO_WIDE(x86LIR->flags.alias_info)) ? "(+1)" : "");
    }
    if (mask.HasBit(ResourceMask::kLiteral)) {
      strcat(buf, "lit ");
    }

    if (mask.HasBit(ResourceMask::kHeapRef)) {
      strcat(buf, "heap ");
    }
    if (mask.HasBit(ResourceMask::kMustNotAlias)) {
      strcat(buf, "noalias ");
    }
  }
  if (buf[0]) {
    LOG(INFO) << prefix << ": " << buf;
  }
}

void X86Mir2Lir::AdjustSpillMask() {
  // Adjustment for LR spilling, x86 has no LR so nothing to do here
  core_spill_mask_ |= (1 << rs_rRET.GetRegNum());
  num_core_spills_++;
}

RegStorage X86Mir2Lir::AllocateByteRegister() {
  RegStorage reg = AllocTypedTemp(false, kCoreReg);
  if (!cu_->target64) {
    DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum());
  }
  return reg;
}

bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
  return cu_->target64 || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
}

/* Clobber all regs that might be used by an external C call */
void X86Mir2Lir::ClobberCallerSave() {
  Clobber(rs_rAX);
  Clobber(rs_rCX);
  Clobber(rs_rDX);
  Clobber(rs_rBX);

  Clobber(rs_fr0);
  Clobber(rs_fr1);
  Clobber(rs_fr2);
  Clobber(rs_fr3);
  Clobber(rs_fr4);
  Clobber(rs_fr5);
  Clobber(rs_fr6);
  Clobber(rs_fr7);

  if (cu_->target64) {
    Clobber(rs_r8);
    Clobber(rs_r9);
    Clobber(rs_r10);
    Clobber(rs_r11);

    Clobber(rs_fr8);
    Clobber(rs_fr9);
    Clobber(rs_fr10);
    Clobber(rs_fr11);
    Clobber(rs_fr12);
    Clobber(rs_fr13);
    Clobber(rs_fr14);
    Clobber(rs_fr15);
  }
}

RegLocation X86Mir2Lir::GetReturnWideAlt() {
  RegLocation res = LocCReturnWide();
  DCHECK(res.reg.GetLowReg() == rs_rAX.GetReg());
  DCHECK(res.reg.GetHighReg() == rs_rDX.GetReg());
  Clobber(rs_rAX);
  Clobber(rs_rDX);
  MarkInUse(rs_rAX);
  MarkInUse(rs_rDX);
  MarkWide(res.reg);
  return res;
}

RegLocation X86Mir2Lir::GetReturnAlt() {
  RegLocation res = LocCReturn();
  res.reg.SetReg(rs_rDX.GetReg());
  Clobber(rs_rDX);
  MarkInUse(rs_rDX);
  return res;
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::LockCallTemps() {
  LockTemp(rs_rX86_ARG0);
  LockTemp(rs_rX86_ARG1);
  LockTemp(rs_rX86_ARG2);
  LockTemp(rs_rX86_ARG3);
  if (cu_->target64) {
    LockTemp(rs_rX86_ARG4);
    LockTemp(rs_rX86_ARG5);
    LockTemp(rs_rX86_FARG0);
    LockTemp(rs_rX86_FARG1);
    LockTemp(rs_rX86_FARG2);
    LockTemp(rs_rX86_FARG3);
    LockTemp(rs_rX86_FARG4);
    LockTemp(rs_rX86_FARG5);
    LockTemp(rs_rX86_FARG6);
    LockTemp(rs_rX86_FARG7);
  }
}

/* To be used when explicitly managing register use */
void X86Mir2Lir::FreeCallTemps() {
  FreeTemp(rs_rX86_ARG0);
  FreeTemp(rs_rX86_ARG1);
  FreeTemp(rs_rX86_ARG2);
  FreeTemp(rs_rX86_ARG3);
  if (cu_->target64) {
    FreeTemp(rs_rX86_ARG4);
    FreeTemp(rs_rX86_ARG5);
    FreeTemp(rs_rX86_FARG0);
    FreeTemp(rs_rX86_FARG1);
    FreeTemp(rs_rX86_FARG2);
    FreeTemp(rs_rX86_FARG3);
    FreeTemp(rs_rX86_FARG4);
    FreeTemp(rs_rX86_FARG5);
    FreeTemp(rs_rX86_FARG6);
    FreeTemp(rs_rX86_FARG7);
  }
}

bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
  switch (opcode) {
    case kX86LockCmpxchgMR:
    case kX86LockCmpxchgAR:
    case kX86LockCmpxchg64M:
    case kX86LockCmpxchg64A:
    case kX86XchgMR:
    case kX86Mfence:
      // Atomic memory instructions provide full barrier.
      return true;
    default:
      break;
  }

  // Conservative if cannot prove it provides full barrier.
  return false;
}

bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
  LIR* mem_barrier = last_lir_insn_;

  bool ret = false;
  /*
   * According to the JSR-133 Cookbook, on x86 only StoreLoad barriers need a memory fence.
   * All other barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model.
   * For those cases, all we need to ensure is that there is a scheduling barrier in place.
   */
  if (barrier_kind == kStoreLoad) {
    // If no LIR exists already that can be used as a barrier, then generate an mfence.
    if (mem_barrier == nullptr) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }

    // If the last instruction does not provide a full barrier, then insert an mfence.
    if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
      mem_barrier = NewLIR0(kX86Mfence);
      ret = true;
    }
  }

  // Now ensure that a scheduling barrier is in place.
  if (mem_barrier == nullptr) {
    GenBarrier();
  } else {
    // Mark as a scheduling barrier.
    DCHECK(!mem_barrier->flags.use_def_invalid);
    mem_barrier->u.m.def_mask = &kEncodeAll;
  }
  return ret;
#else
  return false;
#endif
}

void X86Mir2Lir::CompilerInitializeRegAlloc() {
  if (cu_->target64) {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64,
                                          dp_regs_64, reserved_regs_64, reserved_regs_64q,
                                          core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64);
  } else {
    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32,
                                          dp_regs_32, reserved_regs_32, empty_pool,
                                          core_temps_32, empty_pool, sp_temps_32, dp_temps_32);
  }

  // Target-specific adjustments.

  // Add in XMM registers.
  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
  for (RegStorage reg : *xp_temps) {
    RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
    reginfo_map_.Put(reg.GetReg(), info);
    info->SetIsTemp(true);
  }

  // Alias single precision xmm to double xmms.
  // TODO: as needed, add larger vector sizes - alias all to the largest.
  GrowableArray<RegisterInfo*>::Iterator it(&reg_pool_->sp_regs_);
  for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) {
    int sp_reg_num = info->GetReg().GetRegNum();
    RegStorage xp_reg = RegStorage::Solo128(sp_reg_num);
    RegisterInfo* xp_reg_info = GetRegInfo(xp_reg);
    // 128-bit xmm vector register's master storage should refer to itself.
    DCHECK_EQ(xp_reg_info, xp_reg_info->Master());

    // Redirect 32-bit vector's master storage to 128-bit vector.
    info->SetMaster(xp_reg_info);

    RegStorage dp_reg = RegStorage::FloatSolo64(sp_reg_num);
    RegisterInfo* dp_reg_info = GetRegInfo(dp_reg);
    // Redirect 64-bit vector's master storage to 128-bit vector.
    dp_reg_info->SetMaster(xp_reg_info);
    // Singles should show a single 32-bit mask bit, at first referring to the low half.
    DCHECK_EQ(info->StorageMask(), 0x1U);
  }

  if (cu_->target64) {
    // Alias 32bit W registers to corresponding 64bit X registers.
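    // (The "W"/"X" names below presumably echo ARM64 terminology; on x86-64 this simply aliases
    // the 32-bit view of each GPR to its 64-bit register so both share one master RegisterInfo.)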
    GrowableArray<RegisterInfo*>::Iterator w_it(&reg_pool_->core_regs_);
    for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) {
      int x_reg_num = info->GetReg().GetRegNum();
      RegStorage x_reg = RegStorage::Solo64(x_reg_num);
      RegisterInfo* x_reg_info = GetRegInfo(x_reg);
      // 64bit X register's master storage should refer to itself.
      DCHECK_EQ(x_reg_info, x_reg_info->Master());
      // Redirect 32bit W master storage to 64bit X.
      info->SetMaster(x_reg_info);
      // 32bit W should show a single 32-bit mask bit, at first referring to the low half.
      DCHECK_EQ(info->StorageMask(), 0x1U);
    }
  }

  // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods.
  // TODO: adjust for x86/hard float calling convention.
  reg_pool_->next_core_reg_ = 2;
  reg_pool_->next_sp_reg_ = 2;
  reg_pool_->next_dp_reg_ = 1;
}

void X86Mir2Lir::SpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

void X86Mir2Lir::UnSpillCoreRegs() {
  if (num_core_spills_ == 0) {
    return;
  }
  // Spill mask not including fake return address register
  uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
  for (int reg = 0; mask; mask >>= 1, reg++) {
    if (mask & 0x1) {
      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
      offset += GetInstructionSetPointerSize(cu_->instruction_set);
    }
  }
}

bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
  return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
}

bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) {
  return true;
}

RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) {
  // X86_64 can handle any size.
  if (cu_->target64) {
    if (size == kReference) {
      return kRefReg;
    }
    return kCoreReg;
  }

  if (UNLIKELY(is_volatile)) {
    // On x86, atomic 64-bit load/store requires an fp register.
    // Smaller aligned load/store is atomic for both core and fp registers.
    if (size == k64 || size == kDouble) {
      return kFPReg;
    }
  }
  return RegClassBySize(size);
}

X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
    : Mir2Lir(cu, mir_graph, arena),
      base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
      method_address_insns_(arena, 100, kGrowableArrayMisc),
      class_type_address_insns_(arena, 100, kGrowableArrayMisc),
      call_method_insns_(arena, 100, kGrowableArrayMisc),
      stack_decrement_(nullptr), stack_increment_(nullptr),
      const_vectors_(nullptr) {
  store_method_addr_used_ = false;
  if (kIsDebugBuild) {
    for (int i = 0; i < kX86Last; i++) {
      if (X86Mir2Lir::EncodingMap[i].opcode != i) {
        LOG(FATAL) << "Encoding order for " << X86Mir2Lir::EncodingMap[i].name
                   << " is wrong: expecting " << i << ", seeing "
                   << static_cast<int>(X86Mir2Lir::EncodingMap[i].opcode);
      }
    }
  }
  if (cu_->target64) {
    rs_rX86_SP = rs_rX86_SP_64;

    rs_rX86_ARG0 = rs_rDI;
    rs_rX86_ARG1 = rs_rSI;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rCX;
    rs_rX86_ARG4 = rs_r8;
    rs_rX86_ARG5 = rs_r9;
    rs_rX86_FARG0 = rs_fr0;
    rs_rX86_FARG1 = rs_fr1;
    rs_rX86_FARG2 = rs_fr2;
    rs_rX86_FARG3 = rs_fr3;
    rs_rX86_FARG4 = rs_fr4;
    rs_rX86_FARG5 = rs_fr5;
    rs_rX86_FARG6 = rs_fr6;
    rs_rX86_FARG7 = rs_fr7;
    rX86_ARG0 = rDI;
    rX86_ARG1 = rSI;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rCX;
    rX86_ARG4 = r8;
    rX86_ARG5 = r9;
    rX86_FARG0 = fr0;
    rX86_FARG1 = fr1;
    rX86_FARG2 = fr2;
    rX86_FARG3 = fr3;
    rX86_FARG4 = fr4;
    rX86_FARG5 = fr5;
    rX86_FARG6 = fr6;
    rX86_FARG7 = fr7;
    rs_rX86_INVOKE_TGT = rs_rDI;
  } else {
    rs_rX86_SP = rs_rX86_SP_32;

    rs_rX86_ARG0 = rs_rAX;
    rs_rX86_ARG1 = rs_rCX;
    rs_rX86_ARG2 = rs_rDX;
    rs_rX86_ARG3 = rs_rBX;
    rs_rX86_ARG4 = RegStorage::InvalidReg();
    rs_rX86_ARG5 = RegStorage::InvalidReg();
    rs_rX86_FARG0 = rs_rAX;
    rs_rX86_FARG1 = rs_rCX;
    rs_rX86_FARG2 = rs_rDX;
    rs_rX86_FARG3 = rs_rBX;
    rs_rX86_FARG4 = RegStorage::InvalidReg();
    rs_rX86_FARG5 = RegStorage::InvalidReg();
    rs_rX86_FARG6 = RegStorage::InvalidReg();
    rs_rX86_FARG7 = RegStorage::InvalidReg();
    rX86_ARG0 = rAX;
    rX86_ARG1 = rCX;
    rX86_ARG2 = rDX;
    rX86_ARG3 = rBX;
    rX86_FARG0 = rAX;
    rX86_FARG1 = rCX;
    rX86_FARG2 = rDX;
    rX86_FARG3 = rBX;
    rs_rX86_INVOKE_TGT = rs_rAX;
    // TODO(64): Initialize with invalid reg
//    rX86_ARG4 = RegStorage::InvalidReg();
//    rX86_ARG5 = RegStorage::InvalidReg();
  }
  rs_rX86_RET0 = rs_rAX;
  rs_rX86_RET1 = rs_rDX;
  rs_rX86_COUNT = rs_rCX;
  rX86_RET0 = rAX;
  rX86_RET1 = rDX;
  rX86_INVOKE_TGT = rAX;
  rX86_COUNT = rCX;
}

Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                          ArenaAllocator* const arena) {
  return new X86Mir2Lir(cu, mir_graph, arena);
}

// Not used in x86
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<4> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

// Not used in x86
RegStorage X86Mir2Lir::LoadHelper(ThreadOffset<8> offset) {
  LOG(FATAL) << "Unexpected use of LoadHelper in x86";
  return RegStorage::InvalidReg();
}

LIR* X86Mir2Lir::CheckSuspendUsingLoad() {
  LOG(FATAL) << "Unexpected use of CheckSuspendUsingLoad in x86";
  return nullptr;
}

uint64_t X86Mir2Lir::GetTargetInstFlags(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].flags;
}

const char* X86Mir2Lir::GetTargetInstName(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].name;
}

const char* X86Mir2Lir::GetTargetInstFmt(int opcode) {
  DCHECK(!IsPseudoLirOp(opcode));
  return X86Mir2Lir::EncodingMap[opcode].fmt;
}

void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
  // Can we do this directly to memory?
  rl_dest = UpdateLocWide(rl_dest);
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int32_t val_lo = Low32Bits(value);
    int32_t val_hi = High32Bits(value);
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    LIR * store = NewLIR3(kX86Mov32MI, r_base, displacement + LOWORD_OFFSET, val_lo);
    AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    store = NewLIR3(kX86Mov32MI, r_base, displacement + HIWORD_OFFSET, val_hi);
    AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
    return;
  }

  // Just use the standard code to do the generation.
  Mir2Lir::GenConstWide(rl_dest, value);
}

// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
  LOG(INFO) << "location: " << loc.location << ','
            << (loc.wide ? " w" : " ")
            << (loc.defined ? " D" : " ")
            << (loc.is_const ? " c" : " ")
            << (loc.fp ? " F" : " ")
            << (loc.core ? " C" : " ")
            << (loc.ref ? " r" : " ")
            << (loc.high_word ? " h" : " ")
            << (loc.home ? " H" : " ")
            << ", low: " << static_cast<int>(loc.reg.GetLowReg())
            << ", high: " << static_cast<int>(loc.reg.GetHighReg())
            << ", s_reg: " << loc.s_reg_low
            << ", orig: " << loc.orig_sreg;
}

void X86Mir2Lir::Materialize() {
  // A good place to put the analysis before starting.
  AnalyzeMIR();

  // Now continue with regular code generation.
  Mir2Lir::Materialize();
}

void X86Mir2Lir::LoadMethodAddress(const MethodReference& target_method, InvokeType type,
                                   SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction, that will be filled
   * in at 'link time'. For now, put a unique value based on target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the move instruction with the unique pointer and save index, dex_file, and type.
  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(target_method_id_ptr), target_method_idx,
                     WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(move);
  method_address_insns_.Insert(move);
}

void X86Mir2Lir::LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg) {
  /*
   * For x86, just generate a 32 bit move immediate instruction, that will be filled
   * in at 'link time'.
   * For now, put a unique value based on target to ensure that
   * code deduplication works.
   */
  const DexFile::TypeId& id = cu_->dex_file->GetTypeId(type_idx);
  uintptr_t ptr = reinterpret_cast<uintptr_t>(&id);

  // Generate the move instruction with the unique pointer and save index and type.
  LIR *move = RawLIR(current_dalvik_offset_, kX86Mov32RI, TargetReg(symbolic_reg).GetReg(),
                     static_cast<int>(ptr), type_idx);
  AppendLIR(move);
  class_type_address_insns_.Insert(move);
}

LIR *X86Mir2Lir::CallWithLinkerFixup(const MethodReference& target_method, InvokeType type) {
  /*
   * For x86, just generate a 32 bit call relative instruction, that will be filled
   * in at 'link time'. For now, put a unique value based on target to ensure that
   * code deduplication works.
   */
  int target_method_idx = target_method.dex_method_index;
  const DexFile* target_dex_file = target_method.dex_file;
  const DexFile::MethodId& target_method_id = target_dex_file->GetMethodId(target_method_idx);
  uintptr_t target_method_id_ptr = reinterpret_cast<uintptr_t>(&target_method_id);

  // Generate the call instruction with the unique pointer and save index, dex_file, and type.
  LIR *call = RawLIR(current_dalvik_offset_, kX86CallI, static_cast<int>(target_method_id_ptr),
                     target_method_idx, WrapPointer(const_cast<DexFile*>(target_dex_file)), type);
  AppendLIR(call);
  call_method_insns_.Insert(call);
  return call;
}

/*
 * @brief Enter a 32 bit quantity into a buffer
 * @param buf buffer.
 * @param data Data value.
 */
static void PushWord(std::vector<uint8_t>& buf, int32_t data) {
  buf.push_back(data & 0xff);
  buf.push_back((data >> 8) & 0xff);
  buf.push_back((data >> 16) & 0xff);
  buf.push_back((data >> 24) & 0xff);
}

void X86Mir2Lir::InstallLiteralPools() {
  // These are handled differently for x86.
  DCHECK(code_literal_list_ == nullptr);
  DCHECK(method_literal_list_ == nullptr);
  DCHECK(class_literal_list_ == nullptr);

  // Align to 16 byte boundary.  We have implicit knowledge that the start of the method is
  // on a 4 byte boundary.  How can I check this if it changes (other than aligned loads
  // will fail at runtime)?
  if (const_vectors_ != nullptr) {
    int align_size = (16 - 4) - (code_buffer_.size() & 0xF);
    if (align_size < 0) {
      align_size += 16;
    }

    while (align_size > 0) {
      code_buffer_.push_back(0);
      align_size--;
    }
    for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
      PushWord(code_buffer_, p->operands[0]);
      PushWord(code_buffer_, p->operands[1]);
      PushWord(code_buffer_, p->operands[2]);
      PushWord(code_buffer_, p->operands[3]);
    }
  }

  // Handle the fixups for methods.
  for (uint32_t i = 0; i < method_address_insns_.Size(); i++) {
    LIR* p = method_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[3]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddMethodPatch(cu_->dex_file, cu_->class_def_idx,
                                         cu_->method_idx, cu_->invoke_type,
                                         target_method_idx, target_dex_file,
                                         static_cast<InvokeType>(p->operands[4]),
                                         patch_offset);
  }

  // Handle the fixups for class types.
  for (uint32_t i = 0; i < class_type_address_insns_.Size(); i++) {
    LIR* p = class_type_address_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86Mov32RI);
    uint32_t target_method_idx = p->operands[2];

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddClassPatch(cu_->dex_file, cu_->class_def_idx,
                                        cu_->method_idx, target_method_idx, patch_offset);
  }

  // And now the PC-relative calls to methods.
  for (uint32_t i = 0; i < call_method_insns_.Size(); i++) {
    LIR* p = call_method_insns_.Get(i);
    DCHECK_EQ(p->opcode, kX86CallI);
    uint32_t target_method_idx = p->operands[1];
    const DexFile* target_dex_file =
        reinterpret_cast<const DexFile*>(UnwrapPointer(p->operands[2]));

    // The offset to patch is the last 4 bytes of the instruction.
    int patch_offset = p->offset + p->flags.size - 4;
    cu_->compiler_driver->AddRelativeCodePatch(cu_->dex_file, cu_->class_def_idx,
                                               cu_->method_idx, cu_->invoke_type,
                                               target_method_idx, target_dex_file,
                                               static_cast<InvokeType>(p->operands[3]),
                                               patch_offset, -4 /* offset */);
  }

  // And do the normal processing.
  Mir2Lir::InstallLiteralPools();
}

/*
 * Fast string.index_of(I) & (II). Inline check for simple case of char <= 0xffff,
 * otherwise bails to standard library code.
 */
bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) {
  ClobberCallerSave();
  LockCallTemps();  // Using fixed registers

  // EAX: 16 bit character being searched.
  // ECX: count: number of words to be searched.
  // EDI: String being searched.
  // EDX: temporary during execution.
  // EBX: temporary during execution.

  RegLocation rl_obj = info->args[0];
  RegLocation rl_char = info->args[1];
  RegLocation rl_start;  // Note: only present in the III flavor of IndexOf.

  uint32_t char_value =
      rl_char.is_const ? mir_graph_->ConstantValue(rl_char.orig_sreg) : 0;

  if (char_value > 0xFFFF) {
    // We have to punt to the real String.indexOf.
    return false;
  }

  // Okay, we are committed to inlining this.
  RegLocation rl_return = GetReturn(kCoreReg);
  RegLocation rl_dest = InlineTarget(info);

  // Is the string non-NULL?
  LoadValueDirectFixed(rl_obj, rs_rDX);
  GenNullCheck(rs_rDX, info->opt_flags);
  info->opt_flags |= MIR_IGNORE_NULL_CHECK;  // Record that we've null checked.

  // Does the character fit in 16 bits?
  LIR* slowpath_branch = nullptr;
  if (rl_char.is_const) {
    // We need the value in EAX.
    LoadConstantNoClobber(rs_rAX, char_value);
  } else {
    // Character is not a constant; compare at runtime.
    LoadValueDirectFixed(rl_char, rs_rAX);
    slowpath_branch = OpCmpImmBranch(kCondGt, rs_rAX, 0xFFFF, nullptr);
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  int offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  int data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  // Character is in EAX.
  // Object pointer is in EDX.

  // We need to preserve EDI, but have no spare registers, so push it on the stack.
  // We have to remember that all stack addresses after this are offset by sizeof(EDI).
  NewLIR1(kX86Push32R, rs_rDI.GetReg());

  // Compute the number of words to search into rCX.
  Load32Disp(rs_rDX, count_offset, rs_rCX);
  LIR *length_compare = nullptr;
  int start_value = 0;
  bool is_index_on_stack = false;
  if (zero_based) {
    // We have to handle an empty string.  Use special instruction JECXZ.
    length_compare = NewLIR0(kX86Jecxz8);
  } else {
    rl_start = info->args[2];
    // We have to offset by the start index.
    if (rl_start.is_const) {
      start_value = mir_graph_->ConstantValue(rl_start.orig_sreg);
      start_value = std::max(start_value, 0);

      // Is the start > count?
      length_compare = OpCmpImmBranch(kCondLe, rs_rCX, start_value, nullptr);

      if (start_value != 0) {
        OpRegImm(kOpSub, rs_rCX, start_value);
      }
    } else {
      // Runtime start index.
      rl_start = UpdateLocTyped(rl_start, kCoreReg);
      if (rl_start.location == kLocPhysReg) {
        // Handle "start index < 0" case.
        OpRegReg(kOpXor, rs_rBX, rs_rBX);
        OpRegReg(kOpCmp, rl_start.reg, rs_rBX);
        OpCondRegReg(kOpCmov, kCondLt, rl_start.reg, rs_rBX);

        // The length of the string should be greater than the start index.
        length_compare = OpCmpBranch(kCondLe, rs_rCX, rl_start.reg, nullptr);
        OpRegReg(kOpSub, rs_rCX, rl_start.reg);
        if (rl_start.reg == rs_rDI) {
          // The special case.  We will use EDI further, so let's put the start index on the stack.
          NewLIR1(kX86Push32R, rs_rDI.GetReg());
          is_index_on_stack = true;
        }
      } else {
        // Load the start index from stack, remembering that we pushed EDI.
        int displacement = SRegOffset(rl_start.s_reg_low) + sizeof(uint32_t);
        {
          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
          Load32Disp(rs_rX86_SP, displacement, rs_rBX);
        }
        OpRegReg(kOpXor, rs_rDI, rs_rDI);
        OpRegReg(kOpCmp, rs_rBX, rs_rDI);
        OpCondRegReg(kOpCmov, kCondLt, rs_rBX, rs_rDI);

        length_compare = OpCmpBranch(kCondLe, rs_rCX, rs_rBX, nullptr);
        OpRegReg(kOpSub, rs_rCX, rs_rBX);
        // Put the start index on the stack.
        NewLIR1(kX86Push32R, rs_rBX.GetReg());
        is_index_on_stack = true;
      }
    }
  }
  DCHECK(length_compare != nullptr);

  // ECX now contains the count in words to be searched.

  // Load the address of the string into EBX.
  // The string starts at VALUE(String) + 2 * OFFSET(String) + DATA_OFFSET.
  Load32Disp(rs_rDX, value_offset, rs_rDI);
  Load32Disp(rs_rDX, offset_offset, rs_rBX);
  OpLea(rs_rBX, rs_rDI, rs_rBX, 1, data_offset);

  // Now compute into EDI where the search will start.
  if (zero_based || rl_start.is_const) {
    if (start_value == 0) {
      OpRegCopy(rs_rDI, rs_rBX);
    } else {
      NewLIR3(kX86Lea32RM, rs_rDI.GetReg(), rs_rBX.GetReg(), 2 * start_value);
    }
  } else {
    if (is_index_on_stack == true) {
      // Load the start index from stack.
      NewLIR1(kX86Pop32R, rs_rDX.GetReg());
      OpLea(rs_rDI, rs_rBX, rs_rDX, 1, 0);
    } else {
      OpLea(rs_rDI, rs_rBX, rl_start.reg, 1, 0);
    }
  }

  // EDI now contains the start of the string to be searched.
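  // (repne scasw compares AX against the word at [EDI], advancing EDI and decrementing ECX until
  // a match is found or ECX reaches zero, which is why the registers were set up as above.)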
  // We are all prepared to do the search for the character.
  NewLIR0(kX86RepneScasw);

  // Did we find a match?
  LIR* failed_branch = OpCondBranch(kCondNe, nullptr);

  // yes, we matched.  Compute the index of the result.
  // index = ((curr_ptr - orig_ptr) / 2) - 1.
  OpRegReg(kOpSub, rs_rDI, rs_rBX);
  OpRegImm(kOpAsr, rs_rDI, 1);
  NewLIR3(kX86Lea32RM, rl_return.reg.GetReg(), rs_rDI.GetReg(), -1);
  LIR *all_done = NewLIR1(kX86Jmp8, 0);

  // Failed to match; return -1.
  LIR *not_found = NewLIR0(kPseudoTargetLabel);
  length_compare->target = not_found;
  failed_branch->target = not_found;
  LoadConstantNoClobber(rl_return.reg, -1);

  // And join up at the end.
  all_done->target = NewLIR0(kPseudoTargetLabel);
  // Restore EDI from the stack.
  NewLIR1(kX86Pop32R, rs_rDI.GetReg());

  // Out of line code returns here.
  if (slowpath_branch != nullptr) {
    LIR *return_point = NewLIR0(kPseudoTargetLabel);
    AddIntrinsicSlowPath(info, slowpath_branch, return_point);
  }

  StoreValue(rl_dest, rl_return);
  return true;
}

/*
 * @brief Enter an 'advance LOC' into the FDE buffer
 * @param buf FDE buffer.
 * @param increment Amount by which to increase the current location.
 */
static void AdvanceLoc(std::vector<uint8_t>& buf, uint32_t increment) {
  if (increment < 64) {
    // Encoding in opcode.
    buf.push_back(0x1 << 6 | increment);
  } else if (increment < 256) {
    // Single byte delta.
    buf.push_back(0x02);
    buf.push_back(increment);
  } else if (increment < 256 * 256) {
    // Two byte delta.
    buf.push_back(0x03);
    buf.push_back(increment & 0xff);
    buf.push_back((increment >> 8) & 0xff);
  } else {
    // Four byte delta.
    buf.push_back(0x04);
    PushWord(buf, increment);
  }
}


std::vector<uint8_t>* X86CFIInitialization() {
  return X86Mir2Lir::ReturnCommonCallFrameInformation();
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCommonCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Length of the CIE (except for this field).
  PushWord(*cfi_info, 16);

  // CIE id.
  PushWord(*cfi_info, 0xFFFFFFFFU);

  // Version: 3.
  cfi_info->push_back(0x03);

  // Augmentation: empty string.
  cfi_info->push_back(0x0);

  // Code alignment: 1.
  cfi_info->push_back(0x01);

  // Data alignment: -4.
  cfi_info->push_back(0x7C);

  // Return address register (R8).
  cfi_info->push_back(0x08);

  // Initial return PC is 4(ESP): DW_CFA_def_cfa R4 4.
  cfi_info->push_back(0x0C);
  cfi_info->push_back(0x04);
  cfi_info->push_back(0x04);

  // Return address location: 0(SP): DW_CFA_offset R8 1 (* -4);.
  cfi_info->push_back(0x2 << 6 | 0x08);
  cfi_info->push_back(0x01);

  // And 2 Noops to align to 4 byte boundary.
  cfi_info->push_back(0x0);
  cfi_info->push_back(0x0);

  DCHECK_EQ(cfi_info->size() & 3, 0U);
  return cfi_info;
}

static void EncodeUnsignedLeb128(std::vector<uint8_t>& buf, uint32_t value) {
  uint8_t buffer[12];
  uint8_t *ptr = EncodeUnsignedLeb128(buffer, value);
  for (uint8_t *p = buffer; p < ptr; p++) {
    buf.push_back(*p);
  }
}

std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
  std::vector<uint8_t>* cfi_info = new std::vector<uint8_t>;

  // Generate the FDE for the method.
  DCHECK_NE(data_offset_, 0U);

  // Length (will be filled in later in this routine).
  PushWord(*cfi_info, 0);

  // CIE_pointer (can be filled in by linker); might be left at 0 if there is only
  // one CIE for the whole debug_frame section.
  PushWord(*cfi_info, 0);

  // 'initial_location' (filled in by linker).
  PushWord(*cfi_info, 0);

  // 'address_range' (number of bytes in the method).
  PushWord(*cfi_info, data_offset_);

  // The instructions in the FDE.
  if (stack_decrement_ != nullptr) {
    // Advance LOC to just past the stack decrement.
    uint32_t pc = NEXT_LIR(stack_decrement_)->offset;
    AdvanceLoc(*cfi_info, pc);

    // Now update the offset to the call frame: DW_CFA_def_cfa_offset frame_size.
    cfi_info->push_back(0x0e);
    EncodeUnsignedLeb128(*cfi_info, frame_size_);

    // We continue with that stack until the epilogue.
    if (stack_increment_ != nullptr) {
      uint32_t new_pc = NEXT_LIR(stack_increment_)->offset;
      AdvanceLoc(*cfi_info, new_pc - pc);

      // We probably have code snippets after the epilogue, so save the
      // current state: DW_CFA_remember_state.
      cfi_info->push_back(0x0a);

      // We have now popped the stack: DW_CFA_def_cfa_offset 4.  There is only the return
      // PC on the stack now.
      cfi_info->push_back(0x0e);
      EncodeUnsignedLeb128(*cfi_info, 4);

      // Everything after that is the same as before the epilogue.
      // Stack bump was followed by RET instruction.
      LIR *post_ret_insn = NEXT_LIR(NEXT_LIR(stack_increment_));
      if (post_ret_insn != nullptr) {
        pc = new_pc;
        new_pc = post_ret_insn->offset;
        AdvanceLoc(*cfi_info, new_pc - pc);
        // Restore the state: DW_CFA_restore_state.
        cfi_info->push_back(0x0b);
      }
    }
  }

  // Padding to a multiple of 4
  while ((cfi_info->size() & 3) != 0) {
    // DW_CFA_nop is encoded as 0.
    cfi_info->push_back(0);
  }

  // Set the length of the FDE inside the generated bytes.
  uint32_t length = cfi_info->size() - 4;
  (*cfi_info)[0] = length;
  (*cfi_info)[1] = length >> 8;
  (*cfi_info)[2] = length >> 16;
  (*cfi_info)[3] = length >> 24;
  return cfi_info;
}

void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
  switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
    case kMirOpConstVector:
      GenConst128(bb, mir);
      break;
    case kMirOpMoveVector:
      GenMoveVector(bb, mir);
      break;
    case kMirOpPackedMultiply:
      GenMultiplyVector(bb, mir);
      break;
    case kMirOpPackedAddition:
      GenAddVector(bb, mir);
      break;
    case kMirOpPackedSubtract:
      GenSubtractVector(bb, mir);
      break;
    case kMirOpPackedShiftLeft:
      GenShiftLeftVector(bb, mir);
      break;
    case kMirOpPackedSignedShiftRight:
      GenSignedShiftRightVector(bb, mir);
      break;
    case kMirOpPackedUnsignedShiftRight:
      GenUnsignedShiftRightVector(bb, mir);
      break;
    case kMirOpPackedAnd:
      GenAndVector(bb, mir);
      break;
    case kMirOpPackedOr:
      GenOrVector(bb, mir);
      break;
    case kMirOpPackedXor:
      GenXorVector(bb, mir);
      break;
    case kMirOpPackedAddReduce:
      GenAddReduceVector(bb, mir);
      break;
    case kMirOpPackedReduce:
      GenReduceVector(bb, mir);
      break;
    case kMirOpPackedSet:
      GenSetVector(bb, mir);
      break;
    default:
      break;
  }
}

void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
  int type_size = mir->dalvikInsn.vA;
  // We support 128 bit vectors.
  DCHECK_EQ(type_size & 0xFFFF, 128);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  uint32_t *args = mir->dalvikInsn.arg;
  int reg = rs_dest.GetReg();
  // Check for all 0 case.
  if (args[0] == 0 && args[1] == 0 && args[2] == 0 && args[3] == 0) {
    NewLIR2(kX86XorpsRR, reg, reg);
    return;
  }
  // Okay, load it from the constant vector area.
  LIR *data_target = ScanVectorLiteral(mir);
  if (data_target == nullptr) {
    data_target = AddVectorLiteral(mir);
  }

  // Address the start of the method.
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  if (rl_method.wide) {
    rl_method = LoadValueWide(rl_method, kCoreReg);
  } else {
    rl_method = LoadValue(rl_method, kCoreReg);
  }

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // 4 byte offset.  We will fix this up in the assembler later to have the right
  // value.
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
  load->flags.fixup = kFixupLoad;
  load->target = data_target;
}

void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
}

void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PmulldRR;
      break;
    case kSignedHalf:
      opcode = kX86PmullwRR;
      break;
    case kSingle:
      opcode = kX86MulpsRR;
      break;
    case kDouble:
      opcode = kX86MulpdRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector multiply " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PadddRR;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PaddwRR;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kX86PaddbRR;
      break;
    case kSingle:
      opcode = kX86AddpsRR;
      break;
    case kDouble:
      opcode = kX86AddpdRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector addition " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PsubdRR;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsubwRR;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kX86PsubbRR;
      break;
    case kSingle:
      opcode = kX86SubpsRR;
      break;
    case kDouble:
      opcode = kX86SubpdRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector subtraction " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PslldRI;
      break;
    case k64:
      opcode = kX86PsllqRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsllwRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector shift left " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PsradRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsrawRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PsrldRI;
      break;
    case k64:
      opcode = kX86PsrlqRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PsrlwRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) {
  // We only support 128 bit registers.
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
  NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}

void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
  int imm = mir->dalvikInsn.vC;
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PhadddRR;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PhaddwRR;
      break;
    default:
      LOG(FATAL) << "Unsupported vector add reduce " << opsize;
      break;
  }
  NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
}

void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
  int index = mir->dalvikInsn.arg[0];
  int opcode = 0;
  switch (opsize) {
    case k32:
      opcode = kX86PextrdRRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      opcode = kX86PextrwRRI;
      break;
    case kUnsignedByte:
    case kSignedByte:
      opcode = kX86PextrbRRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector reduce " << opsize;
      break;
  }
  // We need to extract to a GPR.
  RegStorage temp = AllocTemp();
  NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index);

  // Assume that the destination VR is in the def for the mir.
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  RegLocation rl_temp =
      {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG};
  StoreValue(rl_dest, rl_temp);
}

void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  int op_low = 0, op_high = 0;
  switch (opsize) {
    case k32:
      op_low = kX86PshufdRRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      // Handles low quadword.
      op_low = kX86PshuflwRRI;
      // Handles upper quadword.
      op_high = kX86PshufdRRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector set " << opsize;
      break;
  }

  // Load the value from the VR into a GPR.
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  rl_src = LoadValue(rl_src, kCoreReg);

  // Load the value into the XMM register.
  NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());

  // Now shuffle the value across the destination.
  NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);

  // And then repeat as needed.

void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
  DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
  OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
  RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
  int op_low = 0, op_high = 0;
  switch (opsize) {
    case k32:
      op_low = kX86PshufdRRI;
      break;
    case kSignedHalf:
    case kUnsignedHalf:
      // Handles low quadword.
      op_low = kX86PshuflwRRI;
      // Handles upper quadword.
      op_high = kX86PshufdRRI;
      break;
    default:
      LOG(FATAL) << "Unsupported vector set " << opsize;
      break;
  }

  // Load the value from the VR into a GPR.
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  rl_src = LoadValue(rl_src, kCoreReg);

  // Load the value into the XMM register.
  NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());

  // Now shuffle the value across the destination.
  NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);

  // And then repeat as needed.
  if (op_high != 0) {
    NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
  }
}

LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
  for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
    if (args[0] == p->operands[0] && args[1] == p->operands[1] &&
        args[2] == p->operands[2] && args[3] == p->operands[3]) {
      return p;
    }
  }
  return nullptr;
}

LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
  LIR* new_value = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocData));
  int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
  new_value->operands[0] = args[0];
  new_value->operands[1] = args[1];
  new_value->operands[2] = args[2];
  new_value->operands[3] = args[3];
  new_value->next = const_vectors_;
  if (const_vectors_ == nullptr) {
    estimated_native_code_size_ += 12;  // Amount needed to align to 16 byte boundary.
  }
  estimated_native_code_size_ += 16;  // Space for one vector.
  const_vectors_ = new_value;
  return new_value;
}

// ------------ ABI support: mapping of args to physical registers -------------
RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float,
                                                              bool is_wide) {
  const RegStorage coreArgMappingToPhysicalReg[] = {
      rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5};
  const int coreArgMappingToPhysicalRegSize =
      sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
  const RegStorage fpArgMappingToPhysicalReg[] = {
      rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3,
      rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7};
  const int fpArgMappingToPhysicalRegSize =
      sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);

  RegStorage result = RegStorage::InvalidReg();
  if (is_double_or_float) {
    if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
      result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
      if (result.Valid()) {
        result = is_wide ? RegStorage::FloatSolo64(result.GetReg())
                         : RegStorage::FloatSolo32(result.GetReg());
      }
    }
  } else {
    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
      result = coreArgMappingToPhysicalReg[cur_core_reg_++];
      if (result.Valid()) {
        result = is_wide ? RegStorage::Solo64(result.GetReg())
                         : RegStorage::Solo32(result.GetReg());
      }
    }
  }
  return result;
}
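
// GetNextReg() hands out argument registers in the order used here for the
// 64-bit quick internal ABI: core arguments draw from rs_rX86_ARG1..rs_rX86_ARG5
// and floating-point arguments from rs_rX86_FARG0..rs_rX86_FARG7, with the two
// pools advancing independently. Once a pool is exhausted an invalid RegStorage
// is returned and the caller maps that argument to the stack instead. For
// example, ins of types (long, float, int) would map to a 64-bit view of ARG1,
// a 32-bit float view of FARG0, and a 32-bit view of ARG2.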

RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
  DCHECK(IsInitialized());
  auto res = mapping_.find(in_position);
  return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
}

void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count,
                                                   InToRegStorageMapper* mapper) {
  DCHECK(mapper != nullptr);
  max_mapped_in_ = -1;
  is_there_stack_mapped_ = false;
  for (int in_position = 0; in_position < count; in_position++) {
    RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
    if (reg.Valid()) {
      mapping_[in_position] = reg;
      max_mapped_in_ = std::max(max_mapped_in_, in_position);
      if (reg.Is64BitSolo()) {
        // We covered 2 args, so skip the next one.
        in_position++;
      }
    } else {
      is_there_stack_mapped_ = true;
    }
  }
  initialized_ = true;
}

RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
  if (!cu_->target64) {
    return GetCoreArgMappingToPhysicalReg(arg_num);
  }

  if (!in_to_reg_storage_mapping_.IsInitialized()) {
    int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
    RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];

    InToRegStorageX86_64Mapper mapper;
    in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
  }
  return in_to_reg_storage_mapping_.Get(arg_num);
}

RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) {
  // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
  // Not used for 64-bit. TODO: Move X86_32 to the same framework.
  switch (core_arg_num) {
    case 0:
      return rs_rX86_ARG1;
    case 1:
      return rs_rX86_ARG2;
    case 2:
      return rs_rX86_ARG3;
    default:
      return RegStorage::InvalidReg();
  }
}
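
// In summary: 64-bit targets map incoming arguments through the
// InToRegStorageMapping above, while 32-bit targets keep the older fixed
// mapping in which only the first three ins arrive in rs_rX86_ARG1..rs_rX86_ARG3
// and everything else stays on the frame (see GetCoreArgMappingToPhysicalReg).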

// ---------End of ABI support: mapping of args to physical registers -------------

/*
 * If there are any ins passed in registers that have not been promoted
 * to a callee-save register, flush them to the frame. Perform initial
 * assignment of promoted arguments.
 *
 * ArgLocs is an array of location records describing the incoming arguments
 * with one location record per word of argument.
 */
void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
  if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method);
  /*
   * Dummy up a RegLocation for the incoming Method*.
   * It will attempt to keep kArg0 live (or copy it to home location
   * if promoted).
   */
  RegLocation rl_src = rl_method;
  rl_src.location = kLocPhysReg;
  rl_src.reg = TargetReg(kArg0);
  rl_src.home = false;
  MarkLive(rl_src);
  StoreValue(rl_method, rl_src);
  // If Method* has been promoted, explicitly flush it to the frame.
  if (rl_method.location == kLocPhysReg) {
    StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0), kNotVolatile);
  }

  if (cu_->num_ins == 0) {
    return;
  }

  int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
  /*
   * Copy incoming arguments to their proper home locations.
   * NOTE: an older version of dx had an issue in which
   * it would reuse static method argument registers.
   * This could result in the same Dalvik virtual register
   * being promoted to both core and fp regs. To account for this,
   * we only copy to the corresponding promoted physical register
   * if it matches the type of the SSA name for the incoming
   * argument. It is also possible that long and double arguments
   * end up half-promoted. In those cases, we must flush the promoted
   * half to memory as well.
   */
  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
  for (int i = 0; i < cu_->num_ins; i++) {
    // Get the reg corresponding to the input.
    RegStorage reg = GetArgMappingToPhysicalReg(i);

    RegLocation* t_loc = &ArgLocs[i];
    if (reg.Valid()) {
      // Arriving in a register.

      // The arg location has already been updated with promotion info,
      // so we can rely on it here.
      if (t_loc->location == kLocPhysReg) {
        // Just copy it.
        OpRegCopy(t_loc->reg, reg);
      } else {
        // Needs a flush to the frame.
        if (t_loc->ref) {
          StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
        } else {
          StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
                        kNotVolatile);
        }
      }
    } else {
      // Arriving on the frame; load it only if the argument was promoted.
      if (t_loc->location == kLocPhysReg) {
        if (t_loc->ref) {
          LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
        } else {
          LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
                       t_loc->wide ? k64 : k32, kNotVolatile);
        }
      }
    }
    if (t_loc->wide) {
      // Increment i to skip the high half of a wide argument.
      i++;
    }
  }
}

/*
 * Load up to 5 arguments, the first three of which will be in
 * kArg1 .. kArg3.  On entry kArg0 contains the current method pointer,
 * and as part of the load sequence, it must be replaced with
 * the target method pointer.  Note, this may also be called
 * for "range" variants if the number of arguments is 5 or fewer.
 */
int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
                                     int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
                                     const MethodReference& target_method,
                                     uint32_t vtable_idx, uintptr_t direct_code,
                                     uintptr_t direct_method, InvokeType type, bool skip_this) {
  if (!cu_->target64) {
    return Mir2Lir::GenDalvikArgsNoRange(info,
                                         call_state, pcrLabel, next_call_insn,
                                         target_method,
                                         vtable_idx, direct_code,
                                         direct_method, type, skip_this);
  }
  return GenDalvikArgsRange(info,
                            call_state, pcrLabel, next_call_insn,
                            target_method,
                            vtable_idx, direct_code,
                            direct_method, type, skip_this);
}
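
// On 64-bit targets the no-range case above simply reuses GenDalvikArgsRange():
// the InToRegStorageMapping logic below handles any argument count, so a
// separate path for five or fewer arguments is unnecessary.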

/*
 * May have 0+ arguments (also used for jumbo).  Note that
 * source virtual registers may be in physical registers, so may
 * need to be flushed to home location before copying.  This
 * applies to arg3 and above (see below).
 *
 * Two general strategies:
 *    If < 20 arguments
 *       Pass args 3-18 using vldm/vstm block copy
 *       Pass arg0, arg1 & arg2 in kArg1-kArg3
 *    If 20+ arguments
 *       Pass args arg19+ using memcpy block copy
 *       Pass arg0, arg1 & arg2 in kArg1-kArg3
 */
int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
                                   LIR** pcrLabel, NextCallInsn next_call_insn,
                                   const MethodReference& target_method,
                                   uint32_t vtable_idx, uintptr_t direct_code,
                                   uintptr_t direct_method, InvokeType type, bool skip_this) {
  if (!cu_->target64) {
    return Mir2Lir::GenDalvikArgsRange(info, call_state,
                                       pcrLabel, next_call_insn,
                                       target_method,
                                       vtable_idx, direct_code, direct_method,
                                       type, skip_this);
  }

  /* If no arguments, just return. */
  if (info->num_arg_words == 0)
    return call_state;

  const int start_index = skip_this ? 1 : 0;

  InToRegStorageX86_64Mapper mapper;
  InToRegStorageMapping in_to_reg_storage_mapping;
  in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
  const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
  const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
      in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
  int regs_left_to_pass_via_stack =
      info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);

  // First of all, check whether it makes sense to use bulk copying.
  // The optimization is applicable only for the range case.
  // TODO: make a constant instead of 2.
  if (info->is_range && regs_left_to_pass_via_stack >= 2) {
    // Scan the rest of the args - if in phys_reg flush to memory.
    for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
      RegLocation loc = info->args[next_arg];
      if (loc.wide) {
        loc = UpdateLocWide(loc);
        if (loc.location == kLocPhysReg) {
          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile);
        }
        next_arg += 2;
      } else {
        loc = UpdateLoc(loc);
        if (loc.location == kLocPhysReg) {
          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
          StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
        }
        next_arg++;
      }
    }

    // Logic below assumes that Method pointer is at offset zero from SP.
    DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);

    // The rest can be copied together.
    int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
    int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped,
                                                   cu_->instruction_set);

    int current_src_offset = start_offset;
    int current_dest_offset = outs_offset;

    // Only dalvik regs are accessed in this loop; no next_call_insn() calls.
    ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
    while (regs_left_to_pass_via_stack > 0) {
      // This is based on the knowledge that the stack itself is 16-byte aligned.
      bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
      bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
      size_t bytes_to_move;

      /*
       * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do
       * a 128-bit move because we won't get the chance to try to align. If there are more than
       * 4 registers left to move, consider doing a 128-bit move only if either src or dest are
       * aligned. We do this because we could potentially do a smaller move to align.
       */
      if (regs_left_to_pass_via_stack == 4 ||
          (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
        // Moving 128-bits via xmm register.
        bytes_to_move = sizeof(uint32_t) * 4;

        // Allocate a free xmm temp. Since we are working through the calling sequence,
        // we expect to have an xmm temporary available. AllocTempDouble will abort if
        // there are no free registers.
        RegStorage temp = AllocTempDouble();

        LIR* ld1 = nullptr;
        LIR* ld2 = nullptr;
        LIR* st1 = nullptr;
        LIR* st2 = nullptr;
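
        // With 8-byte (but not 16-byte) alignment the 16-byte copy is issued as
        // two 64-bit halves (kMovLo128FP, then kMovHi128FP at offset + 8). Besides
        // avoiding a potential cache-line split, this lets AnnotateDalvikRegAccess()
        // describe each half precisely instead of forcing a kEncodeAll barrier on
        // the whole 128-bit access.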

        /*
         * The logic is similar for both loads and stores. If we have 16-byte alignment,
         * do an aligned move. If we have 8-byte alignment, then do the move in two
         * parts. This approach prevents possible cache line splits. Finally, fall back
         * to doing an unaligned move. In most cases we likely won't split the cache
         * line but we cannot prove it and thus take a conservative approach.
         */
        bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
        bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;

        ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
        if (src_is_16b_aligned) {
          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
        } else if (src_is_8b_aligned) {
          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
          ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
                            kMovHi128FP);
        } else {
          ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
        }

        if (dest_is_16b_aligned) {
          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
        } else if (dest_is_8b_aligned) {
          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
          st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
                            temp, kMovHi128FP);
        } else {
          st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
        }

        // TODO: If we could keep track of aliasing information for memory accesses that are wider
        // than 64-bit, we wouldn't need to set up a barrier.
        if (ld1 != nullptr) {
          if (ld2 != nullptr) {
            // For 64-bit load we can actually set up the aliasing information.
            AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
            AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2,
                                    true, true);
          } else {
            // Set barrier for 128-bit load.
            ld1->u.m.def_mask = &kEncodeAll;
          }
        }
        if (st1 != nullptr) {
          if (st2 != nullptr) {
            // For 64-bit store we can actually set up the aliasing information.
            AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
            AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2,
                                    false, true);
          } else {
            // Set barrier for 128-bit store.
            st1->u.m.def_mask = &kEncodeAll;
          }
        }

        // Free the temporary used for the data movement.
        FreeTemp(temp);
      } else {
        // Moving 32-bits via general purpose register.
        bytes_to_move = sizeof(uint32_t);

        // Instead of allocating a new temp, simply reuse one of the registers being used
        // for argument passing.
        RegStorage temp = TargetReg(kArg3);

        // Now load the argument VR and store to the outs.
        Load32Disp(TargetReg(kSp), current_src_offset, temp);
        Store32Disp(TargetReg(kSp), current_dest_offset, temp);
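
        // Both offsets advance in lockstep, so after at most three of these 4-byte
        // moves one side reaches 16-byte alignment and the XMM path above can take
        // over (the "smaller move to align" mentioned in the comment above).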
      }

      current_src_offset += bytes_to_move;
      current_dest_offset += bytes_to_move;
      regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
    }
    DCHECK_EQ(regs_left_to_pass_via_stack, 0);
  }

  // Now store the arguments that were not mapped to registers (if any) to the outs area.
  if (in_to_reg_storage_mapping.IsThereStackMapped()) {
    RegStorage regSingle = TargetReg(kArg2);
    RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
    for (int i = start_index;
         i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) {
      RegLocation rl_arg = info->args[i];
      rl_arg = UpdateRawLoc(rl_arg);
      RegStorage reg = in_to_reg_storage_mapping.Get(i);
      if (!reg.Valid()) {
        int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);

        {
          ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
          if (rl_arg.wide) {
            if (rl_arg.location == kLocPhysReg) {
              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile);
            } else {
              LoadValueDirectWideFixed(rl_arg, regWide);
              StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile);
            }
          } else {
            if (rl_arg.location == kLocPhysReg) {
              StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile);
            } else {
              LoadValueDirectFixed(rl_arg, regSingle);
              StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32, kNotVolatile);
            }
          }
        }
        call_state = next_call_insn(cu_, info, call_state, target_method,
                                    vtable_idx, direct_code, direct_method, type);
      }
      if (rl_arg.wide) {
        i++;
      }
    }
  }

  // Finish with the arguments that were mapped to registers.
  for (int i = start_index; i <= last_mapped_in; i++) {
    RegLocation rl_arg = info->args[i];
    rl_arg = UpdateRawLoc(rl_arg);
    RegStorage reg = in_to_reg_storage_mapping.Get(i);
    if (reg.Valid()) {
      if (rl_arg.wide) {
        LoadValueDirectWideFixed(rl_arg, reg);
      } else {
        LoadValueDirectFixed(rl_arg, reg);
      }
      call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                                  direct_code, direct_method, type);
    }
    if (rl_arg.wide) {
      i++;
    }
  }

  call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
                              direct_code, direct_method, type);
  if (pcrLabel) {
    if (cu_->compiler_driver->GetCompilerOptions().GetExplicitNullChecks()) {
      *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
    } else {
      *pcrLabel = nullptr;
      // In lieu of generating a check for kArg1 being null, we need to
      // perform a load when doing implicit checks.
      RegStorage tmp = AllocTemp();
      Load32Disp(TargetReg(kArg1), 0, tmp);
      MarkPossibleNullPointerException(info->opt_flags);
      FreeTemp(tmp);
    }
  }
  return call_state;
}

}  // namespace art