int_x86.cc revision 08df4b3da75366e5db37e696eaa7e855cba01deb
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the X86 ISA */

#include "codegen_x86.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "mirror/array.h"
#include "x86_lir.h"

namespace art {

/*
 * Perform register memory operation.
 *
 * Compares reg1 against the memory word at base+offset and emits a
 * conditional branch to a throw launchpad taken when c_code holds.
 * Returns the branch LIR; the launchpad target is processed later.
 */
LIR* X86Mir2Lir::GenRegMemCheck(ConditionCode c_code, RegStorage reg1, RegStorage base,
                                int offset, ThrowKind kind) {
  LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind,
                    current_dalvik_offset_, reg1.GetReg(), base.GetReg(), offset);
  OpRegMem(kOpCmp, reg1, base, offset);
  LIR* branch = OpCondBranch(c_code, tgt);
  // Remember branch target - will process later
  throw_launchpads_.Insert(tgt);
  return branch;
}

/*
 * Perform a compare of memory to immediate value
 *
 * Emits cmp [base+offset], imm (8-bit immediate form when it fits) and a
 * conditional branch to a throw launchpad taken when c_code holds.
 */
LIR* X86Mir2Lir::GenMemImmedCheck(ConditionCode c_code, RegStorage base, int offset,
                                  int check_value, ThrowKind kind) {
  LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind,
                    current_dalvik_offset_, base.GetReg(), check_value, 0);
  NewLIR3(IS_SIMM8(check_value) ? kX86Cmp32MI8 : kX86Cmp32MI, base.GetReg(), offset, check_value);
  LIR* branch = OpCondBranch(c_code, tgt);
  // Remember branch target - will process later
  throw_launchpads_.Insert(tgt);
  return branch;
}

/*
 * Compare two 64-bit values
 *    x = y     return  0
 *    x < y     return -1
 *    x > y     return  1
 *
 * Uses the fixed call-temp pairs r1:r0 and r3:r2; the -1/0/+1 result is
 * composed from the sign of the 64-bit subtraction and a non-zero test.
 */
void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1(RegStorage::k64BitPair, r0, r1);
  RegStorage r_tmp2(RegStorage::k64BitPair, r2, r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Compute (r1:r0) = (r1:r0) - (r3:r2)
  OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
  OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  NewLIR2(kX86Set8R, r2, kX86CondL);  // r2 = (r1:r0) < (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, r2, r2);
  OpReg(kOpNeg, rs_r2);  // r2 = -r2
  OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = high | low - sets ZF
  NewLIR2(kX86Set8R, r0, kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
  NewLIR2(kX86Movzx8RR, r0, r0);
  OpRegReg(kOpOr, rs_r0, rs_r2);  // r0 = r0 | r2
  RegLocation rl_result = LocCReturn();
  StoreValue(rl_dest, rl_result);
}

// Map a Mir2Lir condition code onto the corresponding x86 condition encoding.
X86ConditionCode X86ConditionEncoding(ConditionCode cond) {
  switch (cond) {
    case kCondEq: return kX86CondEq;
    case kCondNe: return kX86CondNe;
    case kCondCs: return kX86CondC;
    case kCondCc: return kX86CondNc;
    case kCondUlt: return kX86CondC;
    case kCondUge: return kX86CondNc;
    case kCondMi: return kX86CondS;
    case kCondPl: return kX86CondNs;
    case kCondVs: return kX86CondO;
    case kCondVc: return kX86CondNo;
    case kCondHi: return kX86CondA;
    case kCondLs: return kX86CondBe;
    case kCondGe: return kX86CondGe;
    case kCondLt: return kX86CondL;
    case kCondGt: return kX86CondG;
    case kCondLe: return kX86CondLe;
    case kCondAl:
    case kCondNv: LOG(FATAL) << "Should not reach here";
  }
  // Unreachable default; placates the compiler after the FATAL above.
  return kX86CondO;
}

// Compare two registers and emit a short conditional branch to target.
LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  NewLIR2(kX86Cmp32RR, src1.GetReg(), src2.GetReg());
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ ,
                        cc);
  branch->target = target;
  return branch;
}

// Compare a register against an immediate and branch on the condition.
// A compare against zero for eq/ne is strength-reduced to test reg, reg.
LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg,
                                int check_value, LIR* target) {
  if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) {
    // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode
    NewLIR2(kX86Test32RR, reg.GetReg(), reg.GetReg());
  } else {
    NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value);
  }
  X86ConditionCode cc = X86ConditionEncoding(cond);
  LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc);
  branch->target = target;
  return branch;
}

// Build (but do not append) a 32-bit register-to-register copy; FP operands
// are routed to OpFpRegCopy. A same-register copy becomes a nop unless safe
// optimizations are disabled.
LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  // If src or dest is a pair, we'll be using low reg.
  if (r_dest.IsPair()) {
    r_dest = r_dest.GetLow();
  }
  if (r_src.IsPair()) {
    r_src = r_src.GetLow();
  }
  if (X86_FPREG(r_dest.GetReg()) || X86_FPREG(r_src.GetReg()))
    return OpFpRegCopy(r_dest, r_src);
  LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR,
                    r_dest.GetReg(), r_src.GetReg());
  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }
  return res;
}

// Emit a register copy into the instruction stream.
LIR* X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  LIR *res = OpRegCopyNoInsert(r_dest, r_src);
  AppendLIR(res);
  return res;
}

// Copy a 64-bit value between register pairs, handling all four core/FP
// combinations and overlap between source and destination core pairs.
void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  // FIXME: handle k64BitSolo when we start using them.
  DCHECK(r_dest.IsPair());
  DCHECK(r_src.IsPair());
  bool dest_fp = X86_FPREG(r_dest.GetLowReg());
  bool src_fp = X86_FPREG(r_src.GetLowReg());
  if (dest_fp) {
    if (src_fp) {
      // TODO: we ought to handle this case here - reserve OpRegCopy for 32-bit copies.
      OpRegCopy(RegStorage::Solo64(S2d(r_dest.GetLowReg(), r_dest.GetHighReg())),
                RegStorage::Solo64(S2d(r_src.GetLowReg(), r_src.GetHighReg())));
    } else {
      // TODO: Prevent this from happening in the code. The result is often
      // unused or could have been loaded more easily from memory.
      // Pack the two 32-bit halves into one XMM via movd + punpckldq.
      NewLIR2(kX86MovdxrRR, r_dest.GetLowReg(), r_src.GetLowReg());
      RegStorage r_tmp = AllocTempDouble();
      NewLIR2(kX86MovdxrRR, r_tmp.GetLowReg(), r_src.GetHighReg());
      NewLIR2(kX86PunpckldqRR, r_dest.GetLowReg(), r_tmp.GetLowReg());
      FreeTemp(r_tmp);
    }
  } else {
    if (src_fp) {
      // Extract low half, shift the XMM right 32 bits, extract high half.
      // NOTE(review): this clobbers the source XMM register via psrlq.
      NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetLowReg());
      NewLIR2(kX86PsrlqRI, r_src.GetLowReg(), 32);
      NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), r_src.GetLowReg());
    } else {
      // Handle overlap
      if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) {
        // Deal with cycles.
        RegStorage temp_reg = AllocTemp();
        OpRegCopy(temp_reg, r_dest.GetHigh());
        OpRegCopy(r_dest.GetHigh(), r_dest.GetLow());
        OpRegCopy(r_dest.GetLow(), temp_reg);
        FreeTemp(temp_reg);
      } else if (r_src.GetHighReg() == r_dest.GetLowReg()) {
        // Copy high first so it is not overwritten by the low copy.
        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
      } else {
        OpRegCopy(r_dest.GetLow(), r_src.GetLow());
        OpRegCopy(r_dest.GetHigh(), r_src.GetHigh());
      }
    }
  }
}

// Lower kMirOpSelect (conditional move against a zero compare) using cmov,
// for both the constant-operand and register-operand variants.
void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  rl_src = LoadValue(rl_src, kCoreReg);
  ConditionCode ccode = mir->meta.ccode;

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, kCoreReg, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When the true case is zero and result_reg is not same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $false_case
     *     cmovnz result_reg, t1
     * 2) When the false case is zero and result_reg is not same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     * 3) All other cases (we do compare first to set eflags):
     *     cmp $0, src_reg
     *     mov result_reg, $false_case
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     */
    const bool result_reg_same_as_src =
        (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg());
    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
    const bool catch_all_case = !(true_zero_case || false_zero_case);

    if (true_zero_case || false_zero_case) {
      // xor is safe here: it runs before the compare, so flags are not yet live.
      OpRegReg(kOpXor, rl_result.reg, rl_result.reg);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      OpRegImm(kOpCmp, rl_src.reg, 0);
    }

    if (catch_all_case) {
      OpRegImm(kOpMov, rl_result.reg, false_val);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode;
      int immediateForTemp = true_zero_case ? false_val : true_val;
      RegStorage temp1_reg = AllocTemp();
      OpRegImm(kOpMov, temp1_reg, immediateForTemp);

      OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg);

      FreeTemp(temp1_reg);
    }
  } else {
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, kCoreReg);
    rl_false = LoadValue(rl_false, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);

    /*
     * For ccode == kCondEq:
     *
     * 1) When true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither cases are in place:
     *     cmp $0, src_reg
     *     mov result_reg, false_reg
     *     cmovz result_reg, true_reg
     */

    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
    OpRegImm(kOpCmp, rl_src.reg, 0);

    if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) {
      OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg);
    } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) {
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    } else {
      OpRegCopy(rl_result.reg, rl_false.reg);
      OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

// Lower a fused long-compare-and-branch. Constant operands are routed to
// the immediate variant; otherwise a 64-bit subtract sets the flags for a
// single conditional branch (operands swapped for le/gt to avoid ZF).
void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  RegStorage r_tmp1(RegStorage::k64BitPair, r0, r1);
  RegStorage r_tmp2(RegStorage::k64BitPair, r2, r3);
  LoadValueDirectWideFixed(rl_src1, r_tmp1);
  LoadValueDirectWideFixed(rl_src2, r_tmp2);
  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, rs_r2, rs_r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, rs_r3, rs_r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, rs_r0, rs_r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, rs_r1, rs_r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, rs_r0, rs_r1);  // r0 = r0 | r1
      break;
    case kCondLe:
      ccode = kCondGe;
      break;
    case kCondGt:
      ccode = kCondLt;
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

// Fused long-compare-and-branch against a 64-bit constant: compare the
// high words (signed), then the low words (unsigned) only when the high
// words are equal.
void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegStorage low_reg = rl_src1.reg.GetLow();
  RegStorage high_reg = rl_src1.reg.GetHigh();

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    // OR the halves: ZF is set iff the whole 64-bit value is zero.
    RegStorage t_reg = AllocTemp();
    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

  OpRegImm(kOpCmp, high_reg, val_hi);
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;  // low-word compare is unsigned
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}

void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H.S.Warren's Hacker's Delight Chapter 10 and
   * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
   * The magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >=2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = 2^31 + 2^31 % d - 1, where d >= 2
   * nc = -2^31 + (2^31 + 1) % d, where d >= 2.
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * the magic number M is calcuated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always bigger than or equal to 32, so we just return 32-p as
   * the shift number S.
   */

  int32_t p = 31;
  const uint32_t two31 = 0x80000000U;

  // Initialize the computations.
  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
  uint32_t quotient1 = two31 / abs_nc;
  uint32_t remainder1 = two31 % abs_nc;
  uint32_t quotient2 = two31 / abs_d;
  uint32_t remainder2 = two31 % abs_d;

  /*
   * To avoid handling both positive and negative divisor, Hacker's Delight
   * introduces a method to handle these 2 cases together to avoid duplication.
   */
  uint32_t delta;
  do {
    p++;
    quotient1 = 2 * quotient1;
    remainder1 = 2 * remainder1;
    if (remainder1 >= abs_nc) {
      quotient1++;
      remainder1 = remainder1 - abs_nc;
    }
    quotient2 = 2 * quotient2;
    remainder2 = 2 * remainder2;
    if (remainder2 >= abs_d) {
      quotient2++;
      remainder2 = remainder2 - abs_d;
    }
    delta = abs_d - remainder2;
  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));

  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
  shift = p - 32;
}

// This overload is not used on x86; division by literal goes through the
// RegLocation-based GenDivRemLit below.
RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
  return rl_dest;
}

RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
                                     int imm, bool is_div) {
  // Use a multiply (and fixup) to perform an int div/rem by a constant.

  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Assume that the result will be in EDX.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rs_r2,
                           INVALID_SREG, INVALID_SREG};

  // handle div/rem by 1 special case.
  if (imm == 1) {
    if (is_div) {
      // x / 1 == x.
      StoreValue(rl_result, rl_src);
    } else {
      // x % 1 == 0.
      LoadConstantNoClobber(rs_r0, 0);
      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  } else if (imm == -1) {  // handle 0x80000000 / -1 special case.
    if (is_div) {
      LIR *minint_branch = 0;
      LoadValueDirectFixed(rl_src, rs_r0);
      OpRegImm(kOpCmp, rs_r0, 0x80000000);
      minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);

      // for x != MIN_INT, x / -1 == -x.
      NewLIR1(kX86Neg32R, r0);

      LIR* branch_around = NewLIR1(kX86Jmp8, 0);
      // The target for cmp/jmp above.
      minint_branch->target = NewLIR0(kPseudoTargetLabel);
      // EAX already contains the right value (0x80000000),
      branch_around->target = NewLIR0(kPseudoTargetLabel);
    } else {
      // x % -1 == 0.
      LoadConstantNoClobber(rs_r0, 0);
    }
    // For this case, return the result in EAX.
    rl_result.reg.SetReg(r0);
  } else {
    CHECK(imm <= -2 || imm >= 2);
    // Use H.S.Warren's Hacker's Delight Chapter 10 and
    // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
    int magic, shift;
    CalculateMagicAndShift(imm, magic, shift);

    /*
     * For imm >= 2,
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
     * For imm <= -2,
     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
     * We implement this algorithm in the following way:
     * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
     * 2. if imm > 0 and magic < 0, add numerator to EDX
     *    if imm < 0 and magic > 0, sub numerator from EDX
     * 3. if S !=0, SAR S bits for EDX
     * 4. add 1 to EDX if EDX < 0
     * 5. Thus, EDX is the quotient
     */

    // Numerator into EAX.
    RegStorage numerator_reg;
    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
      // We will need the value later.
      if (rl_src.location == kLocPhysReg) {
        // We can use it directly.
        DCHECK(rl_src.reg.GetReg() != r0 && rl_src.reg.GetReg() != r2);
        numerator_reg = rl_src.reg;
      } else {
        numerator_reg = rs_r1;
        LoadValueDirectFixed(rl_src, numerator_reg);
      }
      OpRegCopy(rs_r0, numerator_reg);
    } else {
      // Only need this once.  Just put it into EAX.
      LoadValueDirectFixed(rl_src, rs_r0);
    }

    // EDX = magic.
    LoadConstantNoClobber(rs_r2, magic);

    // EDX:EAX = magic & dividend.
    NewLIR1(kX86Imul32DaR, r2);

    if (imm > 0 && magic < 0) {
      // Add numerator to EDX.
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Add32RR, r2, numerator_reg.GetReg());
    } else if (imm < 0 && magic > 0) {
      DCHECK(numerator_reg.Valid());
      NewLIR2(kX86Sub32RR, r2, numerator_reg.GetReg());
    }

    // Do we need the shift?
    if (shift != 0) {
      // Shift EDX by 'shift' bits.
      NewLIR2(kX86Sar32RI, r2, shift);
    }

    // Add 1 to EDX if EDX < 0.

    // Move EDX to EAX.
    OpRegCopy(rs_r0, rs_r2);

    // Move sign bit to bit 0, zeroing the rest.
    NewLIR2(kX86Shr32RI, r2, 31);

    // EDX = EDX + EAX.
    NewLIR2(kX86Add32RR, r2, r0);

    // Quotient is in EDX.
    if (!is_div) {
      // We need to compute the remainder.
      // Remainder is divisor - (quotient * imm).
      DCHECK(numerator_reg.Valid());
      OpRegCopy(rs_r0, numerator_reg);

      // EAX = numerator * imm.
      OpRegRegImm(kOpMul, rs_r2, rs_r2, imm);

      // EDX -= EAX.
      NewLIR2(kX86Sub32RR, r0, r2);

      // For this case, return the result in EAX.
      rl_result.reg.SetReg(r0);
    }
  }

  return rl_result;
}

// This overload is not used on x86; see the RegLocation-based GenDivRem below.
RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi,
                                  bool is_div) {
  LOG(FATAL) << "Unexpected use of GenDivRem for x86";
  return rl_dest;
}

// 32-bit div/rem via cdq + idiv, with explicit handling of the
// 0x80000000 / -1 case which would otherwise raise #DE.
RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                  RegLocation rl_src2, bool is_div, bool check_zero) {
  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, rs_r0);

  // Load RHS into EBX.
  LoadValueDirectFixed(rl_src2, rs_r1);

  // Copy LHS sign bit into EDX.
  NewLIR0(kx86Cdq32Da);

  if (check_zero) {
    // Handle division by zero case.
    GenImmedCheck(kCondEq, rs_r1, 0, kThrowDivZero);
  }

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, rs_r1, -1);
  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, rs_r0, 0x80000000);
  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX 0.
    LoadConstantNoClobber(rs_r2, 0);
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, r1);
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rs_r0,
                           INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2);
  }
  return rl_result;
}

// Inline Math.min/max(int, int) using a compare and a conditional move.
bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
  DCHECK_EQ(cu_->instruction_set, kX86);

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);

  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy will inadvertently clobber the second element with
   * the first one thus yielding the wrong result. Thus we do a swap in that case.
   */
  if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.reg, rl_src1.reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg);
  }

  StoreValue(rl_dest, rl_result);
  return true;
}

// Inline Memory.peek*(long address): load directly through the address.
bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == kLong) {
    // Unaligned access is allowed on x86.
    LoadBaseDispWide(rl_address.reg, 0, rl_result.reg, INVALID_SREG);
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned access is allowed on x86.
    LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

// Inline Memory.poke*(long address, value): store directly through the address.
bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address = NarrowRegLoc(rl_src_address);  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == kLong) {
    // Unaligned access is allowed on x86.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDispWide(rl_address.reg, 0, rl_value.reg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned access is allowed on x86.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size);
  }
  return true;
}

// r_base = reg1 + reg2 * scale + offset, via lea.
void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset);
}

// Compare a thread-local (fs-relative) 16-bit value against an immediate.
void X86Mir2Lir::OpTlsCmp(ThreadOffset offset, int val) {
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

// True if rl currently lives in the given physical register (and that
// register still holds it - either live or the home location).
static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) {
  return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home);
}

// Inline Unsafe.compareAndSwap{Int,Long,Object} using lock cmpxchg /
// lock cmpxchg8b. The long variant needs EDX:EAX + ECX:EBX plus EDI/ESI,
// which are saved/restored around the operation.
bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kX86);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset = NarrowRegLoc(rl_src_offset);  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]

  if (is_long) {
    // TODO: avoid unnecessary loads of SI and DI when the values are in registers.
    // TODO: CFI support.
    FlushAllRegs();
    LockCallTemps();
    RegStorage r_tmp1(RegStorage::k64BitPair, rAX, rDX);
    RegStorage r_tmp2(RegStorage::k64BitPair, rBX, rCX);
    LoadValueDirectWideFixed(rl_src_expected, r_tmp1);
    LoadValueDirectWideFixed(rl_src_new_value, r_tmp2);
    NewLIR1(kX86Push32R, rDI);
    MarkTemp(rDI);
    LockTemp(rDI);
    NewLIR1(kX86Push32R, rSI);
    MarkTemp(rSI);
    LockTemp(rSI);
    // Stack offsets below account for the two pushes above.
    const int push_offset = 4 /* push edi */ + 4 /* push esi */;
    int srcObjSp = IsInReg(this, rl_src_obj, rs_rSI) ? 0
                : (IsInReg(this, rl_src_obj, rs_rDI) ? 4
                : (SRegOffset(rl_src_obj.s_reg_low) + push_offset));
    LoadWordDisp(TargetReg(kSp), srcObjSp, rs_rDI);
    int srcOffsetSp = IsInReg(this, rl_src_offset, rs_rSI) ? 0
                   : (IsInReg(this, rl_src_offset, rs_rDI) ? 4
                   : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
    LoadWordDisp(TargetReg(kSp), srcOffsetSp, rs_rSI);
    NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);

    // After a store we need to insert barrier in case of potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kStoreLoad);

    FreeTemp(rSI);
    UnmarkTemp(rSI);
    NewLIR1(kX86Pop32R, rSI);
    FreeTemp(rDI);
    UnmarkTemp(rDI);
    NewLIR1(kX86Pop32R, rDI);
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
    FlushReg(rs_r0);
    LockTemp(rs_r0);

    RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(rl_new_value.reg, rl_object.reg);
      LockTemp(r0);
    }

    RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
    LoadValueDirect(rl_src_expected, rs_r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());

    // After a store we need to insert barrier in case of potential load. Since the
    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
    GenMemBarrier(kStoreLoad);

    FreeTemp(r0);
  }

  // Convert ZF to boolean
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), rl_result.reg.GetReg());
  StoreValue(rl_dest, rl_result);
  return true;
}

// Load a literal-pool value PC-relatively; the displacement is patched by
// the assembler (kFixupLoad) once the literal's offset is known.
LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  CHECK(base_of_code_ != nullptr);

  // Address the start of the method
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  LoadValueDirectFixed(rl_method, reg);
  store_method_addr_used_ = true;

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // 4 byte offset.  We will fix this up in the assembler later to have the right
  // value.
  LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256,
                    0, 0, target);
  res->target = target;
  res->flags.fixup = kFixupLoad;
  SetMemRefType(res, true, kLiteral);
  store_method_addr_used_ = true;
  return res;
}

// ARM-style multi-register loads/stores do not exist on x86.
LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVldm for x86";
  return NULL;
}

LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) {
  LOG(FATAL) << "Unexpected use of OpVstm for x86";
  return NULL;
}

// rl_result = (rl_src + (rl_src << (second_bit - first_bit))) << first_bit,
// used to multiply by a constant with exactly two bits set.
void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                               RegLocation rl_result, int lit,
                                               int first_bit, int second_bit) {
  RegStorage t_reg = AllocTemp();
  OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit);
  OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg);
  FreeTemp(t_reg);
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

// Throw ArithmeticException if the 64-bit register pair is zero.
void X86Mir2Lir::GenDivZeroCheck(RegStorage reg) {
  DCHECK(reg.IsPair());  // TODO: allow 64BitSolo.
  // We are not supposed to clobber the incoming storage, so allocate a temporary.
  RegStorage t_reg = AllocTemp();

  // Doing an OR is a quick way to check if both registers are zero. This will set the flags.
  OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh());

  // In case of zero, throw ArithmeticException.
  GenCheck(kCondEq, kThrowDivZero);

  // The temp is no longer needed so free it at this time.
  FreeTemp(t_reg);
}

// Test suspend flag, return target of taken suspend branch
LIR* X86Mir2Lir::OpTestSuspend(LIR* target) {
  OpTlsCmp(Thread::ThreadFlagsOffset(), 0);
  return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target);
}

// Decrement register and branch on condition
LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  OpRegImm(kOpSub, reg, 1);
  return OpCondBranch(c_code, target);
}

// Not used on x86: literal div/rem is handled by GenDivRemLit above.
bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of smallLiteralDive in x86";
  return false;
}

// Not used on x86: imul with an immediate is always cheap enough.
bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of easyMultiply in x86";
  return false;
}

// Thumb2 IT blocks have no x86 equivalent.
LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
  LOG(FATAL) << "Unexpected use of OpIT in x86";
  return NULL;
}

// dest = src * val, with strength reduction for the 0 and 1 cases.
void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) {
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      OpRegCopy(dest, src);
      break;
    default:
      OpRegRegImm(kOpMul, dest, src, val);
      break;
  }
}

// dest = [ESP + displacement] * val, with strength reduction for 0 and 1.
void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) {
  LIR *m;
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg());
      break;
    case 1:
      LoadBaseDisp(rs_rX86_SP, displacement, dest, kWord, sreg);
      break;
    default:
      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(), rX86_SP,
                  displacement, val);
      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
      break;
  }
}

// 64-bit multiply. Constant RHS gets a specialized 3-instruction schoolbook
// sequence (with extra shortcuts for 0, 1, 2 and powers of two); the general
// case falls through to the explicit-register sequence below.
void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
  }
  // Are we multiplying by a constant?
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if (val == 0) {
      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
      OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow());
      OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());
      StoreValueWide(rl_dest, rl_result);
      return;
    } else if (val == 1) {
      StoreValueWide(rl_dest, rl_src1);
      return;
    } else if (val == 2) {
      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
      return;
    } else if (IsPowerOfTwo(val)) {
      int shift_amount = LowestSetBit(val);
      if (!BadOverlap(rl_src1, rl_dest)) {
        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
                                                  rl_src1, shift_amount);
        StoreValueWide(rl_dest, rl_result);
        return;
      }
    }

    // Okay, just bite the bullet and do it.
    int32_t val_lo = Low32Bits(val);
    int32_t val_hi = High32Bits(val);
    FlushAllRegs();
    LockCallTemps();  // Prepare for explicit register usage.
    rl_src1 = UpdateLocWide(rl_src1);
    bool src1_in_reg = rl_src1.location == kLocPhysReg;
    int displacement = SRegOffset(rl_src1.s_reg_low);

    // ECX <- 1H * 2L
    // EAX <- 1L * 2H
    if (src1_in_reg) {
      GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo);
      GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi);
    } else {
      GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
      GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, r1, r0);

    // EAX <- 2L
    LoadConstantNoClobber(rs_r0, val_lo);

    // EDX:EAX <- 2L * 1L  (double precision)
    if (src1_in_reg) {
      NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg());
    } else {
      LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // EDX <- EDX + ECX (add high words)
    NewLIR2(kX86Add32RR, r2, r1);

    // Result is EDX:EAX
    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
                             RegStorage::MakeRegPair(rs_r0, rs_r2),
                             INVALID_SREG, INVALID_SREG};
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  // Nope.  Do it the hard way
  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
      mir_graph_->SRegToVReg(rl_src2.s_reg_low);

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.
  rl_src1 = UpdateLocWide(rl_src1);
  rl_src2 = UpdateLocWide(rl_src2);

  // At this point, the VRs are in their home locations.
1041 bool src1_in_reg = rl_src1.location == kLocPhysReg; 1042 bool src2_in_reg = rl_src2.location == kLocPhysReg; 1043 1044 // ECX <- 1H 1045 if (src1_in_reg) { 1046 NewLIR2(kX86Mov32RR, r1, rl_src1.reg.GetHighReg()); 1047 } else { 1048 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, 1049 kWord, GetSRegHi(rl_src1.s_reg_low)); 1050 } 1051 1052 if (is_square) { 1053 // Take advantage of the fact that the values are the same. 1054 // ECX <- ECX * 2L (1H * 2L) 1055 if (src2_in_reg) { 1056 NewLIR2(kX86Imul32RR, r1, rl_src2.reg.GetLowReg()); 1057 } else { 1058 int displacement = SRegOffset(rl_src2.s_reg_low); 1059 LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET); 1060 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1061 true /* is_load */, true /* is_64bit */); 1062 } 1063 1064 // ECX <- 2*ECX (2H * 1L) + (1H * 2L) 1065 NewLIR2(kX86Add32RR, r1, r1); 1066 } else { 1067 // EAX <- 2H 1068 if (src2_in_reg) { 1069 NewLIR2(kX86Mov32RR, r0, rl_src2.reg.GetHighReg()); 1070 } else { 1071 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, 1072 kWord, GetSRegHi(rl_src2.s_reg_low)); 1073 } 1074 1075 // EAX <- EAX * 1L (2H * 1L) 1076 if (src1_in_reg) { 1077 NewLIR2(kX86Imul32RR, r0, rl_src1.reg.GetLowReg()); 1078 } else { 1079 int displacement = SRegOffset(rl_src1.s_reg_low); 1080 LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET); 1081 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1082 true /* is_load */, true /* is_64bit */); 1083 } 1084 1085 // ECX <- ECX * 2L (1H * 2L) 1086 if (src2_in_reg) { 1087 NewLIR2(kX86Imul32RR, r1, rl_src2.reg.GetLowReg()); 1088 } else { 1089 int displacement = SRegOffset(rl_src2.s_reg_low); 1090 LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET); 1091 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1092 true /* is_load */, true /* is_64bit */); 1093 } 1094 1095 // ECX <- ECX + 
EAX (2H * 1L) + (1H * 2L) 1096 NewLIR2(kX86Add32RR, r1, r0); 1097 } 1098 1099 // EAX <- 2L 1100 if (src2_in_reg) { 1101 NewLIR2(kX86Mov32RR, r0, rl_src2.reg.GetLowReg()); 1102 } else { 1103 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, 1104 kWord, rl_src2.s_reg_low); 1105 } 1106 1107 // EDX:EAX <- 2L * 1L (double precision) 1108 if (src1_in_reg) { 1109 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); 1110 } else { 1111 int displacement = SRegOffset(rl_src1.s_reg_low); 1112 LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); 1113 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1114 true /* is_load */, true /* is_64bit */); 1115 } 1116 1117 // EDX <- EDX + ECX (add high words) 1118 NewLIR2(kX86Add32RR, r2, r1); 1119 1120 // Result is EDX:EAX 1121 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, 1122 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; 1123 StoreValueWide(rl_dest, rl_result); 1124} 1125 1126void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, 1127 Instruction::Code op) { 1128 DCHECK_EQ(rl_dest.location, kLocPhysReg); 1129 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1130 if (rl_src.location == kLocPhysReg) { 1131 // Both operands are in registers. 1132 // But we must ensure that rl_src is in pair 1133 rl_src = EvalLocWide(rl_src, kCoreReg, true); 1134 if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { 1135 // The registers are the same, so we would clobber it before the use. 1136 RegStorage temp_reg = AllocTemp(); 1137 OpRegCopy(temp_reg, rl_dest.reg); 1138 rl_src.reg.SetHighReg(temp_reg.GetReg()); 1139 } 1140 NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); 1141 1142 x86op = GetOpcode(op, rl_dest, rl_src, true); 1143 NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); 1144 FreeTemp(rl_src.reg); 1145 return; 1146 } 1147 1148 // RHS is in memory. 
  DCHECK((rl_src.location == kLocDalvikFrame) ||
         (rl_src.location == kLocCompilerTemp));
  int r_base = TargetReg(kSp).GetReg();
  int displacement = SRegOffset(rl_src.s_reg_low);

  // Register-to-memory forms: op low word, then the high word (carry-aware
  // opcode selected via is_high_op).
  LIR *lir = NewLIR3(x86op, rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);
  x86op = GetOpcode(op, rl_dest, rl_src, true);
  lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);
}

// Two-operand (2ADDR) form of 64-bit arithmetic: rl_dest op= rl_src.
// If the destination lives in memory, the op is applied directly to the
// stack slots instead of loading/storing the destination.
void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
  rl_dest = UpdateLocWide(rl_dest);
  if (rl_dest.location == kLocPhysReg) {
    // Ensure we are in a register pair
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);

    rl_src = UpdateLocWide(rl_src);
    GenLongRegOrMemOp(rl_result, rl_src, op);
    StoreFinalValueWide(rl_dest, rl_result);
    return;
  }

  // It wasn't in registers, so it better be in memory.
  DCHECK((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp));
  rl_src = LoadValueWide(rl_src, kCoreReg);

  // Operate directly into memory.
  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
  int r_base = TargetReg(kSp).GetReg();
  int displacement = SRegOffset(rl_dest.s_reg_low);

  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, rl_src.reg.GetLowReg());
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          false /* is_load */, true /* is64bit */);
  x86op = GetOpcode(op, rl_dest, rl_src, true);
  lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                          false /* is_load */, true /* is64bit */);
  FreeTemp(rl_src.reg);
}

// Three-operand form of 64-bit arithmetic: rl_dest = rl_src1 op rl_src2.
// 2ADDR opcodes are forwarded to the two-operand overload above. For
// commutative ops the operands may be swapped to reuse an existing temp.
void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2, Instruction::Code op,
                              bool is_commutative) {
  // Is this really a 2 operand operation?
  switch (op) {
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::AND_LONG_2ADDR:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG_2ADDR:
      GenLongArith(rl_dest, rl_src2, op);
      return;
    default:
      break;
  }

  if (rl_dest.location == kLocPhysReg) {
    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);

    // We are about to clobber the LHS, so it needs to be a temp.
    rl_result = ForceTempWide(rl_result);

    // Perform the operation using the RHS.
    rl_src2 = UpdateLocWide(rl_src2);
    GenLongRegOrMemOp(rl_result, rl_src2, op);

    // And now record that the result is in the temp.
    StoreFinalValueWide(rl_dest, rl_result);
    return;
  }

  // It wasn't in registers, so it better be in memory.
  DCHECK((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp));
  rl_src1 = UpdateLocWide(rl_src1);
  rl_src2 = UpdateLocWide(rl_src2);

  // Get one of the source operands into temporary register.
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  if (IsTemp(rl_src1.reg.GetLowReg()) && IsTemp(rl_src1.reg.GetHighReg())) {
    GenLongRegOrMemOp(rl_src1, rl_src2, op);
  } else if (is_commutative) {
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    // We need at least one of them to be a temporary.
    if (!(IsTemp(rl_src2.reg.GetLowReg()) && IsTemp(rl_src2.reg.GetHighReg()))) {
      rl_src1 = ForceTempWide(rl_src1);
      GenLongRegOrMemOp(rl_src1, rl_src2, op);
    } else {
      // Operands swapped: accumulate into rl_src2's temp instead.
      GenLongRegOrMemOp(rl_src2, rl_src1, op);
      StoreFinalValueWide(rl_dest, rl_src2);
      return;
    }
  } else {
    // Need LHS to be the temp.
    rl_src1 = ForceTempWide(rl_src1);
    GenLongRegOrMemOp(rl_src1, rl_src2, op);
  }

  StoreFinalValueWide(rl_dest, rl_src1);
}

// 64-bit add: commutative.
void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit subtract: not commutative.
void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
}

// 64-bit bitwise and: commutative.
void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit bitwise or: commutative.
void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit bitwise xor: commutative.
void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit negate: neg low; adc high, 0; neg high (two's complement across the pair).
void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = ForceTempWide(rl_src);
  if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
      ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
    // The registers are the same, so we would clobber it before the use.
    RegStorage temp_reg = AllocTemp();
    OpRegCopy(temp_reg, rl_result.reg);
    rl_result.reg.SetHighReg(temp_reg.GetReg());
  }
  OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
  OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                        // rHigh = rHigh + CF
  OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
  StoreValueWide(rl_dest, rl_result);
}

// Emit a register <-> thread-local-storage memory op (cmp or mov only).
void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset thread_offset) {
  X86OpCode opcode = kX86Bkpt;
  switch (op) {
  case kOpCmp: opcode = kX86Cmp32RT;  break;
  case kOpMov: opcode = kX86Mov32RT;  break;
  default:
    LOG(FATAL) << "Bad opcode: " << op;
    break;
  }
  NewLIR2(opcode, r_dest, thread_offset.Int32Value());
}

/*
 * Generate array load
 */
void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  RegLocation rl_result;
  rl_array = LoadValue(rl_array, kCoreReg);

  // Wide elements start at the 8-byte-aligned data offset; everything else
  // uses the 4-byte-aligned offset.
  int data_offset;
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  bool constant_index = rl_index.is_const;
  int32_t constant_index_value = 0;
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  } else {
    constant_index_value = mir_graph_->ConstantValue(rl_index);
    // If index is constant, just fold it into the data offset
    data_offset +=
        constant_index_value << scale;
    // treat as non array below
    rl_index.reg = RegStorage::InvalidReg();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
    if (constant_index) {
      // Compare the array length against the constant index directly in memory.
      GenMemImmedCheck(kCondLs, rl_array.reg, len_offset,
                       constant_index_value, kThrowConstantArrayBounds);
    } else {
      GenRegMemCheck(kCondUge, rl_index.reg, rl_array.reg, len_offset, kThrowArrayBounds);
    }
  }
  rl_result = EvalLoc(rl_dest, reg_class, true);
  if ((size == kLong) || (size == kDouble)) {
    LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg.GetLow(),
                        rl_result.reg.GetHigh(), size, INVALID_SREG);
    StoreValueWide(rl_dest, rl_result);
  } else {
    LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg,
                        RegStorage::InvalidReg(), size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
}

/*
 * Generate array store
 *
 */
void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;

  // Wide elements start at the 8-byte-aligned data offset.
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  rl_array = LoadValue(rl_array, kCoreReg);
  bool constant_index = rl_index.is_const;
  int32_t constant_index_value = 0;
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  } else {
    // If index is constant, just fold it into the data offset
    constant_index_value = mir_graph_->ConstantValue(rl_index);
    data_offset += constant_index_value << scale;
    // treat as non array below
    rl_index.reg = RegStorage::InvalidReg();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
    if (constant_index) {
      GenMemImmedCheck(kCondLs, rl_array.reg, len_offset,
                       constant_index_value, kThrowConstantArrayBounds);
    } else {
      GenRegMemCheck(kCondUge, rl_index.reg, rl_array.reg, len_offset, kThrowArrayBounds);
    }
  }
  if ((size == kLong) || (size == kDouble)) {
    rl_src = LoadValueWide(rl_src, reg_class);
  } else {
    rl_src = LoadValue(rl_src, reg_class);
  }
  // If the src reg can't be byte accessed, move it to a temp first.
  // (On x86-32 only EAX..EDX, i.e. reg numbers 0-3, have byte forms.)
  if ((size == kSignedByte || size == kUnsignedByte) && rl_src.reg.GetReg() >= 4) {
    RegStorage temp = AllocTemp();
    OpRegCopy(temp, rl_src.reg);
    StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp,
                         RegStorage::InvalidReg(), size, INVALID_SREG);
  } else {
    if (rl_src.wide) {
      StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg.GetLow(),
                           rl_src.reg.GetHigh(), size, INVALID_SREG);
    } else {
      StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg,
                           RegStorage::InvalidReg(), size, INVALID_SREG);
    }
  }
  if (card_mark) {
    // Free rl_index if its a temp. Ensures there are 2 free regs for card mark.
    if (!constant_index) {
      FreeTemp(rl_index.reg.GetReg());
    }
    MarkGCCard(rl_src.reg, rl_array.reg);
  }
}

// 64-bit shift by a compile-time constant (1..63). Shifts of exactly 32 are
// pure word moves; >32 shifts one word; <32 uses shld/shrd double-precision
// shifts across the pair. Returns the result location for the caller to store.
RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                          RegLocation rl_src, int shift_amount) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else if (shift_amount > 31) {
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow());
        FreeTemp(rl_src.reg.GetHighReg());
        NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetLow(), 0);
      } else {
        OpRegCopy(rl_result.reg, rl_src.reg);
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
        // shld shifts bits from the low word into the high word.
        NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount);
        NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
        // High word becomes the sign extension of the original high word.
        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
      } else if (shift_amount > 31) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
        NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31);
      } else {
        OpRegCopy(rl_result.reg, rl_src.reg);
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
        // shrd shifts bits from the high word into the low word.
        NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount);
        NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else if (shift_amount > 31) {
        OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh());
        NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32);
        LoadConstant(rl_result.reg.GetHigh(), 0);
      } else {
        OpRegCopy(rl_result.reg, rl_src.reg);
        OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh());
        NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount);
        NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  return rl_result;
}

// Dispatch for long-shift-by-constant: handles the 0 and SHL-by-1 special
// cases, bails to the generic path on bad src/dest overlap, otherwise defers
// to the immediate-shift worker above.
void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src, RegLocation rl_shift) {
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    StoreValueWide(rl_dest, rl_src);
    return;
  } else if (shift_amount == 1 &&
            (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
    // Need to handle this here to avoid calling StoreValueWide twice.
    // x << 1 is emitted as x + x.
    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
    return;
  }
  if (BadOverlap(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
  StoreValueWide(rl_dest, rl_result);
}

// 64-bit arithmetic where at least one operand is a compile-time constant.
// Routes to the immediate helpers, swapping operands for commutative ops when
// only rl_src1 is constant; falls back to the generic handler otherwise.
void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::AND_LONG:
    case Instruction::OR_LONG:
    case Instruction::XOR_LONG:
      if (rl_src2.is_const) {
        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
      } else {
        DCHECK(rl_src1.is_const);
        // Commutative: swap so the constant is the RHS.
        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
      }
      break;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (rl_src2.is_const) {
        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
      } else {
        // Subtraction is not commutative; a constant LHS needs the general path.
        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
      }
      break;
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG_2ADDR:
    case Instruction::AND_LONG_2ADDR:
      if (rl_src2.is_const) {
        GenLongImm(rl_dest, rl_src2, opcode);
      } else {
        DCHECK(rl_src1.is_const);
        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
      }
      break;
    default:
      // Default - bail to non-const handler.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
      break;
  }
}

// Returns true if applying 'op' with the given 32-bit immediate is an identity
// (and -1, or/xor 0), so the instruction for that word can be skipped.
bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
  switch (op) {
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      return value == -1;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      return value == 0;
    default:
      return false;
  }
}

// Select the x86 opcode for one word of a 64-bit reg/mem binary op.
// is_high_op picks the carry-propagating form (adc/sbb) for the high word;
// the reg/mem variants are chosen from the operand locations (at most one in memory).
X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
                                bool is_high_op) {
  bool rhs_in_mem = rhs.location != kLocPhysReg;
  bool dest_in_mem = dest.location != kLocPhysReg;
  DCHECK(!rhs_in_mem || !dest_in_mem);
  switch (op) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      if (dest_in_mem) {
        return is_high_op ? kX86Adc32MR : kX86Add32MR;
      } else if (rhs_in_mem) {
        return is_high_op ? kX86Adc32RM : kX86Add32RM;
      }
      return is_high_op ? kX86Adc32RR : kX86Add32RR;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (dest_in_mem) {
        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
      } else if (rhs_in_mem) {
        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
      }
      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      if (dest_in_mem) {
        return kX86And32MR;
      }
      return rhs_in_mem ? kX86And32RM : kX86And32RR;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if (dest_in_mem) {
        return kX86Or32MR;
      }
      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      if (dest_in_mem) {
        return kX86Xor32MR;
      }
      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
    default:
      LOG(FATAL) << "Unexpected opcode: " << op;
      return kX86Add32RR;
  }
}

// Select the x86 opcode for one word of a 64-bit op with an immediate RHS.
// Chooses the 8-bit-immediate encoding when the value fits in a signed byte,
// and the memory-destination form when 'loc' is not in a physical register.
X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
                                int32_t value) {
  bool in_mem = loc.location != kLocPhysReg;
  bool byte_imm = IS_SIMM8(value);
  DCHECK(in_mem || !IsFpReg(loc.reg));
  switch (op) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      if (byte_imm) {
        if (in_mem) {
          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
        }
        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
      }
      if (in_mem) {
        return is_high_op ? kX86Adc32MI : kX86Add32MI;
      }
      return is_high_op ? kX86Adc32RI : kX86Add32RI;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (byte_imm) {
        if (in_mem) {
          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
        }
        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
      }
      if (in_mem) {
        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
      }
      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      if (byte_imm) {
        return in_mem ? kX86And32MI8 : kX86And32RI8;
      }
      return in_mem ? kX86And32MI : kX86And32RI;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if (byte_imm) {
        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
      }
      return in_mem ? kX86Or32MI : kX86Or32RI;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      if (byte_imm) {
        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
      }
      return in_mem ? kX86Xor32MI : kX86Xor32RI;
    default:
      LOG(FATAL) << "Unexpected opcode: " << op;
      return kX86Add32MI;
  }
}

// 2ADDR form with constant RHS: dest op= const. Applies the op word-by-word,
// directly into the stack slots when the destination lives in memory, and
// skips any word where the immediate makes the op an identity.
void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
  DCHECK(rl_src.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src);
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  rl_dest = UpdateLocWide(rl_dest);

  // Can we just do this into memory?
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int r_base = TargetReg(kSp).GetReg();
    int displacement = SRegOffset(rl_dest.s_reg_low);

    if (!IsNoOp(op, val_lo)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
      LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo);
      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    }
    if (!IsNoOp(op, val_hi)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
      LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi);
      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    }
    return;
  }

  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  DCHECK_EQ(rl_result.location, kLocPhysReg);
  DCHECK(!IsFpReg(rl_result.reg));

  if (!IsNoOp(op, val_lo)) {
    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
    NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo);
  }
  if (!IsNoOp(op, val_hi)) {
    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
    NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi);
  }
  StoreValueWide(rl_dest, rl_result);
}

// Three-operand form with constant RHS: dest = src1 op const.
void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
                                RegLocation rl_src2, Instruction::Code op) {
  DCHECK(rl_src2.is_const);
  int64_t
val = mir_graph_->ConstantValueWide(rl_src2); 1717 int32_t val_lo = Low32Bits(val); 1718 int32_t val_hi = High32Bits(val); 1719 rl_dest = UpdateLocWide(rl_dest); 1720 rl_src1 = UpdateLocWide(rl_src1); 1721 1722 // Can we do this directly into the destination registers? 1723 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && 1724 rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() && 1725 rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && 1726 !IsFpReg(rl_dest.reg)) { 1727 if (!IsNoOp(op, val_lo)) { 1728 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 1729 NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo); 1730 } 1731 if (!IsNoOp(op, val_hi)) { 1732 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 1733 NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi); 1734 } 1735 1736 StoreFinalValueWide(rl_dest, rl_dest); 1737 return; 1738 } 1739 1740 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1741 DCHECK_EQ(rl_src1.location, kLocPhysReg); 1742 1743 // We need the values to be in a temporary 1744 RegLocation rl_result = ForceTempWide(rl_src1); 1745 if (!IsNoOp(op, val_lo)) { 1746 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 1747 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); 1748 } 1749 if (!IsNoOp(op, val_hi)) { 1750 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); 1751 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); 1752 } 1753 1754 StoreFinalValueWide(rl_dest, rl_result); 1755} 1756 1757// For final classes there are no sub-classes to check and so we can answer the instance-of 1758// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. 1759void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, 1760 RegLocation rl_dest, RegLocation rl_src) { 1761 RegLocation object = LoadValue(rl_src, kCoreReg); 1762 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1763 RegStorage result_reg = rl_result.reg; 1764 1765 // SETcc only works with EAX..EDX. 
1766 if (result_reg == object.reg || result_reg.GetReg() >= 4) { 1767 result_reg = AllocTypedTemp(false, kCoreReg); 1768 DCHECK_LT(result_reg.GetReg(), 4); 1769 } 1770 1771 // Assume that there is no match. 1772 LoadConstant(result_reg, 0); 1773 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); 1774 1775 RegStorage check_class = AllocTypedTemp(false, kCoreReg); 1776 1777 // If Method* is already in a register, we can save a copy. 1778 RegLocation rl_method = mir_graph_->GetMethodLoc(); 1779 int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + 1780 (sizeof(mirror::Class*) * type_idx); 1781 1782 if (rl_method.location == kLocPhysReg) { 1783 if (use_declaring_class) { 1784 LoadWordDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 1785 check_class); 1786 } else { 1787 LoadWordDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 1788 check_class); 1789 LoadWordDisp(check_class, offset_of_type, check_class); 1790 } 1791 } else { 1792 LoadCurrMethodDirect(check_class); 1793 if (use_declaring_class) { 1794 LoadWordDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 1795 check_class); 1796 } else { 1797 LoadWordDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 1798 check_class); 1799 LoadWordDisp(check_class, offset_of_type, check_class); 1800 } 1801 } 1802 1803 // Compare the computed class to the class in the object. 1804 DCHECK_EQ(object.location, kLocPhysReg); 1805 OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value()); 1806 1807 // Set the low byte of the result to 0 or 1 from the compare condition code. 
  // result_reg = (object->klass_ == check_class) ? 1 : 0.
  NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq);

  // Null-reference branch lands here; result is still 0 on that path.
  LIR* target = NewLIR0(kPseudoTargetLabel);
  null_branchover->target = target;
  FreeTemp(check_class);
  // If we computed into a substitute byte-addressable temp, copy the answer
  // back into the officially allocated result register.
  if (IsTemp(result_reg)) {
    OpRegCopy(rl_result.reg, result_reg);
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
}

// General-case instance-of: may call runtime helpers, so it pins the explicit
// call-temp registers (ref in kArg0, ref->klass_ in kArg1, target class in kArg2).
void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                            bool type_known_abstract, bool use_declaring_class,
                                            bool can_assume_type_is_in_dex_cache,
                                            uint32_t type_idx, RegLocation rl_dest,
                                            RegLocation rl_src) {
  FlushAllRegs();
  // May generate a call - use explicit registers.
  LockCallTemps();
  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 gets current Method*.
  RegStorage class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*.
  // Reference must end up in kArg0.
  if (needs_access_check) {
    // Check we have access to type_idx and if not throw IllegalAccessError,
    // Caller function returns Class* in kArg0.
    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccess),
                         type_idx, true);
    OpRegCopy(class_reg, TargetReg(kRet0));
    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
  } else if (use_declaring_class) {
    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                 class_reg);
  } else {
    // Load dex cache entry into class_reg (kArg2).
    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
    LoadWordDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                 class_reg);
    // Byte offset of entry type_idx in the resolved-types array.
    int32_t offset_of_type =
        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
        * type_idx);
    LoadWordDisp(class_reg, offset_of_type, class_reg);
    if (!can_assume_type_is_in_dex_cache) {
      // Need to test presence of type in dex cache at runtime.
      // A null Class* in the dex cache means the type is not yet resolved.
      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
      // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx, true);
      OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
      LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
      // Rejoin code paths
      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
      hop_branch->target = hop_target;
    }
  }
  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
  RegLocation rl_result = GetReturn(false);

  // SETcc only works with EAX..EDX.
  DCHECK_LT(rl_result.reg.GetReg(), 4);

  // Is the class NULL?
  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);

  /* Load object->klass_. */
  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
  LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
  LIR* branchover = nullptr;
  if (type_known_final) {
    // Final type: instance-of reduces to an exact class-pointer compare.
    // Ensure top 3 bytes of result are 0.
    LoadConstant(rl_result.reg, 0);
    OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondEq);
  } else {
    if (!type_known_abstract) {
      // Fast path: an exact class match succeeds without the runtime call.
      LoadConstant(rl_result.reg, 1);  // Assume result succeeds.
      branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
    }
    // Not an exact match: hand off to the runtime helper to walk the hierarchy.
    OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
  }
  // TODO: only clobber when type isn't final?
  ClobberCallerSave();
  /* Branch targets here. */
  LIR* target = NewLIR0(kPseudoTargetLabel);
  StoreValue(rl_dest, rl_result);
  branch1->target = target;
  if (branchover != nullptr) {
    branchover->target = target;
  }
}

// Generate code for a 32-bit integer arithmetic MIR instruction, choosing among
// x86 two-address, reg/mem and three-address forms.
void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest,
                               RegLocation rl_lhs, RegLocation rl_rhs) {
  OpKind op = kOpBkpt;
  bool is_div_rem = false;
  bool unary = false;
  bool shift_op = false;
  bool is_two_addr = false;
  RegLocation rl_result;
  // Decode the dex opcode into an OpKind plus shape flags; the _2ADDR variants
  // additionally request the two-address (dest == lhs) form.
  switch (opcode) {
    case Instruction::NEG_INT:
      op = kOpNeg;
      unary = true;
      break;
    case Instruction::NOT_INT:
      op = kOpMvn;
      unary = true;
      break;
    case Instruction::ADD_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::ADD_INT:
      op = kOpAdd;
      break;
    case Instruction::SUB_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::SUB_INT:
      op = kOpSub;
      break;
    case Instruction::MUL_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::MUL_INT:
      op = kOpMul;
      break;
    case Instruction::DIV_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::DIV_INT:
      op = kOpDiv;
      is_div_rem = true;
      break;
    /* NOTE: returns in kArg1 */
    case Instruction::REM_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::REM_INT:
      op = kOpRem;
      is_div_rem = true;
      break;
    case Instruction::AND_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::AND_INT:
      op = kOpAnd;
      break;
    case Instruction::OR_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::OR_INT:
      op = kOpOr;
      break;
    case Instruction::XOR_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::XOR_INT:
      op = kOpXor;
      break;
    case Instruction::SHL_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::SHL_INT:
      shift_op = true;
      op = kOpLsl;
      break;
    case Instruction::SHR_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::SHR_INT:
      shift_op = true;
      op = kOpAsr;
      break;
    case Instruction::USHR_INT_2ADDR:
      is_two_addr = true;
      // Fallthrough
    case Instruction::USHR_INT:
      shift_op = true;
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Invalid word arith op: " << opcode;
  }

  // Can we convert to a two address instruction?
  // Treat the non-2ADDR form as two-address when dest and lhs map to the
  // same Dalvik virtual register.
  if (!is_two_addr &&
      (mir_graph_->SRegToVReg(rl_dest.s_reg_low) ==
       mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) {
    is_two_addr = true;
  }

  // Get the div/rem stuff out of the way.
  if (is_div_rem) {
    rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true);
    StoreValue(rl_dest, rl_result);
    return;
  }

  if (unary) {
    rl_lhs = LoadValue(rl_lhs, kCoreReg);
    // NOTE(review): this UpdateLoc result is immediately overwritten by the
    // EvalLoc on the next line; the first assignment looks redundant — confirm.
    rl_result = UpdateLoc(rl_dest);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    OpRegReg(op, rl_result.reg, rl_lhs.reg);
  } else {
    if (shift_op) {
      // X86 doesn't require masking and must use ECX.
      RegStorage t_reg = TargetReg(kCount);  // rCX
      LoadValueDirectFixed(rl_rhs, t_reg);
      if (is_two_addr) {
        // Can we do this directly into memory?
        rl_result = UpdateLoc(rl_dest);
        // NOTE(review): the shift count is already in t_reg (rCX); this extra
        // load of rl_rhs into a core register appears unnecessary — confirm.
        rl_rhs = LoadValue(rl_rhs, kCoreReg);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory
          OpMemReg(op, rl_result, t_reg.GetReg());
          FreeTemp(t_reg);
          return;
        } else if (!IsFpReg(rl_result.reg.GetReg())) {
          // Can do this directly into the result register
          OpRegReg(op, rl_result.reg, t_reg);
          FreeTemp(t_reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        }
      }
      // Three address form, or we can't do directly.
      // Shift via three-address form: result = lhs OP rCX.
      rl_lhs = LoadValue(rl_lhs, kCoreReg);
      rl_result = EvalLoc(rl_dest, kCoreReg, true);
      OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg);
      FreeTemp(t_reg);
    } else {
      // Multiply is 3 operand only (sort of).
      if (is_two_addr && op != kOpMul) {
        // Can we do this directly into memory?
        rl_result = UpdateLoc(rl_dest);
        if (rl_result.location == kLocPhysReg) {
          // Can we do this from memory directly?
          rl_rhs = UpdateLoc(rl_rhs);
          if (rl_rhs.location != kLocPhysReg) {
            // Dest in register, rhs in memory: reg/mem two-address form.
            OpRegMem(op, rl_result.reg, rl_rhs);
            StoreFinalValue(rl_dest, rl_result);
            return;
          } else if (!IsFpReg(rl_rhs.reg)) {
            // Both in core registers: reg/reg two-address form.
            OpRegReg(op, rl_result.reg, rl_rhs.reg);
            StoreFinalValue(rl_dest, rl_result);
            return;
          }
        }
        rl_rhs = LoadValue(rl_rhs, kCoreReg);
        if (rl_result.location != kLocPhysReg) {
          // Okay, we can do this into memory.
          OpMemReg(op, rl_result, rl_rhs.reg.GetReg());
          return;
        } else if (!IsFpReg(rl_result.reg)) {
          // Can do this directly into the result register.
          OpRegReg(op, rl_result.reg, rl_rhs.reg);
          StoreFinalValue(rl_dest, rl_result);
          return;
        } else {
          // Result lives in an FP register: fall back to the generic
          // three-address core-register form.
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        }
      } else {
        // Try to use reg/memory instructions.
        rl_lhs = UpdateLoc(rl_lhs);
        rl_rhs = UpdateLoc(rl_rhs);
        // We can't optimize with FP registers.
        if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) {
          // Something is difficult, so fall back to the standard case.
          rl_lhs = LoadValue(rl_lhs, kCoreReg);
          rl_rhs = LoadValue(rl_rhs, kCoreReg);
          rl_result = EvalLoc(rl_dest, kCoreReg, true);
          OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
        } else {
          // We can optimize by moving to result and using memory operands.
          if (rl_rhs.location != kLocPhysReg) {
            // Force LHS into result.
            // We should be careful with order here:
            // if rl_dest and rl_lhs point to the same VR we should load first;
            // if they are different we should find a register first for dest.
            if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) {
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
            } else {
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              LoadValueDirect(rl_lhs, rl_result.reg);
            }
            // result = result OP [rhs in memory].
            OpRegMem(op, rl_result.reg, rl_rhs);
          } else if (rl_lhs.location != kLocPhysReg) {
            // RHS is in a register; LHS is in memory.
            if (op != kOpSub) {
              // Force RHS into result and operate on memory.
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegCopy(rl_result.reg, rl_rhs.reg);
              OpRegMem(op, rl_result.reg, rl_lhs);
            } else {
              // Subtraction isn't commutative.
              rl_lhs = LoadValue(rl_lhs, kCoreReg);
              rl_rhs = LoadValue(rl_rhs, kCoreReg);
              rl_result = EvalLoc(rl_dest, kCoreReg, true);
              OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
            }
          } else {
            // Both are in registers.
            rl_lhs = LoadValue(rl_lhs, kCoreReg);
            rl_rhs = LoadValue(rl_rhs, kCoreReg);
            rl_result = EvalLoc(rl_dest, kCoreReg, true);
            OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg);
          }
        }
      }
    }
  }
  StoreValue(rl_dest, rl_result);
}

// Returns true when neither operand currently lives in an FP register, i.e.
// the reg/mem forms above can be used without copying through core temporaries.
bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) {
  // If we have non-core registers, then we can't do good things.
  if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.reg.GetReg())) {
    return false;
  }
  if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.reg.GetReg())) {
    return false;
  }

  // Everything will be fine :-).
  return true;
}

}  // namespace art