// int_x86.cc revision de99bba92b4beac4c347772177f47bdd61e8f376
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* This file contains codegen for the X86 ISA */ 18 19#include "codegen_x86.h" 20#include "dex/quick/mir_to_lir-inl.h" 21#include "mirror/array.h" 22#include "x86_lir.h" 23 24namespace art { 25 26/* 27 * Perform register memory operation. 28 */ 29LIR* X86Mir2Lir::GenRegMemCheck(ConditionCode c_code, 30 int reg1, int base, int offset, ThrowKind kind) { 31 LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, 32 current_dalvik_offset_, reg1, base, offset); 33 OpRegMem(kOpCmp, reg1, base, offset); 34 LIR* branch = OpCondBranch(c_code, tgt); 35 // Remember branch target - will process later 36 throw_launchpads_.Insert(tgt); 37 return branch; 38} 39 40/* 41 * Perform a compare of memory to immediate value 42 */ 43LIR* X86Mir2Lir::GenMemImmedCheck(ConditionCode c_code, 44 int base, int offset, int check_value, ThrowKind kind) { 45 LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, 46 current_dalvik_offset_, base, check_value, 0); 47 NewLIR3(IS_SIMM8(check_value) ? 
kX86Cmp32MI8 : kX86Cmp32MI, base, offset, check_value); 48 LIR* branch = OpCondBranch(c_code, tgt); 49 // Remember branch target - will process later 50 throw_launchpads_.Insert(tgt); 51 return branch; 52} 53 54/* 55 * Compare two 64-bit values 56 * x = y return 0 57 * x < y return -1 58 * x > y return 1 59 */ 60void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, 61 RegLocation rl_src2) { 62 FlushAllRegs(); 63 LockCallTemps(); // Prepare for explicit register usage 64 LoadValueDirectWideFixed(rl_src1, r0, r1); 65 LoadValueDirectWideFixed(rl_src2, r2, r3); 66 // Compute (r1:r0) = (r1:r0) - (r3:r2) 67 OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 68 OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF 69 NewLIR2(kX86Set8R, r2, kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 70 NewLIR2(kX86Movzx8RR, r2, r2); 71 OpReg(kOpNeg, r2); // r2 = -r2 72 OpRegReg(kOpOr, r0, r1); // r0 = high | low - sets ZF 73 NewLIR2(kX86Set8R, r0, kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 74 NewLIR2(kX86Movzx8RR, r0, r0); 75 OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2 76 RegLocation rl_result = LocCReturn(); 77 StoreValue(rl_dest, rl_result); 78} 79 80X86ConditionCode X86ConditionEncoding(ConditionCode cond) { 81 switch (cond) { 82 case kCondEq: return kX86CondEq; 83 case kCondNe: return kX86CondNe; 84 case kCondCs: return kX86CondC; 85 case kCondCc: return kX86CondNc; 86 case kCondUlt: return kX86CondC; 87 case kCondUge: return kX86CondNc; 88 case kCondMi: return kX86CondS; 89 case kCondPl: return kX86CondNs; 90 case kCondVs: return kX86CondO; 91 case kCondVc: return kX86CondNo; 92 case kCondHi: return kX86CondA; 93 case kCondLs: return kX86CondBe; 94 case kCondGe: return kX86CondGe; 95 case kCondLt: return kX86CondL; 96 case kCondGt: return kX86CondG; 97 case kCondLe: return kX86CondLe; 98 case kCondAl: 99 case kCondNv: LOG(FATAL) << "Should not reach here"; 100 } 101 return kX86CondO; 102} 103 104LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, 105 LIR* target) 
{ 106 NewLIR2(kX86Cmp32RR, src1, src2); 107 X86ConditionCode cc = X86ConditionEncoding(cond); 108 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , 109 cc); 110 branch->target = target; 111 return branch; 112} 113 114LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, 115 int check_value, LIR* target) { 116 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { 117 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode 118 NewLIR2(kX86Test32RR, reg, reg); 119 } else { 120 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg, check_value); 121 } 122 X86ConditionCode cc = X86ConditionEncoding(cond); 123 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); 124 branch->target = target; 125 return branch; 126} 127 128LIR* X86Mir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) { 129 if (X86_FPREG(r_dest) || X86_FPREG(r_src)) 130 return OpFpRegCopy(r_dest, r_src); 131 LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, 132 r_dest, r_src); 133 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { 134 res->flags.is_nop = true; 135 } 136 return res; 137} 138 139LIR* X86Mir2Lir::OpRegCopy(int r_dest, int r_src) { 140 LIR *res = OpRegCopyNoInsert(r_dest, r_src); 141 AppendLIR(res); 142 return res; 143} 144 145void X86Mir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, 146 int src_lo, int src_hi) { 147 bool dest_fp = X86_FPREG(dest_lo) && X86_FPREG(dest_hi); 148 bool src_fp = X86_FPREG(src_lo) && X86_FPREG(src_hi); 149 assert(X86_FPREG(src_lo) == X86_FPREG(src_hi)); 150 assert(X86_FPREG(dest_lo) == X86_FPREG(dest_hi)); 151 if (dest_fp) { 152 if (src_fp) { 153 OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); 154 } else { 155 // TODO: Prevent this from happening in the code. The result is often 156 // unused or could have been loaded more easily from memory. 
157 NewLIR2(kX86MovdxrRR, dest_lo, src_lo); 158 dest_hi = AllocTempDouble(); 159 NewLIR2(kX86MovdxrRR, dest_hi, src_hi); 160 NewLIR2(kX86PunpckldqRR, dest_lo, dest_hi); 161 FreeTemp(dest_hi); 162 } 163 } else { 164 if (src_fp) { 165 NewLIR2(kX86MovdrxRR, dest_lo, src_lo); 166 NewLIR2(kX86PsrlqRI, src_lo, 32); 167 NewLIR2(kX86MovdrxRR, dest_hi, src_lo); 168 } else { 169 // Handle overlap 170 if (src_hi == dest_lo) { 171 OpRegCopy(dest_hi, src_hi); 172 OpRegCopy(dest_lo, src_lo); 173 } else { 174 OpRegCopy(dest_lo, src_lo); 175 OpRegCopy(dest_hi, src_hi); 176 } 177 } 178 } 179} 180 181void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { 182 RegLocation rl_result; 183 RegLocation rl_src = mir_graph_->GetSrc(mir, 0); 184 RegLocation rl_dest = mir_graph_->GetDest(mir); 185 rl_src = LoadValue(rl_src, kCoreReg); 186 187 // The kMirOpSelect has two variants, one for constants and one for moves. 188 const bool is_constant_case = (mir->ssa_rep->num_uses == 1); 189 190 if (is_constant_case) { 191 int true_val = mir->dalvikInsn.vB; 192 int false_val = mir->dalvikInsn.vC; 193 rl_result = EvalLoc(rl_dest, kCoreReg, true); 194 195 /* 196 * 1) When the true case is zero and result_reg is not same as src_reg: 197 * xor result_reg, result_reg 198 * cmp $0, src_reg 199 * mov t1, $false_case 200 * cmovnz result_reg, t1 201 * 2) When the false case is zero and result_reg is not same as src_reg: 202 * xor result_reg, result_reg 203 * cmp $0, src_reg 204 * mov t1, $true_case 205 * cmovz result_reg, t1 206 * 3) All other cases (we do compare first to set eflags): 207 * cmp $0, src_reg 208 * mov result_reg, $true_case 209 * mov t1, $false_case 210 * cmovnz result_reg, t1 211 */ 212 const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg); 213 const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); 214 const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src); 215 const 
bool catch_all_case = !(true_zero_case || false_zero_case); 216 217 if (true_zero_case || false_zero_case) { 218 OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg); 219 } 220 221 if (true_zero_case || false_zero_case || catch_all_case) { 222 OpRegImm(kOpCmp, rl_src.low_reg, 0); 223 } 224 225 if (catch_all_case) { 226 OpRegImm(kOpMov, rl_result.low_reg, true_val); 227 } 228 229 if (true_zero_case || false_zero_case || catch_all_case) { 230 int immediateForTemp = false_zero_case ? true_val : false_val; 231 int temp1_reg = AllocTemp(); 232 OpRegImm(kOpMov, temp1_reg, immediateForTemp); 233 234 ConditionCode cc = false_zero_case ? kCondEq : kCondNe; 235 OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg); 236 237 FreeTemp(temp1_reg); 238 } 239 } else { 240 RegLocation rl_true = mir_graph_->GetSrc(mir, 1); 241 RegLocation rl_false = mir_graph_->GetSrc(mir, 2); 242 rl_true = LoadValue(rl_true, kCoreReg); 243 rl_false = LoadValue(rl_false, kCoreReg); 244 rl_result = EvalLoc(rl_dest, kCoreReg, true); 245 246 /* 247 * 1) When true case is already in place: 248 * cmp $0, src_reg 249 * cmovnz result_reg, false_reg 250 * 2) When false case is already in place: 251 * cmp $0, src_reg 252 * cmovz result_reg, true_reg 253 * 3) When neither cases are in place: 254 * cmp $0, src_reg 255 * mov result_reg, true_reg 256 * cmovnz result_reg, false_reg 257 */ 258 259 // kMirOpSelect is generated just for conditional cases when comparison is done with zero. 
260 OpRegImm(kOpCmp, rl_src.low_reg, 0); 261 262 if (rl_result.low_reg == rl_true.low_reg) { 263 OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg); 264 } else if (rl_result.low_reg == rl_false.low_reg) { 265 OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg); 266 } else { 267 OpRegCopy(rl_result.low_reg, rl_true.low_reg); 268 OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg); 269 } 270 } 271 272 StoreValue(rl_dest, rl_result); 273} 274 275void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { 276 LIR* taken = &block_label_list_[bb->taken]; 277 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); 278 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); 279 ConditionCode ccode = mir->meta.ccode; 280 281 if (rl_src1.is_const) { 282 std::swap(rl_src1, rl_src2); 283 ccode = FlipComparisonOrder(ccode); 284 } 285 if (rl_src2.is_const) { 286 // Do special compare/branch against simple const operand 287 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 288 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); 289 return; 290 } 291 292 FlushAllRegs(); 293 LockCallTemps(); // Prepare for explicit register usage 294 LoadValueDirectWideFixed(rl_src1, r0, r1); 295 LoadValueDirectWideFixed(rl_src2, r2, r3); 296 // Swap operands and condition code to prevent use of zero flag. 
297 if (ccode == kCondLe || ccode == kCondGt) { 298 // Compute (r3:r2) = (r3:r2) - (r1:r0) 299 OpRegReg(kOpSub, r2, r0); // r2 = r2 - r0 300 OpRegReg(kOpSbc, r3, r1); // r3 = r3 - r1 - CF 301 } else { 302 // Compute (r1:r0) = (r1:r0) - (r3:r2) 303 OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 304 OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF 305 } 306 switch (ccode) { 307 case kCondEq: 308 case kCondNe: 309 OpRegReg(kOpOr, r0, r1); // r0 = r0 | r1 310 break; 311 case kCondLe: 312 ccode = kCondGe; 313 break; 314 case kCondGt: 315 ccode = kCondLt; 316 break; 317 case kCondLt: 318 case kCondGe: 319 break; 320 default: 321 LOG(FATAL) << "Unexpected ccode: " << ccode; 322 } 323 OpCondBranch(ccode, taken); 324} 325 326void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, 327 int64_t val, ConditionCode ccode) { 328 int32_t val_lo = Low32Bits(val); 329 int32_t val_hi = High32Bits(val); 330 LIR* taken = &block_label_list_[bb->taken]; 331 LIR* not_taken = &block_label_list_[bb->fall_through]; 332 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 333 int32_t low_reg = rl_src1.low_reg; 334 int32_t high_reg = rl_src1.high_reg; 335 336 if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { 337 int t_reg = AllocTemp(); 338 OpRegRegReg(kOpOr, t_reg, low_reg, high_reg); 339 FreeTemp(t_reg); 340 OpCondBranch(ccode, taken); 341 return; 342 } 343 344 OpRegImm(kOpCmp, high_reg, val_hi); 345 switch (ccode) { 346 case kCondEq: 347 case kCondNe: 348 OpCondBranch(kCondNe, (ccode == kCondEq) ? 
not_taken : taken); 349 break; 350 case kCondLt: 351 OpCondBranch(kCondLt, taken); 352 OpCondBranch(kCondGt, not_taken); 353 ccode = kCondUlt; 354 break; 355 case kCondLe: 356 OpCondBranch(kCondLt, taken); 357 OpCondBranch(kCondGt, not_taken); 358 ccode = kCondLs; 359 break; 360 case kCondGt: 361 OpCondBranch(kCondGt, taken); 362 OpCondBranch(kCondLt, not_taken); 363 ccode = kCondHi; 364 break; 365 case kCondGe: 366 OpCondBranch(kCondGt, taken); 367 OpCondBranch(kCondLt, not_taken); 368 ccode = kCondUge; 369 break; 370 default: 371 LOG(FATAL) << "Unexpected ccode: " << ccode; 372 } 373 OpCmpImmBranch(ccode, low_reg, val_lo, taken); 374} 375 376void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) { 377 // It does not make sense to calculate magic and shift for zero divisor. 378 DCHECK_NE(divisor, 0); 379 380 /* According to H.S.Warren's Hacker's Delight Chapter 10 and 381 * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 382 * The magic number M and shift S can be calculated in the following way: 383 * Let nc be the most positive value of numerator(n) such that nc = kd - 1, 384 * where divisor(d) >=2. 385 * Let nc be the most negative value of numerator(n) such that nc = kd + 1, 386 * where divisor(d) <= -2. 387 * Thus nc can be calculated like: 388 * nc = 2^31 + 2^31 % d - 1, where d >= 2 389 * nc = -2^31 + (2^31 + 1) % d, where d >= 2. 390 * 391 * So the shift p is the smallest p satisfying 392 * 2^p > nc * (d - 2^p % d), where d >= 2 393 * 2^p > nc * (d + 2^p % d), where d <= -2. 394 * 395 * the magic number M is calcuated by 396 * M = (2^p + d - 2^p % d) / d, where d >= 2 397 * M = (2^p - d - 2^p % d) / d, where d <= -2. 398 * 399 * Notice that p is always bigger than or equal to 32, so we just return 32-p as 400 * the shift number S. 401 */ 402 403 int32_t p = 31; 404 const uint32_t two31 = 0x80000000U; 405 406 // Initialize the computations. 407 uint32_t abs_d = (divisor >= 0) ? 
divisor : -divisor; 408 uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31); 409 uint32_t abs_nc = tmp - 1 - tmp % abs_d; 410 uint32_t quotient1 = two31 / abs_nc; 411 uint32_t remainder1 = two31 % abs_nc; 412 uint32_t quotient2 = two31 / abs_d; 413 uint32_t remainder2 = two31 % abs_d; 414 415 /* 416 * To avoid handling both positive and negative divisor, Hacker's Delight 417 * introduces a method to handle these 2 cases together to avoid duplication. 418 */ 419 uint32_t delta; 420 do { 421 p++; 422 quotient1 = 2 * quotient1; 423 remainder1 = 2 * remainder1; 424 if (remainder1 >= abs_nc) { 425 quotient1++; 426 remainder1 = remainder1 - abs_nc; 427 } 428 quotient2 = 2 * quotient2; 429 remainder2 = 2 * remainder2; 430 if (remainder2 >= abs_d) { 431 quotient2++; 432 remainder2 = remainder2 - abs_d; 433 } 434 delta = abs_d - remainder2; 435 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); 436 437 magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); 438 shift = p - 32; 439} 440 441RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo, 442 int lit, bool is_div) { 443 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; 444 return rl_dest; 445} 446 447RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, 448 int imm, bool is_div) { 449 // Use a multiply (and fixup) to perform an int div/rem by a constant. 450 451 // We have to use fixed registers, so flush all the temps. 452 FlushAllRegs(); 453 LockCallTemps(); // Prepare for explicit register usage. 454 455 // Assume that the result will be in EDX. 456 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, 457 r2, INVALID_REG, INVALID_SREG, INVALID_SREG}; 458 459 // handle div/rem by 1 special case. 460 if (imm == 1) { 461 if (is_div) { 462 // x / 1 == x. 463 StoreValue(rl_result, rl_src); 464 } else { 465 // x % 1 == 0. 466 LoadConstantNoClobber(r0, 0); 467 // For this case, return the result in EAX. 
468 rl_result.low_reg = r0; 469 } 470 } else if (imm == -1) { // handle 0x80000000 / -1 special case. 471 if (is_div) { 472 LIR *minint_branch = 0; 473 LoadValueDirectFixed(rl_src, r0); 474 OpRegImm(kOpCmp, r0, 0x80000000); 475 minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); 476 477 // for x != MIN_INT, x / -1 == -x. 478 NewLIR1(kX86Neg32R, r0); 479 480 LIR* branch_around = NewLIR1(kX86Jmp8, 0); 481 // The target for cmp/jmp above. 482 minint_branch->target = NewLIR0(kPseudoTargetLabel); 483 // EAX already contains the right value (0x80000000), 484 branch_around->target = NewLIR0(kPseudoTargetLabel); 485 } else { 486 // x % -1 == 0. 487 LoadConstantNoClobber(r0, 0); 488 } 489 // For this case, return the result in EAX. 490 rl_result.low_reg = r0; 491 } else { 492 CHECK(imm <= -2 || imm >= 2); 493 // Use H.S.Warren's Hacker's Delight Chapter 10 and 494 // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 495 int magic, shift; 496 CalculateMagicAndShift(imm, magic, shift); 497 498 /* 499 * For imm >= 2, 500 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 501 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. 502 * For imm <= -2, 503 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 504 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. 505 * We implement this algorithm in the following way: 506 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX 507 * 2. if imm > 0 and magic < 0, add numerator to EDX 508 * if imm < 0 and magic > 0, sub numerator from EDX 509 * 3. if S !=0, SAR S bits for EDX 510 * 4. add 1 to EDX if EDX < 0 511 * 5. Thus, EDX is the quotient 512 */ 513 514 // Numerator into EAX. 515 int numerator_reg = -1; 516 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) { 517 // We will need the value later. 518 if (rl_src.location == kLocPhysReg) { 519 // We can use it directly. 
520 DCHECK(rl_src.low_reg != r0 && rl_src.low_reg != r2); 521 numerator_reg = rl_src.low_reg; 522 } else { 523 LoadValueDirectFixed(rl_src, r1); 524 numerator_reg = r1; 525 } 526 OpRegCopy(r0, numerator_reg); 527 } else { 528 // Only need this once. Just put it into EAX. 529 LoadValueDirectFixed(rl_src, r0); 530 } 531 532 // EDX = magic. 533 LoadConstantNoClobber(r2, magic); 534 535 // EDX:EAX = magic & dividend. 536 NewLIR1(kX86Imul32DaR, r2); 537 538 if (imm > 0 && magic < 0) { 539 // Add numerator to EDX. 540 DCHECK_NE(numerator_reg, -1); 541 NewLIR2(kX86Add32RR, r2, numerator_reg); 542 } else if (imm < 0 && magic > 0) { 543 DCHECK_NE(numerator_reg, -1); 544 NewLIR2(kX86Sub32RR, r2, numerator_reg); 545 } 546 547 // Do we need the shift? 548 if (shift != 0) { 549 // Shift EDX by 'shift' bits. 550 NewLIR2(kX86Sar32RI, r2, shift); 551 } 552 553 // Add 1 to EDX if EDX < 0. 554 555 // Move EDX to EAX. 556 OpRegCopy(r0, r2); 557 558 // Move sign bit to bit 0, zeroing the rest. 559 NewLIR2(kX86Shr32RI, r2, 31); 560 561 // EDX = EDX + EAX. 562 NewLIR2(kX86Add32RR, r2, r0); 563 564 // Quotient is in EDX. 565 if (!is_div) { 566 // We need to compute the remainder. 567 // Remainder is divisor - (quotient * imm). 568 DCHECK_NE(numerator_reg, -1); 569 OpRegCopy(r0, numerator_reg); 570 571 // EAX = numerator * imm. 572 OpRegRegImm(kOpMul, r2, r2, imm); 573 574 // EDX -= EAX. 575 NewLIR2(kX86Sub32RR, r0, r2); 576 577 // For this case, return the result in EAX. 578 rl_result.low_reg = r0; 579 } 580 } 581 582 return rl_result; 583} 584 585RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo, 586 int reg_hi, bool is_div) { 587 LOG(FATAL) << "Unexpected use of GenDivRem for x86"; 588 return rl_dest; 589} 590 591RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, 592 RegLocation rl_src2, bool is_div, bool check_zero) { 593 // We have to use fixed registers, so flush all the temps. 
594 FlushAllRegs(); 595 LockCallTemps(); // Prepare for explicit register usage. 596 597 // Load LHS into EAX. 598 LoadValueDirectFixed(rl_src1, r0); 599 600 // Load RHS into EBX. 601 LoadValueDirectFixed(rl_src2, r1); 602 603 // Copy LHS sign bit into EDX. 604 NewLIR0(kx86Cdq32Da); 605 606 if (check_zero) { 607 // Handle division by zero case. 608 GenImmedCheck(kCondEq, r1, 0, kThrowDivZero); 609 } 610 611 // Have to catch 0x80000000/-1 case, or we will get an exception! 612 OpRegImm(kOpCmp, r1, -1); 613 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); 614 615 // RHS is -1. 616 OpRegImm(kOpCmp, r0, 0x80000000); 617 LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); 618 619 // In 0x80000000/-1 case. 620 if (!is_div) { 621 // For DIV, EAX is already right. For REM, we need EDX 0. 622 LoadConstantNoClobber(r2, 0); 623 } 624 LIR* done = NewLIR1(kX86Jmp8, 0); 625 626 // Expected case. 627 minus_one_branch->target = NewLIR0(kPseudoTargetLabel); 628 minint_branch->target = minus_one_branch->target; 629 NewLIR1(kX86Idivmod32DaR, r1); 630 done->target = NewLIR0(kPseudoTargetLabel); 631 632 // Result is in EAX for div and EDX for rem. 633 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, 634 r0, INVALID_REG, INVALID_SREG, INVALID_SREG}; 635 if (!is_div) { 636 rl_result.low_reg = r2; 637 } 638 return rl_result; 639} 640 641bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { 642 DCHECK_EQ(cu_->instruction_set, kX86); 643 644 // Get the two arguments to the invoke and place them in GP registers. 645 RegLocation rl_src1 = info->args[0]; 646 RegLocation rl_src2 = info->args[1]; 647 rl_src1 = LoadValue(rl_src1, kCoreReg); 648 rl_src2 = LoadValue(rl_src2, kCoreReg); 649 650 RegLocation rl_dest = InlineTarget(info); 651 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 652 653 /* 654 * If the result register is the same as the second element, then we need to be careful. 
655 * The reason is that the first copy will inadvertently clobber the second element with 656 * the first one thus yielding the wrong result. Thus we do a swap in that case. 657 */ 658 if (rl_result.low_reg == rl_src2.low_reg) { 659 std::swap(rl_src1, rl_src2); 660 } 661 662 // Pick the first integer as min/max. 663 OpRegCopy(rl_result.low_reg, rl_src1.low_reg); 664 665 // If the integers are both in the same register, then there is nothing else to do 666 // because they are equal and we have already moved one into the result. 667 if (rl_src1.low_reg != rl_src2.low_reg) { 668 // It is possible we didn't pick correctly so do the actual comparison now. 669 OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg); 670 671 // Conditionally move the other integer into the destination register. 672 ConditionCode condition_code = is_min ? kCondGt : kCondLt; 673 OpCondRegReg(kOpCmov, condition_code, rl_result.low_reg, rl_src2.low_reg); 674 } 675 676 StoreValue(rl_dest, rl_result); 677 return true; 678} 679 680bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { 681 RegLocation rl_src_address = info->args[0]; // long address 682 rl_src_address.wide = 0; // ignore high half in info->args[1] 683 RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info); 684 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 685 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 686 if (size == kLong) { 687 // Unaligned access is allowed on x86. 688 LoadBaseDispWide(rl_address.low_reg, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); 689 StoreValueWide(rl_dest, rl_result); 690 } else { 691 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 692 // Unaligned access is allowed on x86. 
693 LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); 694 StoreValue(rl_dest, rl_result); 695 } 696 return true; 697} 698 699bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { 700 RegLocation rl_src_address = info->args[0]; // long address 701 rl_src_address.wide = 0; // ignore high half in info->args[1] 702 RegLocation rl_src_value = info->args[2]; // [size] value 703 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 704 if (size == kLong) { 705 // Unaligned access is allowed on x86. 706 RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); 707 StoreBaseDispWide(rl_address.low_reg, 0, rl_value.low_reg, rl_value.high_reg); 708 } else { 709 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 710 // Unaligned access is allowed on x86. 711 RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); 712 StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); 713 } 714 return true; 715} 716 717void X86Mir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) { 718 NewLIR5(kX86Lea32RA, rBase, reg1, reg2, scale, offset); 719} 720 721void X86Mir2Lir::OpTlsCmp(ThreadOffset offset, int val) { 722 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val); 723} 724 725bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { 726 DCHECK_EQ(cu_->instruction_set, kX86); 727 // Unused - RegLocation rl_src_unsafe = info->args[0]; 728 RegLocation rl_src_obj = info->args[1]; // Object - known non-null 729 RegLocation rl_src_offset = info->args[2]; // long low 730 rl_src_offset.wide = 0; // ignore high half in info->args[3] 731 RegLocation rl_src_expected = info->args[4]; // int, long or Object 732 // If is_long, high half is in info->args[5] 733 RegLocation rl_src_new_value = info->args[is_long ? 
6 : 5]; // int, long or Object 734 // If is_long, high half is in info->args[7] 735 736 if (is_long) { 737 FlushAllRegs(); 738 LockCallTemps(); 739 LoadValueDirectWideFixed(rl_src_expected, rAX, rDX); 740 LoadValueDirectWideFixed(rl_src_new_value, rBX, rCX); 741 NewLIR1(kX86Push32R, rDI); 742 MarkTemp(rDI); 743 LockTemp(rDI); 744 NewLIR1(kX86Push32R, rSI); 745 MarkTemp(rSI); 746 LockTemp(rSI); 747 const int push_offset = 4 /* push edi */ + 4 /* push esi */; 748 LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rDI); 749 LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rSI); 750 NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0); 751 FreeTemp(rSI); 752 UnmarkTemp(rSI); 753 NewLIR1(kX86Pop32R, rSI); 754 FreeTemp(rDI); 755 UnmarkTemp(rDI); 756 NewLIR1(kX86Pop32R, rDI); 757 FreeCallTemps(); 758 } else { 759 // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX. 760 FlushReg(r0); 761 LockTemp(r0); 762 763 // Release store semantics, get the barrier out of the way. TODO: revisit 764 GenMemBarrier(kStoreLoad); 765 766 RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); 767 RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg); 768 769 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { 770 // Mark card for object assuming new value is stored. 771 FreeTemp(r0); // Temporarily release EAX for MarkGCCard(). 
772 MarkGCCard(rl_new_value.low_reg, rl_object.low_reg); 773 LockTemp(r0); 774 } 775 776 RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); 777 LoadValueDirect(rl_src_expected, r0); 778 NewLIR5(kX86LockCmpxchgAR, rl_object.low_reg, rl_offset.low_reg, 0, 0, rl_new_value.low_reg); 779 780 FreeTemp(r0); 781 } 782 783 // Convert ZF to boolean 784 RegLocation rl_dest = InlineTarget(info); // boolean place for result 785 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 786 NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondZ); 787 NewLIR2(kX86Movzx8RR, rl_result.low_reg, rl_result.low_reg); 788 StoreValue(rl_dest, rl_result); 789 return true; 790} 791 792LIR* X86Mir2Lir::OpPcRelLoad(int reg, LIR* target) { 793 CHECK(base_of_code_ != nullptr); 794 795 // Address the start of the method 796 RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); 797 LoadValueDirectFixed(rl_method, reg); 798 store_method_addr_used_ = true; 799 800 // Load the proper value from the literal area. 801 // We don't know the proper offset for the value, so pick one that will force 802 // 4 byte offset. We will fix this up in the assembler later to have the right 803 // value. 
804 LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg, reg, 256, 0, 0, target); 805 res->target = target; 806 res->flags.fixup = kFixupLoad; 807 SetMemRefType(res, true, kLiteral); 808 store_method_addr_used_ = true; 809 return res; 810} 811 812LIR* X86Mir2Lir::OpVldm(int rBase, int count) { 813 LOG(FATAL) << "Unexpected use of OpVldm for x86"; 814 return NULL; 815} 816 817LIR* X86Mir2Lir::OpVstm(int rBase, int count) { 818 LOG(FATAL) << "Unexpected use of OpVstm for x86"; 819 return NULL; 820} 821 822void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, 823 RegLocation rl_result, int lit, 824 int first_bit, int second_bit) { 825 int t_reg = AllocTemp(); 826 OpRegRegImm(kOpLsl, t_reg, rl_src.low_reg, second_bit - first_bit); 827 OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, t_reg); 828 FreeTemp(t_reg); 829 if (first_bit != 0) { 830 OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit); 831 } 832} 833 834void X86Mir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) { 835 // We are not supposed to clobber either of the provided registers, so allocate 836 // a temporary to use for the check. 837 int t_reg = AllocTemp(); 838 839 // Doing an OR is a quick way to check if both registers are zero. This will set the flags. 840 OpRegRegReg(kOpOr, t_reg, reg_lo, reg_hi); 841 842 // In case of zero, throw ArithmeticException. 843 GenCheck(kCondEq, kThrowDivZero); 844 845 // The temp is no longer needed so free it at this time. 846 FreeTemp(t_reg); 847} 848 849// Test suspend flag, return target of taken suspend branch 850LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { 851 OpTlsCmp(Thread::ThreadFlagsOffset(), 0); 852 return OpCondBranch((target == NULL) ? 
kCondNe : kCondEq, target); 853} 854 855// Decrement register and branch on condition 856LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) { 857 OpRegImm(kOpSub, reg, 1); 858 return OpCondBranch(c_code, target); 859} 860 861bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, 862 RegLocation rl_src, RegLocation rl_dest, int lit) { 863 LOG(FATAL) << "Unexpected use of smallLiteralDive in x86"; 864 return false; 865} 866 867LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) { 868 LOG(FATAL) << "Unexpected use of OpIT in x86"; 869 return NULL; 870} 871 872void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) { 873 switch (val) { 874 case 0: 875 NewLIR2(kX86Xor32RR, dest, dest); 876 break; 877 case 1: 878 OpRegCopy(dest, src); 879 break; 880 default: 881 OpRegRegImm(kOpMul, dest, src, val); 882 break; 883 } 884} 885 886void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) { 887 LIR *m; 888 switch (val) { 889 case 0: 890 NewLIR2(kX86Xor32RR, dest, dest); 891 break; 892 case 1: 893 LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg); 894 break; 895 default: 896 m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP, 897 displacement, val); 898 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */); 899 break; 900 } 901} 902 903void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, 904 RegLocation rl_src2) { 905 if (rl_src1.is_const) { 906 std::swap(rl_src1, rl_src2); 907 } 908 // Are we multiplying by a constant? 
909 if (rl_src2.is_const) { 910 // Do special compare/branch against simple const operand 911 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 912 if (val == 0) { 913 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 914 OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg); 915 OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg); 916 StoreValueWide(rl_dest, rl_result); 917 return; 918 } else if (val == 1) { 919 rl_src1 = EvalLocWide(rl_src1, kCoreReg, true); 920 StoreValueWide(rl_dest, rl_src1); 921 return; 922 } else if (val == 2) { 923 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); 924 return; 925 } else if (IsPowerOfTwo(val)) { 926 int shift_amount = LowestSetBit(val); 927 if (!BadOverlap(rl_src1, rl_dest)) { 928 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 929 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, 930 rl_src1, shift_amount); 931 StoreValueWide(rl_dest, rl_result); 932 return; 933 } 934 } 935 936 // Okay, just bite the bullet and do it. 937 int32_t val_lo = Low32Bits(val); 938 int32_t val_hi = High32Bits(val); 939 FlushAllRegs(); 940 LockCallTemps(); // Prepare for explicit register usage. 
941 rl_src1 = UpdateLocWide(rl_src1); 942 bool src1_in_reg = rl_src1.location == kLocPhysReg; 943 int displacement = SRegOffset(rl_src1.s_reg_low); 944 945 // ECX <- 1H * 2L 946 // EAX <- 1L * 2H 947 if (src1_in_reg) { 948 GenImulRegImm(r1, rl_src1.high_reg, val_lo); 949 GenImulRegImm(r0, rl_src1.low_reg, val_hi); 950 } else { 951 GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo); 952 GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi); 953 } 954 955 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 956 NewLIR2(kX86Add32RR, r1, r0); 957 958 // EAX <- 2L 959 LoadConstantNoClobber(r0, val_lo); 960 961 // EDX:EAX <- 2L * 1L (double precision) 962 if (src1_in_reg) { 963 NewLIR1(kX86Mul32DaR, rl_src1.low_reg); 964 } else { 965 LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); 966 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 967 true /* is_load */, true /* is_64bit */); 968 } 969 970 // EDX <- EDX + ECX (add high words) 971 NewLIR2(kX86Add32RR, r2, r1); 972 973 // Result is EDX:EAX 974 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2, 975 INVALID_SREG, INVALID_SREG}; 976 StoreValueWide(rl_dest, rl_result); 977 return; 978 } 979 980 // Nope. Do it the hard way 981 // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L. 982 bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) == 983 mir_graph_->SRegToVReg(rl_src2.s_reg_low); 984 985 FlushAllRegs(); 986 LockCallTemps(); // Prepare for explicit register usage. 987 rl_src1 = UpdateLocWide(rl_src1); 988 rl_src2 = UpdateLocWide(rl_src2); 989 990 // At this point, the VRs are in their home locations. 
991 bool src1_in_reg = rl_src1.location == kLocPhysReg; 992 bool src2_in_reg = rl_src2.location == kLocPhysReg; 993 994 // ECX <- 1H 995 if (src1_in_reg) { 996 NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg); 997 } else { 998 LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1, 999 kWord, GetSRegHi(rl_src1.s_reg_low)); 1000 } 1001 1002 if (is_square) { 1003 // Take advantage of the fact that the values are the same. 1004 // ECX <- ECX * 2L (1H * 2L) 1005 if (src2_in_reg) { 1006 NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg); 1007 } else { 1008 int displacement = SRegOffset(rl_src2.s_reg_low); 1009 LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET); 1010 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1011 true /* is_load */, true /* is_64bit */); 1012 } 1013 1014 // ECX <- 2*ECX (2H * 1L) + (1H * 2L) 1015 NewLIR2(kX86Add32RR, r1, r1); 1016 } else { 1017 // EAX <- 2H 1018 if (src2_in_reg) { 1019 NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg); 1020 } else { 1021 LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0, 1022 kWord, GetSRegHi(rl_src2.s_reg_low)); 1023 } 1024 1025 // EAX <- EAX * 1L (2H * 1L) 1026 if (src1_in_reg) { 1027 NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg); 1028 } else { 1029 int displacement = SRegOffset(rl_src1.s_reg_low); 1030 LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET); 1031 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1032 true /* is_load */, true /* is_64bit */); 1033 } 1034 1035 // ECX <- ECX * 2L (1H * 2L) 1036 if (src2_in_reg) { 1037 NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg); 1038 } else { 1039 int displacement = SRegOffset(rl_src2.s_reg_low); 1040 LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET); 1041 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1042 true /* is_load */, true /* is_64bit */); 1043 } 1044 1045 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 1046 NewLIR2(kX86Add32RR, r1, r0); 
1047 } 1048 1049 // EAX <- 2L 1050 if (src2_in_reg) { 1051 NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg); 1052 } else { 1053 LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0, 1054 kWord, rl_src2.s_reg_low); 1055 } 1056 1057 // EDX:EAX <- 2L * 1L (double precision) 1058 if (src1_in_reg) { 1059 NewLIR1(kX86Mul32DaR, rl_src1.low_reg); 1060 } else { 1061 int displacement = SRegOffset(rl_src1.s_reg_low); 1062 LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); 1063 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1064 true /* is_load */, true /* is_64bit */); 1065 } 1066 1067 // EDX <- EDX + ECX (add high words) 1068 NewLIR2(kX86Add32RR, r2, r1); 1069 1070 // Result is EDX:EAX 1071 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2, 1072 INVALID_SREG, INVALID_SREG}; 1073 StoreValueWide(rl_dest, rl_result); 1074} 1075 1076void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, 1077 Instruction::Code op) { 1078 DCHECK_EQ(rl_dest.location, kLocPhysReg); 1079 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1080 if (rl_src.location == kLocPhysReg) { 1081 // Both operands are in registers. 1082 if (rl_dest.low_reg == rl_src.high_reg) { 1083 // The registers are the same, so we would clobber it before the use. 1084 int temp_reg = AllocTemp(); 1085 OpRegCopy(temp_reg, rl_dest.low_reg); 1086 rl_src.high_reg = temp_reg; 1087 } 1088 NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg); 1089 1090 x86op = GetOpcode(op, rl_dest, rl_src, true); 1091 NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg); 1092 FreeTemp(rl_src.low_reg); 1093 FreeTemp(rl_src.high_reg); 1094 return; 1095 } 1096 1097 // RHS is in memory. 
1098 DCHECK((rl_src.location == kLocDalvikFrame) || 1099 (rl_src.location == kLocCompilerTemp)); 1100 int rBase = TargetReg(kSp); 1101 int displacement = SRegOffset(rl_src.s_reg_low); 1102 1103 LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET); 1104 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 1105 true /* is_load */, true /* is64bit */); 1106 x86op = GetOpcode(op, rl_dest, rl_src, true); 1107 lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET); 1108 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 1109 true /* is_load */, true /* is64bit */); 1110} 1111 1112void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { 1113 rl_dest = UpdateLocWide(rl_dest); 1114 if (rl_dest.location == kLocPhysReg) { 1115 // Ensure we are in a register pair 1116 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1117 1118 rl_src = UpdateLocWide(rl_src); 1119 GenLongRegOrMemOp(rl_result, rl_src, op); 1120 StoreFinalValueWide(rl_dest, rl_result); 1121 return; 1122 } 1123 1124 // It wasn't in registers, so it better be in memory. 1125 DCHECK((rl_dest.location == kLocDalvikFrame) || 1126 (rl_dest.location == kLocCompilerTemp)); 1127 rl_src = LoadValueWide(rl_src, kCoreReg); 1128 1129 // Operate directly into memory. 
1130 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1131 int rBase = TargetReg(kSp); 1132 int displacement = SRegOffset(rl_dest.s_reg_low); 1133 1134 LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg); 1135 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 1136 false /* is_load */, true /* is64bit */); 1137 x86op = GetOpcode(op, rl_dest, rl_src, true); 1138 lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg); 1139 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 1140 false /* is_load */, true /* is64bit */); 1141 FreeTemp(rl_src.low_reg); 1142 FreeTemp(rl_src.high_reg); 1143} 1144 1145void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, 1146 RegLocation rl_src2, Instruction::Code op, 1147 bool is_commutative) { 1148 // Is this really a 2 operand operation? 1149 switch (op) { 1150 case Instruction::ADD_LONG_2ADDR: 1151 case Instruction::SUB_LONG_2ADDR: 1152 case Instruction::AND_LONG_2ADDR: 1153 case Instruction::OR_LONG_2ADDR: 1154 case Instruction::XOR_LONG_2ADDR: 1155 GenLongArith(rl_dest, rl_src2, op); 1156 return; 1157 default: 1158 break; 1159 } 1160 1161 if (rl_dest.location == kLocPhysReg) { 1162 RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg); 1163 1164 // We are about to clobber the LHS, so it needs to be a temp. 1165 rl_result = ForceTempWide(rl_result); 1166 1167 // Perform the operation using the RHS. 1168 rl_src2 = UpdateLocWide(rl_src2); 1169 GenLongRegOrMemOp(rl_result, rl_src2, op); 1170 1171 // And now record that the result is in the temp. 1172 StoreFinalValueWide(rl_dest, rl_result); 1173 return; 1174 } 1175 1176 // It wasn't in registers, so it better be in memory. 1177 DCHECK((rl_dest.location == kLocDalvikFrame) || 1178 (rl_dest.location == kLocCompilerTemp)); 1179 rl_src1 = UpdateLocWide(rl_src1); 1180 rl_src2 = UpdateLocWide(rl_src2); 1181 1182 // Get one of the source operands into temporary register. 
1183 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1184 if (IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg)) { 1185 GenLongRegOrMemOp(rl_src1, rl_src2, op); 1186 } else if (is_commutative) { 1187 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 1188 // We need at least one of them to be a temporary. 1189 if (!(IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg))) { 1190 rl_src1 = ForceTempWide(rl_src1); 1191 } 1192 GenLongRegOrMemOp(rl_src1, rl_src2, op); 1193 } else { 1194 // Need LHS to be the temp. 1195 rl_src1 = ForceTempWide(rl_src1); 1196 GenLongRegOrMemOp(rl_src1, rl_src2, op); 1197 } 1198 1199 StoreFinalValueWide(rl_dest, rl_src1); 1200} 1201 1202void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, 1203 RegLocation rl_src1, RegLocation rl_src2) { 1204 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1205} 1206 1207void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, 1208 RegLocation rl_src1, RegLocation rl_src2) { 1209 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false); 1210} 1211 1212void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, 1213 RegLocation rl_src1, RegLocation rl_src2) { 1214 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1215} 1216 1217void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, 1218 RegLocation rl_src1, RegLocation rl_src2) { 1219 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1220} 1221 1222void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, 1223 RegLocation rl_src1, RegLocation rl_src2) { 1224 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1225} 1226 1227void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { 1228 rl_src = LoadValueWide(rl_src, kCoreReg); 1229 RegLocation rl_result = ForceTempWide(rl_src); 1230 if (rl_dest.low_reg == rl_src.high_reg) { 1231 // The registers are the same, so we would clobber it before the use. 
1232 int temp_reg = AllocTemp(); 1233 OpRegCopy(temp_reg, rl_result.low_reg); 1234 rl_result.high_reg = temp_reg; 1235 } 1236 OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg); // rLow = -rLow 1237 OpRegImm(kOpAdc, rl_result.high_reg, 0); // rHigh = rHigh + CF 1238 OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg); // rHigh = -rHigh 1239 StoreValueWide(rl_dest, rl_result); 1240} 1241 1242void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset thread_offset) { 1243 X86OpCode opcode = kX86Bkpt; 1244 switch (op) { 1245 case kOpCmp: opcode = kX86Cmp32RT; break; 1246 case kOpMov: opcode = kX86Mov32RT; break; 1247 default: 1248 LOG(FATAL) << "Bad opcode: " << op; 1249 break; 1250 } 1251 NewLIR2(opcode, r_dest, thread_offset.Int32Value()); 1252} 1253 1254/* 1255 * Generate array load 1256 */ 1257void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, 1258 RegLocation rl_index, RegLocation rl_dest, int scale) { 1259 RegisterClass reg_class = oat_reg_class_by_size(size); 1260 int len_offset = mirror::Array::LengthOffset().Int32Value(); 1261 RegLocation rl_result; 1262 rl_array = LoadValue(rl_array, kCoreReg); 1263 1264 int data_offset; 1265 if (size == kLong || size == kDouble) { 1266 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 1267 } else { 1268 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 1269 } 1270 1271 bool constant_index = rl_index.is_const; 1272 int32_t constant_index_value = 0; 1273 if (!constant_index) { 1274 rl_index = LoadValue(rl_index, kCoreReg); 1275 } else { 1276 constant_index_value = mir_graph_->ConstantValue(rl_index); 1277 // If index is constant, just fold it into the data offset 1278 data_offset += constant_index_value << scale; 1279 // treat as non array below 1280 rl_index.low_reg = INVALID_REG; 1281 } 1282 1283 /* null object? 
*/ 1284 GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); 1285 1286 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { 1287 if (constant_index) { 1288 GenMemImmedCheck(kCondLs, rl_array.low_reg, len_offset, 1289 constant_index_value, kThrowConstantArrayBounds); 1290 } else { 1291 GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg, 1292 len_offset, kThrowArrayBounds); 1293 } 1294 } 1295 rl_result = EvalLoc(rl_dest, reg_class, true); 1296 if ((size == kLong) || (size == kDouble)) { 1297 LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_result.low_reg, 1298 rl_result.high_reg, size, INVALID_SREG); 1299 StoreValueWide(rl_dest, rl_result); 1300 } else { 1301 LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, 1302 data_offset, rl_result.low_reg, INVALID_REG, size, 1303 INVALID_SREG); 1304 StoreValue(rl_dest, rl_result); 1305 } 1306} 1307 1308/* 1309 * Generate array store 1310 * 1311 */ 1312void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, 1313 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { 1314 RegisterClass reg_class = oat_reg_class_by_size(size); 1315 int len_offset = mirror::Array::LengthOffset().Int32Value(); 1316 int data_offset; 1317 1318 if (size == kLong || size == kDouble) { 1319 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 1320 } else { 1321 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 1322 } 1323 1324 rl_array = LoadValue(rl_array, kCoreReg); 1325 bool constant_index = rl_index.is_const; 1326 int32_t constant_index_value = 0; 1327 if (!constant_index) { 1328 rl_index = LoadValue(rl_index, kCoreReg); 1329 } else { 1330 // If index is constant, just fold it into the data offset 1331 constant_index_value = mir_graph_->ConstantValue(rl_index); 1332 data_offset += constant_index_value << scale; 1333 // treat as non array below 1334 rl_index.low_reg = INVALID_REG; 1335 } 1336 1337 /* null object? 
*/ 1338 GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); 1339 1340 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { 1341 if (constant_index) { 1342 GenMemImmedCheck(kCondLs, rl_array.low_reg, len_offset, 1343 constant_index_value, kThrowConstantArrayBounds); 1344 } else { 1345 GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg, 1346 len_offset, kThrowArrayBounds); 1347 } 1348 } 1349 if ((size == kLong) || (size == kDouble)) { 1350 rl_src = LoadValueWide(rl_src, reg_class); 1351 } else { 1352 rl_src = LoadValue(rl_src, reg_class); 1353 } 1354 // If the src reg can't be byte accessed, move it to a temp first. 1355 if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) { 1356 int temp = AllocTemp(); 1357 OpRegCopy(temp, rl_src.low_reg); 1358 StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp, 1359 INVALID_REG, size, INVALID_SREG); 1360 } else { 1361 StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg, 1362 rl_src.high_reg, size, INVALID_SREG); 1363 } 1364 if (card_mark) { 1365 // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. 1366 if (!constant_index) { 1367 FreeTemp(rl_index.low_reg); 1368 } 1369 MarkGCCard(rl_src.low_reg, rl_array.low_reg); 1370 } 1371} 1372 1373RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 1374 RegLocation rl_src, int shift_amount) { 1375 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1376 switch (opcode) { 1377 case Instruction::SHL_LONG: 1378 case Instruction::SHL_LONG_2ADDR: 1379 DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. 
1380 if (shift_amount == 32) { 1381 OpRegCopy(rl_result.high_reg, rl_src.low_reg); 1382 LoadConstant(rl_result.low_reg, 0); 1383 } else if (shift_amount > 31) { 1384 OpRegCopy(rl_result.high_reg, rl_src.low_reg); 1385 FreeTemp(rl_src.high_reg); 1386 NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32); 1387 LoadConstant(rl_result.low_reg, 0); 1388 } else { 1389 OpRegCopy(rl_result.low_reg, rl_src.low_reg); 1390 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1391 NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount); 1392 NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount); 1393 } 1394 break; 1395 case Instruction::SHR_LONG: 1396 case Instruction::SHR_LONG_2ADDR: 1397 if (shift_amount == 32) { 1398 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1399 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1400 NewLIR2(kX86Sar32RI, rl_result.high_reg, 31); 1401 } else if (shift_amount > 31) { 1402 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1403 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1404 NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32); 1405 NewLIR2(kX86Sar32RI, rl_result.high_reg, 31); 1406 } else { 1407 OpRegCopy(rl_result.low_reg, rl_src.low_reg); 1408 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1409 NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount); 1410 NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount); 1411 } 1412 break; 1413 case Instruction::USHR_LONG: 1414 case Instruction::USHR_LONG_2ADDR: 1415 if (shift_amount == 32) { 1416 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1417 LoadConstant(rl_result.high_reg, 0); 1418 } else if (shift_amount > 31) { 1419 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1420 NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32); 1421 LoadConstant(rl_result.high_reg, 0); 1422 } else { 1423 OpRegCopy(rl_result.low_reg, rl_src.low_reg); 1424 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1425 NewLIR3(kX86Shrd32RRI, rl_result.low_reg, 
rl_result.high_reg, shift_amount); 1426 NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount); 1427 } 1428 break; 1429 default: 1430 LOG(FATAL) << "Unexpected case"; 1431 } 1432 return rl_result; 1433} 1434 1435void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 1436 RegLocation rl_src, RegLocation rl_shift) { 1437 // Per spec, we only care about low 6 bits of shift amount. 1438 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; 1439 if (shift_amount == 0) { 1440 rl_src = LoadValueWide(rl_src, kCoreReg); 1441 StoreValueWide(rl_dest, rl_src); 1442 return; 1443 } else if (shift_amount == 1 && 1444 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { 1445 // Need to handle this here to avoid calling StoreValueWide twice. 1446 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src); 1447 return; 1448 } 1449 if (BadOverlap(rl_src, rl_dest)) { 1450 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); 1451 return; 1452 } 1453 rl_src = LoadValueWide(rl_src, kCoreReg); 1454 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount); 1455 StoreValueWide(rl_dest, rl_result); 1456} 1457 1458void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, 1459 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 1460 switch (opcode) { 1461 case Instruction::ADD_LONG: 1462 case Instruction::AND_LONG: 1463 case Instruction::OR_LONG: 1464 case Instruction::XOR_LONG: 1465 if (rl_src2.is_const) { 1466 GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 1467 } else { 1468 DCHECK(rl_src1.is_const); 1469 GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 1470 } 1471 break; 1472 case Instruction::SUB_LONG: 1473 case Instruction::SUB_LONG_2ADDR: 1474 if (rl_src2.is_const) { 1475 GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 1476 } else { 1477 GenSubLong(opcode, rl_dest, rl_src1, rl_src2); 1478 } 1479 break; 1480 case Instruction::ADD_LONG_2ADDR: 1481 case Instruction::OR_LONG_2ADDR: 
1482 case Instruction::XOR_LONG_2ADDR: 1483 case Instruction::AND_LONG_2ADDR: 1484 if (rl_src2.is_const) { 1485 GenLongImm(rl_dest, rl_src2, opcode); 1486 } else { 1487 DCHECK(rl_src1.is_const); 1488 GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 1489 } 1490 break; 1491 default: 1492 // Default - bail to non-const handler. 1493 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); 1494 break; 1495 } 1496} 1497 1498bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { 1499 switch (op) { 1500 case Instruction::AND_LONG_2ADDR: 1501 case Instruction::AND_LONG: 1502 return value == -1; 1503 case Instruction::OR_LONG: 1504 case Instruction::OR_LONG_2ADDR: 1505 case Instruction::XOR_LONG: 1506 case Instruction::XOR_LONG_2ADDR: 1507 return value == 0; 1508 default: 1509 return false; 1510 } 1511} 1512 1513X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, 1514 bool is_high_op) { 1515 bool rhs_in_mem = rhs.location != kLocPhysReg; 1516 bool dest_in_mem = dest.location != kLocPhysReg; 1517 DCHECK(!rhs_in_mem || !dest_in_mem); 1518 switch (op) { 1519 case Instruction::ADD_LONG: 1520 case Instruction::ADD_LONG_2ADDR: 1521 if (dest_in_mem) { 1522 return is_high_op ? kX86Adc32MR : kX86Add32MR; 1523 } else if (rhs_in_mem) { 1524 return is_high_op ? kX86Adc32RM : kX86Add32RM; 1525 } 1526 return is_high_op ? kX86Adc32RR : kX86Add32RR; 1527 case Instruction::SUB_LONG: 1528 case Instruction::SUB_LONG_2ADDR: 1529 if (dest_in_mem) { 1530 return is_high_op ? kX86Sbb32MR : kX86Sub32MR; 1531 } else if (rhs_in_mem) { 1532 return is_high_op ? kX86Sbb32RM : kX86Sub32RM; 1533 } 1534 return is_high_op ? kX86Sbb32RR : kX86Sub32RR; 1535 case Instruction::AND_LONG_2ADDR: 1536 case Instruction::AND_LONG: 1537 if (dest_in_mem) { 1538 return kX86And32MR; 1539 } 1540 return rhs_in_mem ? 
kX86And32RM : kX86And32RR; 1541 case Instruction::OR_LONG: 1542 case Instruction::OR_LONG_2ADDR: 1543 if (dest_in_mem) { 1544 return kX86Or32MR; 1545 } 1546 return rhs_in_mem ? kX86Or32RM : kX86Or32RR; 1547 case Instruction::XOR_LONG: 1548 case Instruction::XOR_LONG_2ADDR: 1549 if (dest_in_mem) { 1550 return kX86Xor32MR; 1551 } 1552 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR; 1553 default: 1554 LOG(FATAL) << "Unexpected opcode: " << op; 1555 return kX86Add32RR; 1556 } 1557} 1558 1559X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, 1560 int32_t value) { 1561 bool in_mem = loc.location != kLocPhysReg; 1562 bool byte_imm = IS_SIMM8(value); 1563 DCHECK(in_mem || !IsFpReg(loc.low_reg)); 1564 switch (op) { 1565 case Instruction::ADD_LONG: 1566 case Instruction::ADD_LONG_2ADDR: 1567 if (byte_imm) { 1568 if (in_mem) { 1569 return is_high_op ? kX86Adc32MI8 : kX86Add32MI8; 1570 } 1571 return is_high_op ? kX86Adc32RI8 : kX86Add32RI8; 1572 } 1573 if (in_mem) { 1574 return is_high_op ? kX86Adc32MI : kX86Add32MI; 1575 } 1576 return is_high_op ? kX86Adc32RI : kX86Add32RI; 1577 case Instruction::SUB_LONG: 1578 case Instruction::SUB_LONG_2ADDR: 1579 if (byte_imm) { 1580 if (in_mem) { 1581 return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; 1582 } 1583 return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; 1584 } 1585 if (in_mem) { 1586 return is_high_op ? kX86Sbb32MI : kX86Sub32MI; 1587 } 1588 return is_high_op ? kX86Sbb32RI : kX86Sub32RI; 1589 case Instruction::AND_LONG_2ADDR: 1590 case Instruction::AND_LONG: 1591 if (byte_imm) { 1592 return in_mem ? kX86And32MI8 : kX86And32RI8; 1593 } 1594 return in_mem ? kX86And32MI : kX86And32RI; 1595 case Instruction::OR_LONG: 1596 case Instruction::OR_LONG_2ADDR: 1597 if (byte_imm) { 1598 return in_mem ? kX86Or32MI8 : kX86Or32RI8; 1599 } 1600 return in_mem ? kX86Or32MI : kX86Or32RI; 1601 case Instruction::XOR_LONG: 1602 case Instruction::XOR_LONG_2ADDR: 1603 if (byte_imm) { 1604 return in_mem ? 
kX86Xor32MI8 : kX86Xor32RI8; 1605 } 1606 return in_mem ? kX86Xor32MI : kX86Xor32RI; 1607 default: 1608 LOG(FATAL) << "Unexpected opcode: " << op; 1609 return kX86Add32MI; 1610 } 1611} 1612 1613void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { 1614 DCHECK(rl_src.is_const); 1615 int64_t val = mir_graph_->ConstantValueWide(rl_src); 1616 int32_t val_lo = Low32Bits(val); 1617 int32_t val_hi = High32Bits(val); 1618 rl_dest = UpdateLocWide(rl_dest); 1619 1620 // Can we just do this into memory? 1621 if ((rl_dest.location == kLocDalvikFrame) || 1622 (rl_dest.location == kLocCompilerTemp)) { 1623 int rBase = TargetReg(kSp); 1624 int displacement = SRegOffset(rl_dest.s_reg_low); 1625 1626 if (!IsNoOp(op, val_lo)) { 1627 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 1628 LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo); 1629 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 1630 false /* is_load */, true /* is64bit */); 1631 } 1632 if (!IsNoOp(op, val_hi)) { 1633 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 1634 LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi); 1635 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 1636 false /* is_load */, true /* is64bit */); 1637 } 1638 return; 1639 } 1640 1641 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1642 DCHECK_EQ(rl_result.location, kLocPhysReg); 1643 DCHECK(!IsFpReg(rl_result.low_reg)); 1644 1645 if (!IsNoOp(op, val_lo)) { 1646 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 1647 NewLIR2(x86op, rl_result.low_reg, val_lo); 1648 } 1649 if (!IsNoOp(op, val_hi)) { 1650 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); 1651 NewLIR2(x86op, rl_result.high_reg, val_hi); 1652 } 1653 StoreValueWide(rl_dest, rl_result); 1654} 1655 1656void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, 1657 RegLocation rl_src2, Instruction::Code op) { 1658 
DCHECK(rl_src2.is_const); 1659 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 1660 int32_t val_lo = Low32Bits(val); 1661 int32_t val_hi = High32Bits(val); 1662 rl_dest = UpdateLocWide(rl_dest); 1663 rl_src1 = UpdateLocWide(rl_src1); 1664 1665 // Can we do this directly into the destination registers? 1666 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && 1667 rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg && 1668 !IsFpReg(rl_dest.low_reg)) { 1669 if (!IsNoOp(op, val_lo)) { 1670 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 1671 NewLIR2(x86op, rl_dest.low_reg, val_lo); 1672 } 1673 if (!IsNoOp(op, val_hi)) { 1674 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 1675 NewLIR2(x86op, rl_dest.high_reg, val_hi); 1676 } 1677 return; 1678 } 1679 1680 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1681 DCHECK_EQ(rl_src1.location, kLocPhysReg); 1682 1683 // We need the values to be in a temporary 1684 RegLocation rl_result = ForceTempWide(rl_src1); 1685 if (!IsNoOp(op, val_lo)) { 1686 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 1687 NewLIR2(x86op, rl_result.low_reg, val_lo); 1688 } 1689 if (!IsNoOp(op, val_hi)) { 1690 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); 1691 NewLIR2(x86op, rl_result.high_reg, val_hi); 1692 } 1693 1694 StoreFinalValueWide(rl_dest, rl_result); 1695} 1696 1697// For final classes there are no sub-classes to check and so we can answer the instance-of 1698// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. 1699void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, 1700 RegLocation rl_dest, RegLocation rl_src) { 1701 RegLocation object = LoadValue(rl_src, kCoreReg); 1702 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1703 int result_reg = rl_result.low_reg; 1704 1705 // SETcc only works with EAX..EDX. 
1706 if (result_reg == object.low_reg || result_reg >= 4) { 1707 result_reg = AllocTypedTemp(false, kCoreReg); 1708 DCHECK_LT(result_reg, 4); 1709 } 1710 1711 // Assume that there is no match. 1712 LoadConstant(result_reg, 0); 1713 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL); 1714 1715 int check_class = AllocTypedTemp(false, kCoreReg); 1716 1717 // If Method* is already in a register, we can save a copy. 1718 RegLocation rl_method = mir_graph_->GetMethodLoc(); 1719 int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + 1720 (sizeof(mirror::Class*) * type_idx); 1721 1722 if (rl_method.location == kLocPhysReg) { 1723 if (use_declaring_class) { 1724 LoadWordDisp(rl_method.low_reg, 1725 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 1726 check_class); 1727 } else { 1728 LoadWordDisp(rl_method.low_reg, 1729 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 1730 check_class); 1731 LoadWordDisp(check_class, offset_of_type, check_class); 1732 } 1733 } else { 1734 LoadCurrMethodDirect(check_class); 1735 if (use_declaring_class) { 1736 LoadWordDisp(check_class, 1737 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 1738 check_class); 1739 } else { 1740 LoadWordDisp(check_class, 1741 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 1742 check_class); 1743 LoadWordDisp(check_class, offset_of_type, check_class); 1744 } 1745 } 1746 1747 // Compare the computed class to the class in the object. 1748 DCHECK_EQ(object.location, kLocPhysReg); 1749 OpRegMem(kOpCmp, check_class, object.low_reg, 1750 mirror::Object::ClassOffset().Int32Value()); 1751 1752 // Set the low byte of the result to 0 or 1 from the compare condition code. 
1753 NewLIR2(kX86Set8R, result_reg, kX86CondEq); 1754 1755 LIR* target = NewLIR0(kPseudoTargetLabel); 1756 null_branchover->target = target; 1757 FreeTemp(check_class); 1758 if (IsTemp(result_reg)) { 1759 OpRegCopy(rl_result.low_reg, result_reg); 1760 FreeTemp(result_reg); 1761 } 1762 StoreValue(rl_dest, rl_result); 1763} 1764 1765void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final, 1766 bool type_known_abstract, bool use_declaring_class, 1767 bool can_assume_type_is_in_dex_cache, 1768 uint32_t type_idx, RegLocation rl_dest, 1769 RegLocation rl_src) { 1770 FlushAllRegs(); 1771 // May generate a call - use explicit registers. 1772 LockCallTemps(); 1773 LoadCurrMethodDirect(TargetReg(kArg1)); // kArg1 gets current Method*. 1774 int class_reg = TargetReg(kArg2); // kArg2 will hold the Class*. 1775 // Reference must end up in kArg0. 1776 if (needs_access_check) { 1777 // Check we have access to type_idx and if not throw IllegalAccessError, 1778 // Caller function returns Class* in kArg0. 1779 CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccess), 1780 type_idx, true); 1781 OpRegCopy(class_reg, TargetReg(kRet0)); 1782 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); 1783 } else if (use_declaring_class) { 1784 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); 1785 LoadWordDisp(TargetReg(kArg1), 1786 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg); 1787 } else { 1788 // Load dex cache entry into class_reg (kArg2). 1789 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); 1790 LoadWordDisp(TargetReg(kArg1), 1791 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg); 1792 int32_t offset_of_type = 1793 mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*) 1794 * type_idx); 1795 LoadWordDisp(class_reg, offset_of_type, class_reg); 1796 if (!can_assume_type_is_in_dex_cache) { 1797 // Need to test presence of type in dex cache at runtime. 
1798 LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); 1799 // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0. 1800 CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx, true); 1801 OpRegCopy(TargetReg(kArg2), TargetReg(kRet0)); // Align usage with fast path. 1802 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); /* Reload Ref. */ 1803 // Rejoin code paths 1804 LIR* hop_target = NewLIR0(kPseudoTargetLabel); 1805 hop_branch->target = hop_target; 1806 } 1807 } 1808 /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */ 1809 RegLocation rl_result = GetReturn(false); 1810 1811 // SETcc only works with EAX..EDX. 1812 DCHECK_LT(rl_result.low_reg, 4); 1813 1814 // Is the class NULL? 1815 LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL); 1816 1817 /* Load object->klass_. */ 1818 DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); 1819 LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); 1820 /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */ 1821 LIR* branchover = nullptr; 1822 if (type_known_final) { 1823 // Ensure top 3 bytes of result are 0. 1824 LoadConstant(rl_result.low_reg, 0); 1825 OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2)); 1826 // Set the low byte of the result to 0 or 1 from the compare condition code. 1827 NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondEq); 1828 } else { 1829 if (!type_known_abstract) { 1830 LoadConstant(rl_result.low_reg, 1); // Assume result succeeds. 1831 branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL); 1832 } 1833 OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); 1834 OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial)); 1835 } 1836 // TODO: only clobber when type isn't final? 1837 ClobberCallerSave(); 1838 /* Branch targets here. 
*/ 1839 LIR* target = NewLIR0(kPseudoTargetLabel); 1840 StoreValue(rl_dest, rl_result); 1841 branch1->target = target; 1842 if (branchover != nullptr) { 1843 branchover->target = target; 1844 } 1845} 1846 1847void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, 1848 RegLocation rl_lhs, RegLocation rl_rhs) { 1849 OpKind op = kOpBkpt; 1850 bool is_div_rem = false; 1851 bool unary = false; 1852 bool shift_op = false; 1853 bool is_two_addr = false; 1854 RegLocation rl_result; 1855 switch (opcode) { 1856 case Instruction::NEG_INT: 1857 op = kOpNeg; 1858 unary = true; 1859 break; 1860 case Instruction::NOT_INT: 1861 op = kOpMvn; 1862 unary = true; 1863 break; 1864 case Instruction::ADD_INT_2ADDR: 1865 is_two_addr = true; 1866 // Fallthrough 1867 case Instruction::ADD_INT: 1868 op = kOpAdd; 1869 break; 1870 case Instruction::SUB_INT_2ADDR: 1871 is_two_addr = true; 1872 // Fallthrough 1873 case Instruction::SUB_INT: 1874 op = kOpSub; 1875 break; 1876 case Instruction::MUL_INT_2ADDR: 1877 is_two_addr = true; 1878 // Fallthrough 1879 case Instruction::MUL_INT: 1880 op = kOpMul; 1881 break; 1882 case Instruction::DIV_INT_2ADDR: 1883 is_two_addr = true; 1884 // Fallthrough 1885 case Instruction::DIV_INT: 1886 op = kOpDiv; 1887 is_div_rem = true; 1888 break; 1889 /* NOTE: returns in kArg1 */ 1890 case Instruction::REM_INT_2ADDR: 1891 is_two_addr = true; 1892 // Fallthrough 1893 case Instruction::REM_INT: 1894 op = kOpRem; 1895 is_div_rem = true; 1896 break; 1897 case Instruction::AND_INT_2ADDR: 1898 is_two_addr = true; 1899 // Fallthrough 1900 case Instruction::AND_INT: 1901 op = kOpAnd; 1902 break; 1903 case Instruction::OR_INT_2ADDR: 1904 is_two_addr = true; 1905 // Fallthrough 1906 case Instruction::OR_INT: 1907 op = kOpOr; 1908 break; 1909 case Instruction::XOR_INT_2ADDR: 1910 is_two_addr = true; 1911 // Fallthrough 1912 case Instruction::XOR_INT: 1913 op = kOpXor; 1914 break; 1915 case Instruction::SHL_INT_2ADDR: 1916 is_two_addr = true; 1917 // 
Fallthrough 1918 case Instruction::SHL_INT: 1919 shift_op = true; 1920 op = kOpLsl; 1921 break; 1922 case Instruction::SHR_INT_2ADDR: 1923 is_two_addr = true; 1924 // Fallthrough 1925 case Instruction::SHR_INT: 1926 shift_op = true; 1927 op = kOpAsr; 1928 break; 1929 case Instruction::USHR_INT_2ADDR: 1930 is_two_addr = true; 1931 // Fallthrough 1932 case Instruction::USHR_INT: 1933 shift_op = true; 1934 op = kOpLsr; 1935 break; 1936 default: 1937 LOG(FATAL) << "Invalid word arith op: " << opcode; 1938 } 1939 1940 // Can we convert to a two address instruction? 1941 if (!is_two_addr && 1942 (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == 1943 mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) { 1944 is_two_addr = true; 1945 } 1946 1947 // Get the div/rem stuff out of the way. 1948 if (is_div_rem) { 1949 rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true); 1950 StoreValue(rl_dest, rl_result); 1951 return; 1952 } 1953 1954 if (unary) { 1955 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1956 rl_result = UpdateLoc(rl_dest); 1957 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1958 OpRegReg(op, rl_result.low_reg, rl_lhs.low_reg); 1959 } else { 1960 if (shift_op) { 1961 // X86 doesn't require masking and must use ECX. 1962 int t_reg = TargetReg(kCount); // rCX 1963 LoadValueDirectFixed(rl_rhs, t_reg); 1964 if (is_two_addr) { 1965 // Can we do this directly into memory? 1966 rl_result = UpdateLoc(rl_dest); 1967 rl_rhs = LoadValue(rl_rhs, kCoreReg); 1968 if (rl_result.location != kLocPhysReg) { 1969 // Okay, we can do this into memory 1970 OpMemReg(op, rl_result, t_reg); 1971 FreeTemp(t_reg); 1972 return; 1973 } else if (!IsFpReg(rl_result.low_reg)) { 1974 // Can do this directly into the result register 1975 OpRegReg(op, rl_result.low_reg, t_reg); 1976 FreeTemp(t_reg); 1977 StoreFinalValue(rl_dest, rl_result); 1978 return; 1979 } 1980 } 1981 // Three address form, or we can't do directly. 
1982 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1983 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1984 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, t_reg); 1985 FreeTemp(t_reg); 1986 } else { 1987 // Multiply is 3 operand only (sort of). 1988 if (is_two_addr && op != kOpMul) { 1989 // Can we do this directly into memory? 1990 rl_result = UpdateLoc(rl_dest); 1991 if (rl_result.location == kLocPhysReg) { 1992 // Can we do this from memory directly? 1993 rl_rhs = UpdateLoc(rl_rhs); 1994 if (rl_rhs.location != kLocPhysReg) { 1995 OpRegMem(op, rl_result.low_reg, rl_rhs); 1996 StoreFinalValue(rl_dest, rl_result); 1997 return; 1998 } else if (!IsFpReg(rl_rhs.low_reg)) { 1999 OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg); 2000 StoreFinalValue(rl_dest, rl_result); 2001 return; 2002 } 2003 } 2004 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2005 if (rl_result.location != kLocPhysReg) { 2006 // Okay, we can do this into memory. 2007 OpMemReg(op, rl_result, rl_rhs.low_reg); 2008 return; 2009 } else if (!IsFpReg(rl_result.low_reg)) { 2010 // Can do this directly into the result register. 2011 OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg); 2012 StoreFinalValue(rl_dest, rl_result); 2013 return; 2014 } else { 2015 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2016 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2017 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2018 } 2019 } else { 2020 // Try to use reg/memory instructions. 2021 rl_lhs = UpdateLoc(rl_lhs); 2022 rl_rhs = UpdateLoc(rl_rhs); 2023 // We can't optimize with FP registers. 2024 if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) { 2025 // Something is difficult, so fall back to the standard case. 2026 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2027 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2028 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2029 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2030 } else { 2031 // We can optimize by moving to result and using memory operands. 
2032 if (rl_rhs.location != kLocPhysReg) { 2033 // Force LHS into result. 2034 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2035 LoadValueDirect(rl_lhs, rl_result.low_reg); 2036 OpRegMem(op, rl_result.low_reg, rl_rhs); 2037 } else if (rl_lhs.location != kLocPhysReg) { 2038 // RHS is in a register; LHS is in memory. 2039 if (op != kOpSub) { 2040 // Force RHS into result and operate on memory. 2041 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2042 OpRegCopy(rl_result.low_reg, rl_rhs.low_reg); 2043 OpRegMem(op, rl_result.low_reg, rl_lhs); 2044 } else { 2045 // Subtraction isn't commutative. 2046 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2047 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2048 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2049 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2050 } 2051 } else { 2052 // Both are in registers. 2053 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2054 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2055 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2056 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2057 } 2058 } 2059 } 2060 } 2061 } 2062 StoreValue(rl_dest, rl_result); 2063} 2064 2065bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) { 2066 // If we have non-core registers, then we can't do good things. 2067 if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.low_reg)) { 2068 return false; 2069 } 2070 if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.low_reg)) { 2071 return false; 2072 } 2073 2074 // Everything will be fine :-). 2075 return true; 2076} 2077} // namespace art 2078