int_x86.cc revision 79aa423fce400db3f551a3874e69e7cc4fb4f68f
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* This file contains codegen for the X86 ISA */ 18 19#include "codegen_x86.h" 20#include "dex/quick/mir_to_lir-inl.h" 21#include "mirror/array.h" 22#include "x86_lir.h" 23 24namespace art { 25 26/* 27 * Perform register memory operation. 28 */ 29LIR* X86Mir2Lir::GenRegMemCheck(ConditionCode c_code, 30 int reg1, int base, int offset, ThrowKind kind) { 31 LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, 32 current_dalvik_offset_, reg1, base, offset); 33 OpRegMem(kOpCmp, reg1, base, offset); 34 LIR* branch = OpCondBranch(c_code, tgt); 35 // Remember branch target - will process later 36 throw_launchpads_.Insert(tgt); 37 return branch; 38} 39 40/* 41 * Perform a compare of memory to immediate value 42 */ 43LIR* X86Mir2Lir::GenMemImmedCheck(ConditionCode c_code, 44 int base, int offset, int check_value, ThrowKind kind) { 45 LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, 46 current_dalvik_offset_, base, check_value, 0); 47 NewLIR3(IS_SIMM8(check_value) ? 
kX86Cmp32MI8 : kX86Cmp32MI, base, offset, check_value); 48 LIR* branch = OpCondBranch(c_code, tgt); 49 // Remember branch target - will process later 50 throw_launchpads_.Insert(tgt); 51 return branch; 52} 53 54/* 55 * Compare two 64-bit values 56 * x = y return 0 57 * x < y return -1 58 * x > y return 1 59 */ 60void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, 61 RegLocation rl_src2) { 62 FlushAllRegs(); 63 LockCallTemps(); // Prepare for explicit register usage 64 LoadValueDirectWideFixed(rl_src1, r0, r1); 65 LoadValueDirectWideFixed(rl_src2, r2, r3); 66 // Compute (r1:r0) = (r1:r0) - (r3:r2) 67 OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 68 OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF 69 NewLIR2(kX86Set8R, r2, kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 70 NewLIR2(kX86Movzx8RR, r2, r2); 71 OpReg(kOpNeg, r2); // r2 = -r2 72 OpRegReg(kOpOr, r0, r1); // r0 = high | low - sets ZF 73 NewLIR2(kX86Set8R, r0, kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 74 NewLIR2(kX86Movzx8RR, r0, r0); 75 OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2 76 RegLocation rl_result = LocCReturn(); 77 StoreValue(rl_dest, rl_result); 78} 79 80X86ConditionCode X86ConditionEncoding(ConditionCode cond) { 81 switch (cond) { 82 case kCondEq: return kX86CondEq; 83 case kCondNe: return kX86CondNe; 84 case kCondCs: return kX86CondC; 85 case kCondCc: return kX86CondNc; 86 case kCondUlt: return kX86CondC; 87 case kCondUge: return kX86CondNc; 88 case kCondMi: return kX86CondS; 89 case kCondPl: return kX86CondNs; 90 case kCondVs: return kX86CondO; 91 case kCondVc: return kX86CondNo; 92 case kCondHi: return kX86CondA; 93 case kCondLs: return kX86CondBe; 94 case kCondGe: return kX86CondGe; 95 case kCondLt: return kX86CondL; 96 case kCondGt: return kX86CondG; 97 case kCondLe: return kX86CondLe; 98 case kCondAl: 99 case kCondNv: LOG(FATAL) << "Should not reach here"; 100 } 101 return kX86CondO; 102} 103 104LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, 105 LIR* target) 
{ 106 NewLIR2(kX86Cmp32RR, src1, src2); 107 X86ConditionCode cc = X86ConditionEncoding(cond); 108 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , 109 cc); 110 branch->target = target; 111 return branch; 112} 113 114LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, 115 int check_value, LIR* target) { 116 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { 117 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode 118 NewLIR2(kX86Test32RR, reg, reg); 119 } else { 120 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg, check_value); 121 } 122 X86ConditionCode cc = X86ConditionEncoding(cond); 123 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); 124 branch->target = target; 125 return branch; 126} 127 128LIR* X86Mir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) { 129 if (X86_FPREG(r_dest) || X86_FPREG(r_src)) 130 return OpFpRegCopy(r_dest, r_src); 131 LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, 132 r_dest, r_src); 133 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { 134 res->flags.is_nop = true; 135 } 136 return res; 137} 138 139LIR* X86Mir2Lir::OpRegCopy(int r_dest, int r_src) { 140 LIR *res = OpRegCopyNoInsert(r_dest, r_src); 141 AppendLIR(res); 142 return res; 143} 144 145void X86Mir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, 146 int src_lo, int src_hi) { 147 bool dest_fp = X86_FPREG(dest_lo) && X86_FPREG(dest_hi); 148 bool src_fp = X86_FPREG(src_lo) && X86_FPREG(src_hi); 149 assert(X86_FPREG(src_lo) == X86_FPREG(src_hi)); 150 assert(X86_FPREG(dest_lo) == X86_FPREG(dest_hi)); 151 if (dest_fp) { 152 if (src_fp) { 153 OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); 154 } else { 155 // TODO: Prevent this from happening in the code. The result is often 156 // unused or could have been loaded more easily from memory. 
      // Move each 32-bit half into an XMM register, then pack them together.
      NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
      dest_hi = AllocTempDouble();
      NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
      NewLIR2(kX86PunpckldqRR, dest_lo, dest_hi);
      FreeTemp(dest_hi);
    }
  } else {
    if (src_fp) {
      // Extract low half, then shift the 64-bit source right by 32 and extract
      // the high half.  Note: the psrlq is destructive on src_lo.
      NewLIR2(kX86MovdrxRR, dest_lo, src_lo);
      NewLIR2(kX86PsrlqRI, src_lo, 32);
      NewLIR2(kX86MovdrxRR, dest_hi, src_lo);
    } else {
      // Handle overlap: copy in the order that does not clobber a source half
      // before it has been read.
      if (src_hi == dest_lo) {
        OpRegCopy(dest_hi, src_hi);
        OpRegCopy(dest_lo, src_lo);
      } else {
        OpRegCopy(dest_lo, src_lo);
        OpRegCopy(dest_hi, src_hi);
      }
    }
  }
}

/*
 * Lower kMirOpSelect (a ? b : c where the comparison is against zero) using
 * x86 conditional moves, avoiding branches.
 */
void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  RegLocation rl_result;
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  RegLocation rl_dest = mir_graph_->GetDest(mir);
  rl_src = LoadValue(rl_src, kCoreReg);

  // The kMirOpSelect has two variants, one for constants and one for moves.
  const bool is_constant_case = (mir->ssa_rep->num_uses == 1);

  if (is_constant_case) {
    int true_val = mir->dalvikInsn.vB;
    int false_val = mir->dalvikInsn.vC;
    rl_result = EvalLoc(rl_dest, kCoreReg, true);

    /*
     * 1) When the true case is zero and result_reg is not same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $false_case
     *     cmovnz result_reg, t1
     * 2) When the false case is zero and result_reg is not same as src_reg:
     *     xor result_reg, result_reg
     *     cmp $0, src_reg
     *     mov t1, $true_case
     *     cmovz result_reg, t1
     * 3) All other cases (we do compare first to set eflags):
     *     cmp $0, src_reg
     *     mov result_reg, $true_case
     *     mov t1, $false_case
     *     cmovnz result_reg, t1
     */
    // The xor trick (cases 1 and 2) is only safe when result_reg != src_reg,
    // since the xor would otherwise destroy the value being compared.
    const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg);
    const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src);
    const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src);
    const bool catch_all_case = !(true_zero_case || false_zero_case);

    if (true_zero_case || false_zero_case) {
      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      OpRegImm(kOpCmp, rl_src.low_reg, 0);
    }

    if (catch_all_case) {
      OpRegImm(kOpMov, rl_result.low_reg, true_val);
    }

    if (true_zero_case || false_zero_case || catch_all_case) {
      // cmov the remaining case's constant over the preloaded value.
      int immediateForTemp = false_zero_case ? true_val : false_val;
      int temp1_reg = AllocTemp();
      OpRegImm(kOpMov, temp1_reg, immediateForTemp);

      ConditionCode cc = false_zero_case ? kCondEq : kCondNe;
      OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg);

      FreeTemp(temp1_reg);
    }
  } else {
    RegLocation rl_true = mir_graph_->GetSrc(mir, 1);
    RegLocation rl_false = mir_graph_->GetSrc(mir, 2);
    rl_true = LoadValue(rl_true, kCoreReg);
    rl_false = LoadValue(rl_false, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);

    /*
     * 1) When true case is already in place:
     *     cmp $0, src_reg
     *     cmovnz result_reg, false_reg
     * 2) When false case is already in place:
     *     cmp $0, src_reg
     *     cmovz result_reg, true_reg
     * 3) When neither cases are in place:
     *     cmp $0, src_reg
     *     mov result_reg, true_reg
     *     cmovnz result_reg, false_reg
     */

    // kMirOpSelect is generated just for conditional cases when comparison is done with zero.
    OpRegImm(kOpCmp, rl_src.low_reg, 0);

    if (rl_result.low_reg == rl_true.low_reg) {
      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
    } else if (rl_result.low_reg == rl_false.low_reg) {
      OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg);
    } else {
      OpRegCopy(rl_result.low_reg, rl_true.low_reg);
      OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg);
    }
  }

  StoreValue(rl_dest, rl_result);
}

/*
 * Lower a fused long compare-and-branch.  Constants are normalized onto the
 * right-hand side so the immediate variant can be used when possible.
 */
void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  LIR* taken = &block_label_list_[bb->taken];
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  ConditionCode ccode = mir->meta.ccode;

  if (rl_src1.is_const) {
    // Keep any constant operand on the right; flip the condition to match.
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode);
    return;
  }

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage
  LoadValueDirectWideFixed(rl_src1, r0, r1);
  LoadValueDirectWideFixed(rl_src2, r2, r3);
  // Swap operands and condition code to prevent use of zero flag.
  if (ccode == kCondLe || ccode == kCondGt) {
    // Compute (r3:r2) = (r3:r2) - (r1:r0)
    OpRegReg(kOpSub, r2, r0);  // r2 = r2 - r0
    OpRegReg(kOpSbc, r3, r1);  // r3 = r3 - r1 - CF
  } else {
    // Compute (r1:r0) = (r1:r0) - (r3:r2)
    OpRegReg(kOpSub, r0, r2);  // r0 = r0 - r2
    OpRegReg(kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
  }
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      OpRegReg(kOpOr, r0, r1);  // r0 = r0 | r1: ZF set iff the values were equal
      break;
    case kCondLe:
      ccode = kCondGe;  // operands were swapped above
      break;
    case kCondGt:
      ccode = kCondLt;  // operands were swapped above
      break;
    case kCondLt:
    case kCondGe:
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  OpCondBranch(ccode, taken);
}

/*
 * Fused long compare-and-branch against a 64-bit constant.  Compares the high
 * words first; the low words are only compared when the high-word comparison
 * is inconclusive, and then with the unsigned flavor of the condition.
 */
void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
                                          int64_t val, ConditionCode ccode) {
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  int32_t low_reg = rl_src1.low_reg;
  int32_t high_reg = rl_src1.high_reg;

  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
    // OR the two halves together: ZF is set iff the full 64-bit value is zero.
    int t_reg = AllocTemp();
    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
    FreeTemp(t_reg);
    OpCondBranch(ccode, taken);
    return;
  }

  OpRegImm(kOpCmp, high_reg, val_hi);
  switch (ccode) {
    case kCondEq:
    case kCondNe:
      // High words differ: the overall result is already decided.
      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
      break;
    case kCondLt:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondUlt;  // low words compare unsigned
      break;
    case kCondLe:
      OpCondBranch(kCondLt, taken);
      OpCondBranch(kCondGt, not_taken);
      ccode = kCondLs;
      break;
    case kCondGt:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondHi;
      break;
    case kCondGe:
      OpCondBranch(kCondGt, taken);
      OpCondBranch(kCondLt, not_taken);
      ccode = kCondUge;
      break;
    default:
      LOG(FATAL) << "Unexpected ccode: " << ccode;
  }
  // High words equal: decide on the (unsigned) low-word comparison.
  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
}

void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
  // It does not make sense to calculate magic and shift for zero divisor.
  DCHECK_NE(divisor, 0);

  /* According to H.S.Warren's Hacker's Delight Chapter 10 and
   * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
   * The magic number M and shift S can be calculated in the following way:
   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
   * where divisor(d) >= 2.
   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
   * where divisor(d) <= -2.
   * Thus nc can be calculated like:
   * nc = 2^31 + 2^31 % d - 1, where d >= 2
   * nc = -2^31 + (2^31 + 1) % d, where d >= 2.
   *
   * So the shift p is the smallest p satisfying
   * 2^p > nc * (d - 2^p % d), where d >= 2
   * 2^p > nc * (d + 2^p % d), where d <= -2.
   *
   * The magic number M is calculated by
   * M = (2^p + d - 2^p % d) / d, where d >= 2
   * M = (2^p - d - 2^p % d) / d, where d <= -2.
   *
   * Notice that p is always bigger than or equal to 32, so we just return p - 32
   * as the shift number S.
   */

  int32_t p = 31;
  const uint32_t two31 = 0x80000000U;

  // Initialize the computations.
  uint32_t abs_d = (divisor >= 0) ?
divisor : -divisor; 408 uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31); 409 uint32_t abs_nc = tmp - 1 - tmp % abs_d; 410 uint32_t quotient1 = two31 / abs_nc; 411 uint32_t remainder1 = two31 % abs_nc; 412 uint32_t quotient2 = two31 / abs_d; 413 uint32_t remainder2 = two31 % abs_d; 414 415 /* 416 * To avoid handling both positive and negative divisor, Hacker's Delight 417 * introduces a method to handle these 2 cases together to avoid duplication. 418 */ 419 uint32_t delta; 420 do { 421 p++; 422 quotient1 = 2 * quotient1; 423 remainder1 = 2 * remainder1; 424 if (remainder1 >= abs_nc) { 425 quotient1++; 426 remainder1 = remainder1 - abs_nc; 427 } 428 quotient2 = 2 * quotient2; 429 remainder2 = 2 * remainder2; 430 if (remainder2 >= abs_d) { 431 quotient2++; 432 remainder2 = remainder2 - abs_d; 433 } 434 delta = abs_d - remainder2; 435 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); 436 437 magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); 438 shift = p - 32; 439} 440 441RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo, 442 int lit, bool is_div) { 443 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; 444 return rl_dest; 445} 446 447RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, 448 int imm, bool is_div) { 449 // Use a multiply (and fixup) to perform an int div/rem by a constant. 450 451 // We have to use fixed registers, so flush all the temps. 452 FlushAllRegs(); 453 LockCallTemps(); // Prepare for explicit register usage. 454 455 // Assume that the result will be in EDX. 456 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, 457 r2, INVALID_REG, INVALID_SREG, INVALID_SREG}; 458 459 // handle div/rem by 1 special case. 460 if (imm == 1) { 461 if (is_div) { 462 // x / 1 == x. 463 StoreValue(rl_result, rl_src); 464 } else { 465 // x % 1 == 0. 466 LoadConstantNoClobber(r0, 0); 467 // For this case, return the result in EAX. 
468 rl_result.low_reg = r0; 469 } 470 } else if (imm == -1) { // handle 0x80000000 / -1 special case. 471 if (is_div) { 472 LIR *minint_branch = 0; 473 LoadValueDirectFixed(rl_src, r0); 474 OpRegImm(kOpCmp, r0, 0x80000000); 475 minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); 476 477 // for x != MIN_INT, x / -1 == -x. 478 NewLIR1(kX86Neg32R, r0); 479 480 LIR* branch_around = NewLIR1(kX86Jmp8, 0); 481 // The target for cmp/jmp above. 482 minint_branch->target = NewLIR0(kPseudoTargetLabel); 483 // EAX already contains the right value (0x80000000), 484 branch_around->target = NewLIR0(kPseudoTargetLabel); 485 } else { 486 // x % -1 == 0. 487 LoadConstantNoClobber(r0, 0); 488 } 489 // For this case, return the result in EAX. 490 rl_result.low_reg = r0; 491 } else { 492 CHECK(imm <= -2 || imm >= 2); 493 // Use H.S.Warren's Hacker's Delight Chapter 10 and 494 // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 495 int magic, shift; 496 CalculateMagicAndShift(imm, magic, shift); 497 498 /* 499 * For imm >= 2, 500 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 501 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. 502 * For imm <= -2, 503 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 504 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. 505 * We implement this algorithm in the following way: 506 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX 507 * 2. if imm > 0 and magic < 0, add numerator to EDX 508 * if imm < 0 and magic > 0, sub numerator from EDX 509 * 3. if S !=0, SAR S bits for EDX 510 * 4. add 1 to EDX if EDX < 0 511 * 5. Thus, EDX is the quotient 512 */ 513 514 // Numerator into EAX. 515 int numerator_reg = -1; 516 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) { 517 // We will need the value later. 518 if (rl_src.location == kLocPhysReg) { 519 // We can use it directly. 
520 DCHECK(rl_src.low_reg != r0 && rl_src.low_reg != r2); 521 numerator_reg = rl_src.low_reg; 522 } else { 523 LoadValueDirectFixed(rl_src, r1); 524 numerator_reg = r1; 525 } 526 OpRegCopy(r0, numerator_reg); 527 } else { 528 // Only need this once. Just put it into EAX. 529 LoadValueDirectFixed(rl_src, r0); 530 } 531 532 // EDX = magic. 533 LoadConstantNoClobber(r2, magic); 534 535 // EDX:EAX = magic & dividend. 536 NewLIR1(kX86Imul32DaR, r2); 537 538 if (imm > 0 && magic < 0) { 539 // Add numerator to EDX. 540 DCHECK_NE(numerator_reg, -1); 541 NewLIR2(kX86Add32RR, r2, numerator_reg); 542 } else if (imm < 0 && magic > 0) { 543 DCHECK_NE(numerator_reg, -1); 544 NewLIR2(kX86Sub32RR, r2, numerator_reg); 545 } 546 547 // Do we need the shift? 548 if (shift != 0) { 549 // Shift EDX by 'shift' bits. 550 NewLIR2(kX86Sar32RI, r2, shift); 551 } 552 553 // Add 1 to EDX if EDX < 0. 554 555 // Move EDX to EAX. 556 OpRegCopy(r0, r2); 557 558 // Move sign bit to bit 0, zeroing the rest. 559 NewLIR2(kX86Shr32RI, r2, 31); 560 561 // EDX = EDX + EAX. 562 NewLIR2(kX86Add32RR, r2, r0); 563 564 // Quotient is in EDX. 565 if (!is_div) { 566 // We need to compute the remainder. 567 // Remainder is divisor - (quotient * imm). 568 DCHECK_NE(numerator_reg, -1); 569 OpRegCopy(r0, numerator_reg); 570 571 // EAX = numerator * imm. 572 OpRegRegImm(kOpMul, r2, r2, imm); 573 574 // EDX -= EAX. 575 NewLIR2(kX86Sub32RR, r0, r2); 576 577 // For this case, return the result in EAX. 578 rl_result.low_reg = r0; 579 } 580 } 581 582 return rl_result; 583} 584 585RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo, 586 int reg_hi, bool is_div) { 587 LOG(FATAL) << "Unexpected use of GenDivRem for x86"; 588 return rl_dest; 589} 590 591RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, 592 RegLocation rl_src2, bool is_div, bool check_zero) { 593 // We have to use fixed registers, so flush all the temps. 
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.

  // Load LHS into EAX.
  LoadValueDirectFixed(rl_src1, r0);

  // Load RHS into ECX (r1).  (Previous comment said EBX; r1 maps to ECX here —
  // NOTE(review): verify against the x86 register definitions in x86_lir.h.)
  LoadValueDirectFixed(rl_src2, r1);

  // Copy LHS sign bit into EDX (cdq), forming the EDX:EAX dividend for idiv.
  NewLIR0(kx86Cdq32Da);

  if (check_zero) {
    // Handle division by zero case.
    GenImmedCheck(kCondEq, r1, 0, kThrowDivZero);
  }

  // Have to catch 0x80000000/-1 case, or we will get an exception!
  OpRegImm(kOpCmp, r1, -1);
  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  OpRegImm(kOpCmp, r0, 0x80000000);
  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In 0x80000000/-1 case.
  if (!is_div) {
    // For DIV, EAX is already right. For REM, we need EDX 0.
    LoadConstantNoClobber(r2, 0);
  }
  // Skip the idiv for the overflow case.
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod32DaR, r1);
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in EAX for div and EDX for rem.
  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
                           r0, INVALID_REG, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.low_reg = r2;
  }
  return rl_result;
}

// Inline Math.min/max(int, int) using cmp + cmov; always succeeds on x86.
bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
  DCHECK_EQ(cu_->instruction_set, kX86);

  // Get the two arguments to the invoke and place them in GP registers.
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = info->args[1];
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);

  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  /*
   * If the result register is the same as the second element, then we need to be careful.
   * The reason is that the first copy will inadvertently clobber the second element with
   * the first one thus yielding the wrong result. Thus we do a swap in that case.
   */
  if (rl_result.low_reg == rl_src2.low_reg) {
    std::swap(rl_src1, rl_src2);
  }

  // Pick the first integer as min/max.
  OpRegCopy(rl_result.low_reg, rl_src1.low_reg);

  // If the integers are both in the same register, then there is nothing else to do
  // because they are equal and we have already moved one into the result.
  if (rl_src1.low_reg != rl_src2.low_reg) {
    // It is possible we didn't pick correctly so do the actual comparison now.
    OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg);

    // Conditionally move the other integer into the destination register.
    ConditionCode condition_code = is_min ? kCondGt : kCondLt;
    OpCondRegReg(kOpCmov, condition_code, rl_result.low_reg, rl_src2.low_reg);
  }

  StoreValue(rl_dest, rl_result);
  return true;
}

// Inline Memory.peek*() — raw load from an absolute address.
bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address.wide = 0;  // ignore high half in info->args[1]
  RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (size == kLong) {
    // Unaligned access is allowed on x86.
    LoadBaseDispWide(rl_address.low_reg, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned access is allowed on x86.
    LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

// Inline Memory.poke*() — raw store to an absolute address.
bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  rl_src_address.wide = 0;  // ignore high half in info->args[1]
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValue(rl_src_address, kCoreReg);
  if (size == kLong) {
    // Unaligned access is allowed on x86.
    RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
    StoreBaseDispWide(rl_address.low_reg, 0, rl_value.low_reg, rl_value.high_reg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord);
    // Unaligned access is allowed on x86.
    RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
    StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size);
  }
  return true;
}

// Emit lea rBase, [reg1 + reg2 * scale + offset].
void X86Mir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) {
  NewLIR5(kX86Lea32RA, rBase, reg1, reg2, scale, offset);
}

// Compare a thread-local value at 'offset' against immediate 'val'.
void X86Mir2Lir::OpTlsCmp(ThreadOffset offset, int val) {
  NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val);
}

/*
 * Inline Unsafe.compareAndSwap{Int,Long,Object}.  The 64-bit flavor uses
 * lock cmpxchg8b (EDX:EAX = expected, ECX:EBX = new value), the 32-bit flavor
 * uses lock cmpxchg with EAX holding the expected value.  Returns true to
 * indicate the intrinsic was generated.
 */
bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kX86);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  rl_src_offset.wide = 0;  // ignore high half in info->args[3]
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]

  if (is_long) {
    FlushAllRegs();
    LockCallTemps();
    // cmpxchg8b: expected in EDX:EAX, new value in ECX:EBX.
    LoadValueDirectWideFixed(rl_src_expected, rAX, rDX);
    LoadValueDirectWideFixed(rl_src_new_value, rBX, rCX);
    // Save EDI and ESI so they can carry the object pointer and the offset.
    NewLIR1(kX86Push32R, rDI);
    MarkTemp(rDI);
    LockTemp(rDI);
    NewLIR1(kX86Push32R, rSI);
    MarkTemp(rSI);
    LockTemp(rSI);
    // The two pushes above moved SP; compensate when reading the in-args.
    const int push_offset = 4 /* push edi */ + 4 /* push esi */;
    LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rDI);
    LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rSI);
    NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);
    FreeTemp(rSI);
    UnmarkTemp(rSI);
    NewLIR1(kX86Pop32R, rSI);
    FreeTemp(rDI);
    UnmarkTemp(rDI);
    NewLIR1(kX86Pop32R, rDI);
    FreeCallTemps();
  } else {
    // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX.
    FlushReg(r0);
    LockTemp(r0);

    // Release store semantics, get the barrier out of the way.  TODO: revisit
    GenMemBarrier(kStoreLoad);

    RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
    RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);

    if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
      // Mark card for object assuming new value is stored.
      FreeTemp(r0);  // Temporarily release EAX for MarkGCCard().
      MarkGCCard(rl_new_value.low_reg, rl_object.low_reg);
      LockTemp(r0);
    }

    RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
    LoadValueDirect(rl_src_expected, r0);
    NewLIR5(kX86LockCmpxchgAR, rl_object.low_reg, rl_offset.low_reg, 0, 0, rl_new_value.low_reg);

    FreeTemp(r0);
  }

  // Convert ZF to boolean
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondZ);
  NewLIR2(kX86Movzx8RR, rl_result.low_reg, rl_result.low_reg);
  StoreValue(rl_dest, rl_result);
  return true;
}

// Load a literal-pool value relative to the method's base address.
LIR* X86Mir2Lir::OpPcRelLoad(int reg, LIR* target) {
  CHECK(base_of_code_ != nullptr);

  // Address the start of the method
  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
  LoadValueDirectFixed(rl_method, reg);
  store_method_addr_used_ = true;

  // Load the proper value from the literal area.
  // We don't know the proper offset for the value, so pick one that will force
  // 4 byte offset.  We will fix this up in the assembler later to have the right
  // value.
804 LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg, reg, 256, 0, 0, target); 805 res->target = target; 806 res->flags.fixup = kFixupLoad; 807 SetMemRefType(res, true, kLiteral); 808 store_method_addr_used_ = true; 809 return res; 810} 811 812LIR* X86Mir2Lir::OpVldm(int rBase, int count) { 813 LOG(FATAL) << "Unexpected use of OpVldm for x86"; 814 return NULL; 815} 816 817LIR* X86Mir2Lir::OpVstm(int rBase, int count) { 818 LOG(FATAL) << "Unexpected use of OpVstm for x86"; 819 return NULL; 820} 821 822void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, 823 RegLocation rl_result, int lit, 824 int first_bit, int second_bit) { 825 int t_reg = AllocTemp(); 826 OpRegRegImm(kOpLsl, t_reg, rl_src.low_reg, second_bit - first_bit); 827 OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, t_reg); 828 FreeTemp(t_reg); 829 if (first_bit != 0) { 830 OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit); 831 } 832} 833 834void X86Mir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) { 835 // We are not supposed to clobber either of the provided registers, so allocate 836 // a temporary to use for the check. 837 int t_reg = AllocTemp(); 838 839 // Doing an OR is a quick way to check if both registers are zero. This will set the flags. 840 OpRegRegReg(kOpOr, t_reg, reg_lo, reg_hi); 841 842 // In case of zero, throw ArithmeticException. 843 GenCheck(kCondEq, kThrowDivZero); 844 845 // The temp is no longer needed so free it at this time. 846 FreeTemp(t_reg); 847} 848 849// Test suspend flag, return target of taken suspend branch 850LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { 851 OpTlsCmp(Thread::ThreadFlagsOffset(), 0); 852 return OpCondBranch((target == NULL) ? 
kCondNe : kCondEq, target); 853} 854 855// Decrement register and branch on condition 856LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) { 857 OpRegImm(kOpSub, reg, 1); 858 return OpCondBranch(c_code, target); 859} 860 861bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, 862 RegLocation rl_src, RegLocation rl_dest, int lit) { 863 LOG(FATAL) << "Unexpected use of smallLiteralDive in x86"; 864 return false; 865} 866 867LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) { 868 LOG(FATAL) << "Unexpected use of OpIT in x86"; 869 return NULL; 870} 871 872void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) { 873 switch (val) { 874 case 0: 875 NewLIR2(kX86Xor32RR, dest, dest); 876 break; 877 case 1: 878 OpRegCopy(dest, src); 879 break; 880 default: 881 OpRegRegImm(kOpMul, dest, src, val); 882 break; 883 } 884} 885 886void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) { 887 LIR *m; 888 switch (val) { 889 case 0: 890 NewLIR2(kX86Xor32RR, dest, dest); 891 break; 892 case 1: 893 LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg); 894 break; 895 default: 896 m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP, 897 displacement, val); 898 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */); 899 break; 900 } 901} 902 903void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, 904 RegLocation rl_src2) { 905 if (rl_src1.is_const) { 906 std::swap(rl_src1, rl_src2); 907 } 908 // Are we multiplying by a constant? 
909 if (rl_src2.is_const) { 910 // Do special compare/branch against simple const operand 911 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 912 if (val == 0) { 913 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 914 OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg); 915 OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg); 916 StoreValueWide(rl_dest, rl_result); 917 return; 918 } else if (val == 1) { 919 rl_src1 = EvalLocWide(rl_src1, kCoreReg, true); 920 StoreValueWide(rl_dest, rl_src1); 921 return; 922 } else if (val == 2) { 923 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); 924 return; 925 } else if (IsPowerOfTwo(val)) { 926 int shift_amount = LowestSetBit(val); 927 if (!BadOverlap(rl_src1, rl_dest)) { 928 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 929 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, 930 rl_src1, shift_amount); 931 StoreValueWide(rl_dest, rl_result); 932 return; 933 } 934 } 935 936 // Okay, just bite the bullet and do it. 937 int32_t val_lo = Low32Bits(val); 938 int32_t val_hi = High32Bits(val); 939 FlushAllRegs(); 940 LockCallTemps(); // Prepare for explicit register usage. 
941 rl_src1 = UpdateLocWide(rl_src1); 942 bool src1_in_reg = rl_src1.location == kLocPhysReg; 943 int displacement = SRegOffset(rl_src1.s_reg_low); 944 945 // ECX <- 1H * 2L 946 // EAX <- 1L * 2H 947 if (src1_in_reg) { 948 GenImulRegImm(r1, rl_src1.high_reg, val_lo); 949 GenImulRegImm(r0, rl_src1.low_reg, val_hi); 950 } else { 951 GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo); 952 GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi); 953 } 954 955 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 956 NewLIR2(kX86Add32RR, r1, r0); 957 958 // EAX <- 2L 959 LoadConstantNoClobber(r0, val_lo); 960 961 // EDX:EAX <- 2L * 1L (double precision) 962 if (src1_in_reg) { 963 NewLIR1(kX86Mul32DaR, rl_src1.low_reg); 964 } else { 965 LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); 966 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 967 true /* is_load */, true /* is_64bit */); 968 } 969 970 // EDX <- EDX + ECX (add high words) 971 NewLIR2(kX86Add32RR, r2, r1); 972 973 // Result is EDX:EAX 974 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2, 975 INVALID_SREG, INVALID_SREG}; 976 StoreValueWide(rl_dest, rl_result); 977 return; 978 } 979 980 // Nope. Do it the hard way 981 FlushAllRegs(); 982 LockCallTemps(); // Prepare for explicit register usage. 983 rl_src1 = UpdateLocWide(rl_src1); 984 rl_src2 = UpdateLocWide(rl_src2); 985 986 // At this point, the VRs are in their home locations. 
987 bool src1_in_reg = rl_src1.location == kLocPhysReg; 988 bool src2_in_reg = rl_src2.location == kLocPhysReg; 989 990 // ECX <- 1H 991 if (src1_in_reg) { 992 NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg); 993 } else { 994 LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1, 995 kWord, GetSRegHi(rl_src1.s_reg_low)); 996 } 997 998 // EAX <- 2H 999 if (src2_in_reg) { 1000 NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg); 1001 } else { 1002 LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0, 1003 kWord, GetSRegHi(rl_src2.s_reg_low)); 1004 } 1005 1006 // EAX <- EAX * 1L (2H * 1L) 1007 if (src1_in_reg) { 1008 NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg); 1009 } else { 1010 int displacement = SRegOffset(rl_src1.s_reg_low); 1011 LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET); 1012 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1013 true /* is_load */, true /* is_64bit */); 1014 } 1015 1016 // ECX <- ECX * 2L (1H * 2L) 1017 if (src2_in_reg) { 1018 NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg); 1019 } else { 1020 int displacement = SRegOffset(rl_src2.s_reg_low); 1021 LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET); 1022 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1023 true /* is_load */, true /* is_64bit */); 1024 } 1025 1026 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 1027 NewLIR2(kX86Add32RR, r1, r0); 1028 1029 // EAX <- 2L 1030 if (src2_in_reg) { 1031 NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg); 1032 } else { 1033 LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0, 1034 kWord, rl_src2.s_reg_low); 1035 } 1036 1037 // EDX:EAX <- 2L * 1L (double precision) 1038 if (src1_in_reg) { 1039 NewLIR1(kX86Mul32DaR, rl_src1.low_reg); 1040 } else { 1041 int displacement = SRegOffset(rl_src1.s_reg_low); 1042 LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET); 1043 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1044 
true /* is_load */, true /* is_64bit */); 1045 } 1046 1047 // EDX <- EDX + ECX (add high words) 1048 NewLIR2(kX86Add32RR, r2, r1); 1049 1050 // Result is EDX:EAX 1051 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2, 1052 INVALID_SREG, INVALID_SREG}; 1053 StoreValueWide(rl_dest, rl_result); 1054} 1055 1056void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, 1057 Instruction::Code op) { 1058 DCHECK_EQ(rl_dest.location, kLocPhysReg); 1059 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1060 if (rl_src.location == kLocPhysReg) { 1061 // Both operands are in registers. 1062 if (rl_dest.low_reg == rl_src.high_reg) { 1063 // The registers are the same, so we would clobber it before the use. 1064 int temp_reg = AllocTemp(); 1065 OpRegCopy(temp_reg, rl_dest.low_reg); 1066 rl_src.high_reg = temp_reg; 1067 } 1068 NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg); 1069 1070 x86op = GetOpcode(op, rl_dest, rl_src, true); 1071 NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg); 1072 FreeTemp(rl_src.low_reg); 1073 FreeTemp(rl_src.high_reg); 1074 return; 1075 } 1076 1077 // RHS is in memory. 
1078 DCHECK((rl_src.location == kLocDalvikFrame) || 1079 (rl_src.location == kLocCompilerTemp)); 1080 int rBase = TargetReg(kSp); 1081 int displacement = SRegOffset(rl_src.s_reg_low); 1082 1083 LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET); 1084 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 1085 true /* is_load */, true /* is64bit */); 1086 x86op = GetOpcode(op, rl_dest, rl_src, true); 1087 lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET); 1088 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 1089 true /* is_load */, true /* is64bit */); 1090} 1091 1092void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { 1093 rl_dest = UpdateLocWide(rl_dest); 1094 if (rl_dest.location == kLocPhysReg) { 1095 // Ensure we are in a register pair 1096 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1097 1098 rl_src = UpdateLocWide(rl_src); 1099 GenLongRegOrMemOp(rl_result, rl_src, op); 1100 StoreFinalValueWide(rl_dest, rl_result); 1101 return; 1102 } 1103 1104 // It wasn't in registers, so it better be in memory. 1105 DCHECK((rl_dest.location == kLocDalvikFrame) || 1106 (rl_dest.location == kLocCompilerTemp)); 1107 rl_src = LoadValueWide(rl_src, kCoreReg); 1108 1109 // Operate directly into memory. 
1110 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1111 int rBase = TargetReg(kSp); 1112 int displacement = SRegOffset(rl_dest.s_reg_low); 1113 1114 LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg); 1115 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 1116 false /* is_load */, true /* is64bit */); 1117 x86op = GetOpcode(op, rl_dest, rl_src, true); 1118 lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg); 1119 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 1120 false /* is_load */, true /* is64bit */); 1121 FreeTemp(rl_src.low_reg); 1122 FreeTemp(rl_src.high_reg); 1123} 1124 1125void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, 1126 RegLocation rl_src2, Instruction::Code op, 1127 bool is_commutative) { 1128 // Is this really a 2 operand operation? 1129 switch (op) { 1130 case Instruction::ADD_LONG_2ADDR: 1131 case Instruction::SUB_LONG_2ADDR: 1132 case Instruction::AND_LONG_2ADDR: 1133 case Instruction::OR_LONG_2ADDR: 1134 case Instruction::XOR_LONG_2ADDR: 1135 GenLongArith(rl_dest, rl_src2, op); 1136 return; 1137 default: 1138 break; 1139 } 1140 1141 if (rl_dest.location == kLocPhysReg) { 1142 RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg); 1143 1144 // We are about to clobber the LHS, so it needs to be a temp. 1145 rl_result = ForceTempWide(rl_result); 1146 1147 // Perform the operation using the RHS. 1148 rl_src2 = UpdateLocWide(rl_src2); 1149 GenLongRegOrMemOp(rl_result, rl_src2, op); 1150 1151 // And now record that the result is in the temp. 1152 StoreFinalValueWide(rl_dest, rl_result); 1153 return; 1154 } 1155 1156 // It wasn't in registers, so it better be in memory. 1157 DCHECK((rl_dest.location == kLocDalvikFrame) || 1158 (rl_dest.location == kLocCompilerTemp)); 1159 rl_src1 = UpdateLocWide(rl_src1); 1160 rl_src2 = UpdateLocWide(rl_src2); 1161 1162 // Get one of the source operands into temporary register. 
1163 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1164 if (IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg)) { 1165 GenLongRegOrMemOp(rl_src1, rl_src2, op); 1166 } else if (is_commutative) { 1167 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 1168 // We need at least one of them to be a temporary. 1169 if (!(IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg))) { 1170 rl_src1 = ForceTempWide(rl_src1); 1171 } 1172 GenLongRegOrMemOp(rl_src1, rl_src2, op); 1173 } else { 1174 // Need LHS to be the temp. 1175 rl_src1 = ForceTempWide(rl_src1); 1176 GenLongRegOrMemOp(rl_src1, rl_src2, op); 1177 } 1178 1179 StoreFinalValueWide(rl_dest, rl_src1); 1180} 1181 1182void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, 1183 RegLocation rl_src1, RegLocation rl_src2) { 1184 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1185} 1186 1187void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, 1188 RegLocation rl_src1, RegLocation rl_src2) { 1189 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false); 1190} 1191 1192void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, 1193 RegLocation rl_src1, RegLocation rl_src2) { 1194 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1195} 1196 1197void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, 1198 RegLocation rl_src1, RegLocation rl_src2) { 1199 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1200} 1201 1202void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, 1203 RegLocation rl_src1, RegLocation rl_src2) { 1204 GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); 1205} 1206 1207void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { 1208 rl_src = LoadValueWide(rl_src, kCoreReg); 1209 RegLocation rl_result = ForceTempWide(rl_src); 1210 if (rl_dest.low_reg == rl_src.high_reg) { 1211 // The registers are the same, so we would clobber it before the use. 
1212 int temp_reg = AllocTemp(); 1213 OpRegCopy(temp_reg, rl_result.low_reg); 1214 rl_result.high_reg = temp_reg; 1215 } 1216 OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg); // rLow = -rLow 1217 OpRegImm(kOpAdc, rl_result.high_reg, 0); // rHigh = rHigh + CF 1218 OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg); // rHigh = -rHigh 1219 StoreValueWide(rl_dest, rl_result); 1220} 1221 1222void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset thread_offset) { 1223 X86OpCode opcode = kX86Bkpt; 1224 switch (op) { 1225 case kOpCmp: opcode = kX86Cmp32RT; break; 1226 case kOpMov: opcode = kX86Mov32RT; break; 1227 default: 1228 LOG(FATAL) << "Bad opcode: " << op; 1229 break; 1230 } 1231 NewLIR2(opcode, r_dest, thread_offset.Int32Value()); 1232} 1233 1234/* 1235 * Generate array load 1236 */ 1237void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, 1238 RegLocation rl_index, RegLocation rl_dest, int scale) { 1239 RegisterClass reg_class = oat_reg_class_by_size(size); 1240 int len_offset = mirror::Array::LengthOffset().Int32Value(); 1241 RegLocation rl_result; 1242 rl_array = LoadValue(rl_array, kCoreReg); 1243 1244 int data_offset; 1245 if (size == kLong || size == kDouble) { 1246 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 1247 } else { 1248 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 1249 } 1250 1251 bool constant_index = rl_index.is_const; 1252 int32_t constant_index_value = 0; 1253 if (!constant_index) { 1254 rl_index = LoadValue(rl_index, kCoreReg); 1255 } else { 1256 constant_index_value = mir_graph_->ConstantValue(rl_index); 1257 // If index is constant, just fold it into the data offset 1258 data_offset += constant_index_value << scale; 1259 // treat as non array below 1260 rl_index.low_reg = INVALID_REG; 1261 } 1262 1263 /* null object? 
*/ 1264 GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); 1265 1266 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { 1267 if (constant_index) { 1268 GenMemImmedCheck(kCondLs, rl_array.low_reg, len_offset, 1269 constant_index_value, kThrowConstantArrayBounds); 1270 } else { 1271 GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg, 1272 len_offset, kThrowArrayBounds); 1273 } 1274 } 1275 rl_result = EvalLoc(rl_dest, reg_class, true); 1276 if ((size == kLong) || (size == kDouble)) { 1277 LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_result.low_reg, 1278 rl_result.high_reg, size, INVALID_SREG); 1279 StoreValueWide(rl_dest, rl_result); 1280 } else { 1281 LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, 1282 data_offset, rl_result.low_reg, INVALID_REG, size, 1283 INVALID_SREG); 1284 StoreValue(rl_dest, rl_result); 1285 } 1286} 1287 1288/* 1289 * Generate array store 1290 * 1291 */ 1292void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, 1293 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { 1294 RegisterClass reg_class = oat_reg_class_by_size(size); 1295 int len_offset = mirror::Array::LengthOffset().Int32Value(); 1296 int data_offset; 1297 1298 if (size == kLong || size == kDouble) { 1299 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 1300 } else { 1301 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 1302 } 1303 1304 rl_array = LoadValue(rl_array, kCoreReg); 1305 bool constant_index = rl_index.is_const; 1306 int32_t constant_index_value = 0; 1307 if (!constant_index) { 1308 rl_index = LoadValue(rl_index, kCoreReg); 1309 } else { 1310 // If index is constant, just fold it into the data offset 1311 constant_index_value = mir_graph_->ConstantValue(rl_index); 1312 data_offset += constant_index_value << scale; 1313 // treat as non array below 1314 rl_index.low_reg = INVALID_REG; 1315 } 1316 1317 /* null object? 
*/ 1318 GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags); 1319 1320 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { 1321 if (constant_index) { 1322 GenMemImmedCheck(kCondLs, rl_array.low_reg, len_offset, 1323 constant_index_value, kThrowConstantArrayBounds); 1324 } else { 1325 GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg, 1326 len_offset, kThrowArrayBounds); 1327 } 1328 } 1329 if ((size == kLong) || (size == kDouble)) { 1330 rl_src = LoadValueWide(rl_src, reg_class); 1331 } else { 1332 rl_src = LoadValue(rl_src, reg_class); 1333 } 1334 // If the src reg can't be byte accessed, move it to a temp first. 1335 if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) { 1336 int temp = AllocTemp(); 1337 OpRegCopy(temp, rl_src.low_reg); 1338 StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp, 1339 INVALID_REG, size, INVALID_SREG); 1340 } else { 1341 StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg, 1342 rl_src.high_reg, size, INVALID_SREG); 1343 } 1344 if (card_mark) { 1345 // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. 1346 if (!constant_index) { 1347 FreeTemp(rl_index.low_reg); 1348 } 1349 MarkGCCard(rl_src.low_reg, rl_array.low_reg); 1350 } 1351} 1352 1353RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 1354 RegLocation rl_src, int shift_amount) { 1355 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1356 switch (opcode) { 1357 case Instruction::SHL_LONG: 1358 case Instruction::SHL_LONG_2ADDR: 1359 DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. 
1360 if (shift_amount == 32) { 1361 OpRegCopy(rl_result.high_reg, rl_src.low_reg); 1362 LoadConstant(rl_result.low_reg, 0); 1363 } else if (shift_amount > 31) { 1364 OpRegCopy(rl_result.high_reg, rl_src.low_reg); 1365 FreeTemp(rl_src.high_reg); 1366 NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32); 1367 LoadConstant(rl_result.low_reg, 0); 1368 } else { 1369 OpRegCopy(rl_result.low_reg, rl_src.low_reg); 1370 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1371 NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount); 1372 NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount); 1373 } 1374 break; 1375 case Instruction::SHR_LONG: 1376 case Instruction::SHR_LONG_2ADDR: 1377 if (shift_amount == 32) { 1378 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1379 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1380 NewLIR2(kX86Sar32RI, rl_result.high_reg, 31); 1381 } else if (shift_amount > 31) { 1382 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1383 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1384 NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32); 1385 NewLIR2(kX86Sar32RI, rl_result.high_reg, 31); 1386 } else { 1387 OpRegCopy(rl_result.low_reg, rl_src.low_reg); 1388 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1389 NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount); 1390 NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount); 1391 } 1392 break; 1393 case Instruction::USHR_LONG: 1394 case Instruction::USHR_LONG_2ADDR: 1395 if (shift_amount == 32) { 1396 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1397 LoadConstant(rl_result.high_reg, 0); 1398 } else if (shift_amount > 31) { 1399 OpRegCopy(rl_result.low_reg, rl_src.high_reg); 1400 NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32); 1401 LoadConstant(rl_result.high_reg, 0); 1402 } else { 1403 OpRegCopy(rl_result.low_reg, rl_src.low_reg); 1404 OpRegCopy(rl_result.high_reg, rl_src.high_reg); 1405 NewLIR3(kX86Shrd32RRI, rl_result.low_reg, 
rl_result.high_reg, shift_amount); 1406 NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount); 1407 } 1408 break; 1409 default: 1410 LOG(FATAL) << "Unexpected case"; 1411 } 1412 return rl_result; 1413} 1414 1415void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 1416 RegLocation rl_src, RegLocation rl_shift) { 1417 // Per spec, we only care about low 6 bits of shift amount. 1418 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; 1419 if (shift_amount == 0) { 1420 rl_src = LoadValueWide(rl_src, kCoreReg); 1421 StoreValueWide(rl_dest, rl_src); 1422 return; 1423 } else if (shift_amount == 1 && 1424 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { 1425 // Need to handle this here to avoid calling StoreValueWide twice. 1426 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src); 1427 return; 1428 } 1429 if (BadOverlap(rl_src, rl_dest)) { 1430 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); 1431 return; 1432 } 1433 rl_src = LoadValueWide(rl_src, kCoreReg); 1434 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount); 1435 StoreValueWide(rl_dest, rl_result); 1436} 1437 1438void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, 1439 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 1440 switch (opcode) { 1441 case Instruction::ADD_LONG: 1442 case Instruction::AND_LONG: 1443 case Instruction::OR_LONG: 1444 case Instruction::XOR_LONG: 1445 if (rl_src2.is_const) { 1446 GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 1447 } else { 1448 DCHECK(rl_src1.is_const); 1449 GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 1450 } 1451 break; 1452 case Instruction::SUB_LONG: 1453 case Instruction::SUB_LONG_2ADDR: 1454 if (rl_src2.is_const) { 1455 GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 1456 } else { 1457 GenSubLong(opcode, rl_dest, rl_src1, rl_src2); 1458 } 1459 break; 1460 case Instruction::ADD_LONG_2ADDR: 1461 case Instruction::OR_LONG_2ADDR: 
1462 case Instruction::XOR_LONG_2ADDR: 1463 case Instruction::AND_LONG_2ADDR: 1464 if (rl_src2.is_const) { 1465 GenLongImm(rl_dest, rl_src2, opcode); 1466 } else { 1467 DCHECK(rl_src1.is_const); 1468 GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 1469 } 1470 break; 1471 default: 1472 // Default - bail to non-const handler. 1473 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); 1474 break; 1475 } 1476} 1477 1478bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { 1479 switch (op) { 1480 case Instruction::AND_LONG_2ADDR: 1481 case Instruction::AND_LONG: 1482 return value == -1; 1483 case Instruction::OR_LONG: 1484 case Instruction::OR_LONG_2ADDR: 1485 case Instruction::XOR_LONG: 1486 case Instruction::XOR_LONG_2ADDR: 1487 return value == 0; 1488 default: 1489 return false; 1490 } 1491} 1492 1493X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, 1494 bool is_high_op) { 1495 bool rhs_in_mem = rhs.location != kLocPhysReg; 1496 bool dest_in_mem = dest.location != kLocPhysReg; 1497 DCHECK(!rhs_in_mem || !dest_in_mem); 1498 switch (op) { 1499 case Instruction::ADD_LONG: 1500 case Instruction::ADD_LONG_2ADDR: 1501 if (dest_in_mem) { 1502 return is_high_op ? kX86Adc32MR : kX86Add32MR; 1503 } else if (rhs_in_mem) { 1504 return is_high_op ? kX86Adc32RM : kX86Add32RM; 1505 } 1506 return is_high_op ? kX86Adc32RR : kX86Add32RR; 1507 case Instruction::SUB_LONG: 1508 case Instruction::SUB_LONG_2ADDR: 1509 if (dest_in_mem) { 1510 return is_high_op ? kX86Sbb32MR : kX86Sub32MR; 1511 } else if (rhs_in_mem) { 1512 return is_high_op ? kX86Sbb32RM : kX86Sub32RM; 1513 } 1514 return is_high_op ? kX86Sbb32RR : kX86Sub32RR; 1515 case Instruction::AND_LONG_2ADDR: 1516 case Instruction::AND_LONG: 1517 if (dest_in_mem) { 1518 return kX86And32MR; 1519 } 1520 return rhs_in_mem ? 
kX86And32RM : kX86And32RR; 1521 case Instruction::OR_LONG: 1522 case Instruction::OR_LONG_2ADDR: 1523 if (dest_in_mem) { 1524 return kX86Or32MR; 1525 } 1526 return rhs_in_mem ? kX86Or32RM : kX86Or32RR; 1527 case Instruction::XOR_LONG: 1528 case Instruction::XOR_LONG_2ADDR: 1529 if (dest_in_mem) { 1530 return kX86Xor32MR; 1531 } 1532 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR; 1533 default: 1534 LOG(FATAL) << "Unexpected opcode: " << op; 1535 return kX86Add32RR; 1536 } 1537} 1538 1539X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, 1540 int32_t value) { 1541 bool in_mem = loc.location != kLocPhysReg; 1542 bool byte_imm = IS_SIMM8(value); 1543 DCHECK(in_mem || !IsFpReg(loc.low_reg)); 1544 switch (op) { 1545 case Instruction::ADD_LONG: 1546 case Instruction::ADD_LONG_2ADDR: 1547 if (byte_imm) { 1548 if (in_mem) { 1549 return is_high_op ? kX86Adc32MI8 : kX86Add32MI8; 1550 } 1551 return is_high_op ? kX86Adc32RI8 : kX86Add32RI8; 1552 } 1553 if (in_mem) { 1554 return is_high_op ? kX86Adc32MI : kX86Add32MI; 1555 } 1556 return is_high_op ? kX86Adc32RI : kX86Add32RI; 1557 case Instruction::SUB_LONG: 1558 case Instruction::SUB_LONG_2ADDR: 1559 if (byte_imm) { 1560 if (in_mem) { 1561 return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; 1562 } 1563 return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; 1564 } 1565 if (in_mem) { 1566 return is_high_op ? kX86Sbb32MI : kX86Sub32MI; 1567 } 1568 return is_high_op ? kX86Sbb32RI : kX86Sub32RI; 1569 case Instruction::AND_LONG_2ADDR: 1570 case Instruction::AND_LONG: 1571 if (byte_imm) { 1572 return in_mem ? kX86And32MI8 : kX86And32RI8; 1573 } 1574 return in_mem ? kX86And32MI : kX86And32RI; 1575 case Instruction::OR_LONG: 1576 case Instruction::OR_LONG_2ADDR: 1577 if (byte_imm) { 1578 return in_mem ? kX86Or32MI8 : kX86Or32RI8; 1579 } 1580 return in_mem ? kX86Or32MI : kX86Or32RI; 1581 case Instruction::XOR_LONG: 1582 case Instruction::XOR_LONG_2ADDR: 1583 if (byte_imm) { 1584 return in_mem ? 
kX86Xor32MI8 : kX86Xor32RI8; 1585 } 1586 return in_mem ? kX86Xor32MI : kX86Xor32RI; 1587 default: 1588 LOG(FATAL) << "Unexpected opcode: " << op; 1589 return kX86Add32MI; 1590 } 1591} 1592 1593void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { 1594 DCHECK(rl_src.is_const); 1595 int64_t val = mir_graph_->ConstantValueWide(rl_src); 1596 int32_t val_lo = Low32Bits(val); 1597 int32_t val_hi = High32Bits(val); 1598 rl_dest = UpdateLocWide(rl_dest); 1599 1600 // Can we just do this into memory? 1601 if ((rl_dest.location == kLocDalvikFrame) || 1602 (rl_dest.location == kLocCompilerTemp)) { 1603 int rBase = TargetReg(kSp); 1604 int displacement = SRegOffset(rl_dest.s_reg_low); 1605 1606 if (!IsNoOp(op, val_lo)) { 1607 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 1608 LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo); 1609 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 1610 false /* is_load */, true /* is64bit */); 1611 } 1612 if (!IsNoOp(op, val_hi)) { 1613 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 1614 LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi); 1615 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 1616 false /* is_load */, true /* is64bit */); 1617 } 1618 return; 1619 } 1620 1621 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1622 DCHECK_EQ(rl_result.location, kLocPhysReg); 1623 DCHECK(!IsFpReg(rl_result.low_reg)); 1624 1625 if (!IsNoOp(op, val_lo)) { 1626 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 1627 NewLIR2(x86op, rl_result.low_reg, val_lo); 1628 } 1629 if (!IsNoOp(op, val_hi)) { 1630 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); 1631 NewLIR2(x86op, rl_result.high_reg, val_hi); 1632 } 1633 StoreValueWide(rl_dest, rl_result); 1634} 1635 1636void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, 1637 RegLocation rl_src2, Instruction::Code op) { 1638 
DCHECK(rl_src2.is_const); 1639 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 1640 int32_t val_lo = Low32Bits(val); 1641 int32_t val_hi = High32Bits(val); 1642 rl_dest = UpdateLocWide(rl_dest); 1643 rl_src1 = UpdateLocWide(rl_src1); 1644 1645 // Can we do this directly into the destination registers? 1646 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && 1647 rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg && 1648 !IsFpReg(rl_dest.low_reg)) { 1649 if (!IsNoOp(op, val_lo)) { 1650 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 1651 NewLIR2(x86op, rl_dest.low_reg, val_lo); 1652 } 1653 if (!IsNoOp(op, val_hi)) { 1654 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 1655 NewLIR2(x86op, rl_dest.high_reg, val_hi); 1656 } 1657 return; 1658 } 1659 1660 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1661 DCHECK_EQ(rl_src1.location, kLocPhysReg); 1662 1663 // We need the values to be in a temporary 1664 RegLocation rl_result = ForceTempWide(rl_src1); 1665 if (!IsNoOp(op, val_lo)) { 1666 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 1667 NewLIR2(x86op, rl_result.low_reg, val_lo); 1668 } 1669 if (!IsNoOp(op, val_hi)) { 1670 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); 1671 NewLIR2(x86op, rl_result.high_reg, val_hi); 1672 } 1673 1674 StoreFinalValueWide(rl_dest, rl_result); 1675} 1676 1677// For final classes there are no sub-classes to check and so we can answer the instance-of 1678// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. 1679void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, 1680 RegLocation rl_dest, RegLocation rl_src) { 1681 RegLocation object = LoadValue(rl_src, kCoreReg); 1682 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1683 int result_reg = rl_result.low_reg; 1684 1685 // SETcc only works with EAX..EDX. 
1686 if (result_reg == object.low_reg || result_reg >= 4) { 1687 result_reg = AllocTypedTemp(false, kCoreReg); 1688 DCHECK_LT(result_reg, 4); 1689 } 1690 1691 // Assume that there is no match. 1692 LoadConstant(result_reg, 0); 1693 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL); 1694 1695 int check_class = AllocTypedTemp(false, kCoreReg); 1696 1697 // If Method* is already in a register, we can save a copy. 1698 RegLocation rl_method = mir_graph_->GetMethodLoc(); 1699 int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + 1700 (sizeof(mirror::Class*) * type_idx); 1701 1702 if (rl_method.location == kLocPhysReg) { 1703 if (use_declaring_class) { 1704 LoadWordDisp(rl_method.low_reg, 1705 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 1706 check_class); 1707 } else { 1708 LoadWordDisp(rl_method.low_reg, 1709 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 1710 check_class); 1711 LoadWordDisp(check_class, offset_of_type, check_class); 1712 } 1713 } else { 1714 LoadCurrMethodDirect(check_class); 1715 if (use_declaring_class) { 1716 LoadWordDisp(check_class, 1717 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 1718 check_class); 1719 } else { 1720 LoadWordDisp(check_class, 1721 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 1722 check_class); 1723 LoadWordDisp(check_class, offset_of_type, check_class); 1724 } 1725 } 1726 1727 // Compare the computed class to the class in the object. 1728 DCHECK_EQ(object.location, kLocPhysReg); 1729 OpRegMem(kOpCmp, check_class, object.low_reg, 1730 mirror::Object::ClassOffset().Int32Value()); 1731 1732 // Set the low byte of the result to 0 or 1 from the compare condition code. 
1733 NewLIR2(kX86Set8R, result_reg, kX86CondEq); 1734 1735 LIR* target = NewLIR0(kPseudoTargetLabel); 1736 null_branchover->target = target; 1737 FreeTemp(check_class); 1738 if (IsTemp(result_reg)) { 1739 OpRegCopy(rl_result.low_reg, result_reg); 1740 FreeTemp(result_reg); 1741 } 1742 StoreValue(rl_dest, rl_result); 1743} 1744 1745void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final, 1746 bool type_known_abstract, bool use_declaring_class, 1747 bool can_assume_type_is_in_dex_cache, 1748 uint32_t type_idx, RegLocation rl_dest, 1749 RegLocation rl_src) { 1750 FlushAllRegs(); 1751 // May generate a call - use explicit registers. 1752 LockCallTemps(); 1753 LoadCurrMethodDirect(TargetReg(kArg1)); // kArg1 gets current Method*. 1754 int class_reg = TargetReg(kArg2); // kArg2 will hold the Class*. 1755 // Reference must end up in kArg0. 1756 if (needs_access_check) { 1757 // Check we have access to type_idx and if not throw IllegalAccessError, 1758 // Caller function returns Class* in kArg0. 1759 CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccess), 1760 type_idx, true); 1761 OpRegCopy(class_reg, TargetReg(kRet0)); 1762 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); 1763 } else if (use_declaring_class) { 1764 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); 1765 LoadWordDisp(TargetReg(kArg1), 1766 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg); 1767 } else { 1768 // Load dex cache entry into class_reg (kArg2). 1769 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); 1770 LoadWordDisp(TargetReg(kArg1), 1771 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg); 1772 int32_t offset_of_type = 1773 mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*) 1774 * type_idx); 1775 LoadWordDisp(class_reg, offset_of_type, class_reg); 1776 if (!can_assume_type_is_in_dex_cache) { 1777 // Need to test presence of type in dex cache at runtime. 
1778 LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL); 1779 // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0. 1780 CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx, true); 1781 OpRegCopy(TargetReg(kArg2), TargetReg(kRet0)); // Align usage with fast path. 1782 LoadValueDirectFixed(rl_src, TargetReg(kArg0)); /* Reload Ref. */ 1783 // Rejoin code paths 1784 LIR* hop_target = NewLIR0(kPseudoTargetLabel); 1785 hop_branch->target = hop_target; 1786 } 1787 } 1788 /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */ 1789 RegLocation rl_result = GetReturn(false); 1790 1791 // SETcc only works with EAX..EDX. 1792 DCHECK_LT(rl_result.low_reg, 4); 1793 1794 // Is the class NULL? 1795 LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL); 1796 1797 /* Load object->klass_. */ 1798 DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0); 1799 LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1)); 1800 /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */ 1801 LIR* branchover = nullptr; 1802 if (type_known_final) { 1803 // Ensure top 3 bytes of result are 0. 1804 LoadConstant(rl_result.low_reg, 0); 1805 OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2)); 1806 // Set the low byte of the result to 0 or 1 from the compare condition code. 1807 NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondEq); 1808 } else { 1809 if (!type_known_abstract) { 1810 LoadConstant(rl_result.low_reg, 1); // Assume result succeeds. 1811 branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL); 1812 } 1813 OpRegCopy(TargetReg(kArg0), TargetReg(kArg2)); 1814 OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial)); 1815 } 1816 // TODO: only clobber when type isn't final? 1817 ClobberCallerSave(); 1818 /* Branch targets here. 
*/ 1819 LIR* target = NewLIR0(kPseudoTargetLabel); 1820 StoreValue(rl_dest, rl_result); 1821 branch1->target = target; 1822 if (branchover != nullptr) { 1823 branchover->target = target; 1824 } 1825} 1826 1827void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, 1828 RegLocation rl_lhs, RegLocation rl_rhs) { 1829 OpKind op = kOpBkpt; 1830 bool is_div_rem = false; 1831 bool unary = false; 1832 bool shift_op = false; 1833 bool is_two_addr = false; 1834 RegLocation rl_result; 1835 switch (opcode) { 1836 case Instruction::NEG_INT: 1837 op = kOpNeg; 1838 unary = true; 1839 break; 1840 case Instruction::NOT_INT: 1841 op = kOpMvn; 1842 unary = true; 1843 break; 1844 case Instruction::ADD_INT_2ADDR: 1845 is_two_addr = true; 1846 // Fallthrough 1847 case Instruction::ADD_INT: 1848 op = kOpAdd; 1849 break; 1850 case Instruction::SUB_INT_2ADDR: 1851 is_two_addr = true; 1852 // Fallthrough 1853 case Instruction::SUB_INT: 1854 op = kOpSub; 1855 break; 1856 case Instruction::MUL_INT_2ADDR: 1857 is_two_addr = true; 1858 // Fallthrough 1859 case Instruction::MUL_INT: 1860 op = kOpMul; 1861 break; 1862 case Instruction::DIV_INT_2ADDR: 1863 is_two_addr = true; 1864 // Fallthrough 1865 case Instruction::DIV_INT: 1866 op = kOpDiv; 1867 is_div_rem = true; 1868 break; 1869 /* NOTE: returns in kArg1 */ 1870 case Instruction::REM_INT_2ADDR: 1871 is_two_addr = true; 1872 // Fallthrough 1873 case Instruction::REM_INT: 1874 op = kOpRem; 1875 is_div_rem = true; 1876 break; 1877 case Instruction::AND_INT_2ADDR: 1878 is_two_addr = true; 1879 // Fallthrough 1880 case Instruction::AND_INT: 1881 op = kOpAnd; 1882 break; 1883 case Instruction::OR_INT_2ADDR: 1884 is_two_addr = true; 1885 // Fallthrough 1886 case Instruction::OR_INT: 1887 op = kOpOr; 1888 break; 1889 case Instruction::XOR_INT_2ADDR: 1890 is_two_addr = true; 1891 // Fallthrough 1892 case Instruction::XOR_INT: 1893 op = kOpXor; 1894 break; 1895 case Instruction::SHL_INT_2ADDR: 1896 is_two_addr = true; 1897 // 
Fallthrough 1898 case Instruction::SHL_INT: 1899 shift_op = true; 1900 op = kOpLsl; 1901 break; 1902 case Instruction::SHR_INT_2ADDR: 1903 is_two_addr = true; 1904 // Fallthrough 1905 case Instruction::SHR_INT: 1906 shift_op = true; 1907 op = kOpAsr; 1908 break; 1909 case Instruction::USHR_INT_2ADDR: 1910 is_two_addr = true; 1911 // Fallthrough 1912 case Instruction::USHR_INT: 1913 shift_op = true; 1914 op = kOpLsr; 1915 break; 1916 default: 1917 LOG(FATAL) << "Invalid word arith op: " << opcode; 1918 } 1919 1920 // Can we convert to a two address instruction? 1921 if (!is_two_addr && 1922 (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == 1923 mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) { 1924 is_two_addr = true; 1925 } 1926 1927 // Get the div/rem stuff out of the way. 1928 if (is_div_rem) { 1929 rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true); 1930 StoreValue(rl_dest, rl_result); 1931 return; 1932 } 1933 1934 if (unary) { 1935 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1936 rl_result = UpdateLoc(rl_dest); 1937 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1938 OpRegReg(op, rl_result.low_reg, rl_lhs.low_reg); 1939 } else { 1940 if (shift_op) { 1941 // X86 doesn't require masking and must use ECX. 1942 int t_reg = TargetReg(kCount); // rCX 1943 LoadValueDirectFixed(rl_rhs, t_reg); 1944 if (is_two_addr) { 1945 // Can we do this directly into memory? 1946 rl_result = UpdateLoc(rl_dest); 1947 rl_rhs = LoadValue(rl_rhs, kCoreReg); 1948 if (rl_result.location != kLocPhysReg) { 1949 // Okay, we can do this into memory 1950 OpMemReg(op, rl_result, t_reg); 1951 FreeTemp(t_reg); 1952 return; 1953 } else if (!IsFpReg(rl_result.low_reg)) { 1954 // Can do this directly into the result register 1955 OpRegReg(op, rl_result.low_reg, t_reg); 1956 FreeTemp(t_reg); 1957 StoreFinalValue(rl_dest, rl_result); 1958 return; 1959 } 1960 } 1961 // Three address form, or we can't do directly. 
1962 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1963 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1964 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, t_reg); 1965 FreeTemp(t_reg); 1966 } else { 1967 // Multiply is 3 operand only (sort of). 1968 if (is_two_addr && op != kOpMul) { 1969 // Can we do this directly into memory? 1970 rl_result = UpdateLoc(rl_dest); 1971 if (rl_result.location == kLocPhysReg) { 1972 // Can we do this from memory directly? 1973 rl_rhs = UpdateLoc(rl_rhs); 1974 if (rl_rhs.location != kLocPhysReg) { 1975 OpRegMem(op, rl_result.low_reg, rl_rhs); 1976 StoreFinalValue(rl_dest, rl_result); 1977 return; 1978 } else if (!IsFpReg(rl_rhs.low_reg)) { 1979 OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg); 1980 StoreFinalValue(rl_dest, rl_result); 1981 return; 1982 } 1983 } 1984 rl_rhs = LoadValue(rl_rhs, kCoreReg); 1985 if (rl_result.location != kLocPhysReg) { 1986 // Okay, we can do this into memory. 1987 OpMemReg(op, rl_result, rl_rhs.low_reg); 1988 return; 1989 } else if (!IsFpReg(rl_result.low_reg)) { 1990 // Can do this directly into the result register. 1991 OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg); 1992 StoreFinalValue(rl_dest, rl_result); 1993 return; 1994 } else { 1995 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1996 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1997 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 1998 } 1999 } else { 2000 // Try to use reg/memory instructions. 2001 rl_lhs = UpdateLoc(rl_lhs); 2002 rl_rhs = UpdateLoc(rl_rhs); 2003 // We can't optimize with FP registers. 2004 if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) { 2005 // Something is difficult, so fall back to the standard case. 2006 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2007 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2008 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2009 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2010 } else { 2011 // We can optimize by moving to result and using memory operands. 
2012 if (rl_rhs.location != kLocPhysReg) { 2013 // Force LHS into result. 2014 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2015 LoadValueDirect(rl_lhs, rl_result.low_reg); 2016 OpRegMem(op, rl_result.low_reg, rl_rhs); 2017 } else if (rl_lhs.location != kLocPhysReg) { 2018 // RHS is in a register; LHS is in memory. 2019 if (op != kOpSub) { 2020 // Force RHS into result and operate on memory. 2021 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2022 OpRegCopy(rl_result.low_reg, rl_rhs.low_reg); 2023 OpRegMem(op, rl_result.low_reg, rl_lhs); 2024 } else { 2025 // Subtraction isn't commutative. 2026 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2027 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2028 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2029 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2030 } 2031 } else { 2032 // Both are in registers. 2033 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2034 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2035 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2036 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2037 } 2038 } 2039 } 2040 } 2041 } 2042 StoreValue(rl_dest, rl_result); 2043} 2044 2045bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) { 2046 // If we have non-core registers, then we can't do good things. 2047 if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.low_reg)) { 2048 return false; 2049 } 2050 if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.low_reg)) { 2051 return false; 2052 } 2053 2054 // Everything will be fine :-). 2055 return true; 2056} 2057} // namespace art 2058