// int_x86.cc revision c17ebe866beb50eb6da1e6a47555cb4731467f3b
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* This file contains codegen for the X86 ISA */ 18 19#include "codegen_x86.h" 20#include "dex/quick/mir_to_lir-inl.h" 21#include "mirror/array.h" 22#include "x86_lir.h" 23 24namespace art { 25 26/* 27 * Perform register memory operation. 28 */ 29LIR* X86Mir2Lir::GenRegMemCheck(ConditionCode c_code, 30 int reg1, int base, int offset, ThrowKind kind) { 31 LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, 32 current_dalvik_offset_, reg1, base, offset); 33 OpRegMem(kOpCmp, reg1, base, offset); 34 LIR* branch = OpCondBranch(c_code, tgt); 35 // Remember branch target - will process later 36 throw_launchpads_.Insert(tgt); 37 return branch; 38} 39 40/* 41 * Perform a compare of memory to immediate value 42 */ 43LIR* X86Mir2Lir::GenMemImmedCheck(ConditionCode c_code, 44 int base, int offset, int check_value, ThrowKind kind) { 45 LIR* tgt = RawLIR(0, kPseudoThrowTarget, kind, 46 current_dalvik_offset_, base, check_value, 0); 47 NewLIR3(IS_SIMM8(check_value) ? 
kX86Cmp32MI8 : kX86Cmp32MI, base, offset, check_value); 48 LIR* branch = OpCondBranch(c_code, tgt); 49 // Remember branch target - will process later 50 throw_launchpads_.Insert(tgt); 51 return branch; 52} 53 54/* 55 * Compare two 64-bit values 56 * x = y return 0 57 * x < y return -1 58 * x > y return 1 59 */ 60void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, 61 RegLocation rl_src2) { 62 FlushAllRegs(); 63 LockCallTemps(); // Prepare for explicit register usage 64 LoadValueDirectWideFixed(rl_src1, r0, r1); 65 LoadValueDirectWideFixed(rl_src2, r2, r3); 66 // Compute (r1:r0) = (r1:r0) - (r3:r2) 67 OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 68 OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF 69 NewLIR2(kX86Set8R, r2, kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 70 NewLIR2(kX86Movzx8RR, r2, r2); 71 OpReg(kOpNeg, r2); // r2 = -r2 72 OpRegReg(kOpOr, r0, r1); // r0 = high | low - sets ZF 73 NewLIR2(kX86Set8R, r0, kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 74 NewLIR2(kX86Movzx8RR, r0, r0); 75 OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2 76 RegLocation rl_result = LocCReturn(); 77 StoreValue(rl_dest, rl_result); 78} 79 80X86ConditionCode X86ConditionEncoding(ConditionCode cond) { 81 switch (cond) { 82 case kCondEq: return kX86CondEq; 83 case kCondNe: return kX86CondNe; 84 case kCondCs: return kX86CondC; 85 case kCondCc: return kX86CondNc; 86 case kCondUlt: return kX86CondC; 87 case kCondUge: return kX86CondNc; 88 case kCondMi: return kX86CondS; 89 case kCondPl: return kX86CondNs; 90 case kCondVs: return kX86CondO; 91 case kCondVc: return kX86CondNo; 92 case kCondHi: return kX86CondA; 93 case kCondLs: return kX86CondBe; 94 case kCondGe: return kX86CondGe; 95 case kCondLt: return kX86CondL; 96 case kCondGt: return kX86CondG; 97 case kCondLe: return kX86CondLe; 98 case kCondAl: 99 case kCondNv: LOG(FATAL) << "Should not reach here"; 100 } 101 return kX86CondO; 102} 103 104LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, int src1, int src2, 105 LIR* target) 
{ 106 NewLIR2(kX86Cmp32RR, src1, src2); 107 X86ConditionCode cc = X86ConditionEncoding(cond); 108 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , 109 cc); 110 branch->target = target; 111 return branch; 112} 113 114LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, int reg, 115 int check_value, LIR* target) { 116 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { 117 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode 118 NewLIR2(kX86Test32RR, reg, reg); 119 } else { 120 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg, check_value); 121 } 122 X86ConditionCode cc = X86ConditionEncoding(cond); 123 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); 124 branch->target = target; 125 return branch; 126} 127 128LIR* X86Mir2Lir::OpRegCopyNoInsert(int r_dest, int r_src) { 129 if (X86_FPREG(r_dest) || X86_FPREG(r_src)) 130 return OpFpRegCopy(r_dest, r_src); 131 LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, 132 r_dest, r_src); 133 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { 134 res->flags.is_nop = true; 135 } 136 return res; 137} 138 139LIR* X86Mir2Lir::OpRegCopy(int r_dest, int r_src) { 140 LIR *res = OpRegCopyNoInsert(r_dest, r_src); 141 AppendLIR(res); 142 return res; 143} 144 145void X86Mir2Lir::OpRegCopyWide(int dest_lo, int dest_hi, 146 int src_lo, int src_hi) { 147 bool dest_fp = X86_FPREG(dest_lo) && X86_FPREG(dest_hi); 148 bool src_fp = X86_FPREG(src_lo) && X86_FPREG(src_hi); 149 assert(X86_FPREG(src_lo) == X86_FPREG(src_hi)); 150 assert(X86_FPREG(dest_lo) == X86_FPREG(dest_hi)); 151 if (dest_fp) { 152 if (src_fp) { 153 OpRegCopy(S2d(dest_lo, dest_hi), S2d(src_lo, src_hi)); 154 } else { 155 // TODO: Prevent this from happening in the code. The result is often 156 // unused or could have been loaded more easily from memory. 
157 NewLIR2(kX86MovdxrRR, dest_lo, src_lo); 158 dest_hi = AllocTempDouble(); 159 NewLIR2(kX86MovdxrRR, dest_hi, src_hi); 160 NewLIR2(kX86PunpckldqRR, dest_lo, dest_hi); 161 FreeTemp(dest_hi); 162 } 163 } else { 164 if (src_fp) { 165 NewLIR2(kX86MovdrxRR, dest_lo, src_lo); 166 NewLIR2(kX86PsrlqRI, src_lo, 32); 167 NewLIR2(kX86MovdrxRR, dest_hi, src_lo); 168 } else { 169 // Handle overlap 170 if (src_hi == dest_lo && src_lo == dest_hi) { 171 // Deal with cycles. 172 int temp_reg = AllocTemp(); 173 OpRegCopy(temp_reg, dest_hi); 174 OpRegCopy(dest_hi, dest_lo); 175 OpRegCopy(dest_lo, temp_reg); 176 FreeTemp(temp_reg); 177 } else if (src_hi == dest_lo) { 178 OpRegCopy(dest_hi, src_hi); 179 OpRegCopy(dest_lo, src_lo); 180 } else { 181 OpRegCopy(dest_lo, src_lo); 182 OpRegCopy(dest_hi, src_hi); 183 } 184 } 185 } 186} 187 188void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { 189 RegLocation rl_result; 190 RegLocation rl_src = mir_graph_->GetSrc(mir, 0); 191 RegLocation rl_dest = mir_graph_->GetDest(mir); 192 rl_src = LoadValue(rl_src, kCoreReg); 193 194 // The kMirOpSelect has two variants, one for constants and one for moves. 
195 const bool is_constant_case = (mir->ssa_rep->num_uses == 1); 196 197 if (is_constant_case) { 198 int true_val = mir->dalvikInsn.vB; 199 int false_val = mir->dalvikInsn.vC; 200 rl_result = EvalLoc(rl_dest, kCoreReg, true); 201 202 /* 203 * 1) When the true case is zero and result_reg is not same as src_reg: 204 * xor result_reg, result_reg 205 * cmp $0, src_reg 206 * mov t1, $false_case 207 * cmovnz result_reg, t1 208 * 2) When the false case is zero and result_reg is not same as src_reg: 209 * xor result_reg, result_reg 210 * cmp $0, src_reg 211 * mov t1, $true_case 212 * cmovz result_reg, t1 213 * 3) All other cases (we do compare first to set eflags): 214 * cmp $0, src_reg 215 * mov result_reg, $true_case 216 * mov t1, $false_case 217 * cmovnz result_reg, t1 218 */ 219 const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.low_reg == rl_result.low_reg); 220 const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); 221 const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src); 222 const bool catch_all_case = !(true_zero_case || false_zero_case); 223 224 if (true_zero_case || false_zero_case) { 225 OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg); 226 } 227 228 if (true_zero_case || false_zero_case || catch_all_case) { 229 OpRegImm(kOpCmp, rl_src.low_reg, 0); 230 } 231 232 if (catch_all_case) { 233 OpRegImm(kOpMov, rl_result.low_reg, true_val); 234 } 235 236 if (true_zero_case || false_zero_case || catch_all_case) { 237 int immediateForTemp = false_zero_case ? true_val : false_val; 238 int temp1_reg = AllocTemp(); 239 OpRegImm(kOpMov, temp1_reg, immediateForTemp); 240 241 ConditionCode cc = false_zero_case ? 
kCondEq : kCondNe; 242 OpCondRegReg(kOpCmov, cc, rl_result.low_reg, temp1_reg); 243 244 FreeTemp(temp1_reg); 245 } 246 } else { 247 RegLocation rl_true = mir_graph_->GetSrc(mir, 1); 248 RegLocation rl_false = mir_graph_->GetSrc(mir, 2); 249 rl_true = LoadValue(rl_true, kCoreReg); 250 rl_false = LoadValue(rl_false, kCoreReg); 251 rl_result = EvalLoc(rl_dest, kCoreReg, true); 252 253 /* 254 * 1) When true case is already in place: 255 * cmp $0, src_reg 256 * cmovnz result_reg, false_reg 257 * 2) When false case is already in place: 258 * cmp $0, src_reg 259 * cmovz result_reg, true_reg 260 * 3) When neither cases are in place: 261 * cmp $0, src_reg 262 * mov result_reg, true_reg 263 * cmovnz result_reg, false_reg 264 */ 265 266 // kMirOpSelect is generated just for conditional cases when comparison is done with zero. 267 OpRegImm(kOpCmp, rl_src.low_reg, 0); 268 269 if (rl_result.low_reg == rl_true.low_reg) { 270 OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg); 271 } else if (rl_result.low_reg == rl_false.low_reg) { 272 OpCondRegReg(kOpCmov, kCondEq, rl_result.low_reg, rl_true.low_reg); 273 } else { 274 OpRegCopy(rl_result.low_reg, rl_true.low_reg); 275 OpCondRegReg(kOpCmov, kCondNe, rl_result.low_reg, rl_false.low_reg); 276 } 277 } 278 279 StoreValue(rl_dest, rl_result); 280} 281 282void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { 283 LIR* taken = &block_label_list_[bb->taken]; 284 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); 285 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); 286 ConditionCode ccode = mir->meta.ccode; 287 288 if (rl_src1.is_const) { 289 std::swap(rl_src1, rl_src2); 290 ccode = FlipComparisonOrder(ccode); 291 } 292 if (rl_src2.is_const) { 293 // Do special compare/branch against simple const operand 294 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 295 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); 296 return; 297 } 298 299 FlushAllRegs(); 300 LockCallTemps(); // Prepare for explicit 
register usage 301 LoadValueDirectWideFixed(rl_src1, r0, r1); 302 LoadValueDirectWideFixed(rl_src2, r2, r3); 303 // Swap operands and condition code to prevent use of zero flag. 304 if (ccode == kCondLe || ccode == kCondGt) { 305 // Compute (r3:r2) = (r3:r2) - (r1:r0) 306 OpRegReg(kOpSub, r2, r0); // r2 = r2 - r0 307 OpRegReg(kOpSbc, r3, r1); // r3 = r3 - r1 - CF 308 } else { 309 // Compute (r1:r0) = (r1:r0) - (r3:r2) 310 OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2 311 OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF 312 } 313 switch (ccode) { 314 case kCondEq: 315 case kCondNe: 316 OpRegReg(kOpOr, r0, r1); // r0 = r0 | r1 317 break; 318 case kCondLe: 319 ccode = kCondGe; 320 break; 321 case kCondGt: 322 ccode = kCondLt; 323 break; 324 case kCondLt: 325 case kCondGe: 326 break; 327 default: 328 LOG(FATAL) << "Unexpected ccode: " << ccode; 329 } 330 OpCondBranch(ccode, taken); 331} 332 333void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, 334 int64_t val, ConditionCode ccode) { 335 int32_t val_lo = Low32Bits(val); 336 int32_t val_hi = High32Bits(val); 337 LIR* taken = &block_label_list_[bb->taken]; 338 LIR* not_taken = &block_label_list_[bb->fall_through]; 339 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 340 int32_t low_reg = rl_src1.low_reg; 341 int32_t high_reg = rl_src1.high_reg; 342 343 if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { 344 int t_reg = AllocTemp(); 345 OpRegRegReg(kOpOr, t_reg, low_reg, high_reg); 346 FreeTemp(t_reg); 347 OpCondBranch(ccode, taken); 348 return; 349 } 350 351 OpRegImm(kOpCmp, high_reg, val_hi); 352 switch (ccode) { 353 case kCondEq: 354 case kCondNe: 355 OpCondBranch(kCondNe, (ccode == kCondEq) ? 
not_taken : taken); 356 break; 357 case kCondLt: 358 OpCondBranch(kCondLt, taken); 359 OpCondBranch(kCondGt, not_taken); 360 ccode = kCondUlt; 361 break; 362 case kCondLe: 363 OpCondBranch(kCondLt, taken); 364 OpCondBranch(kCondGt, not_taken); 365 ccode = kCondLs; 366 break; 367 case kCondGt: 368 OpCondBranch(kCondGt, taken); 369 OpCondBranch(kCondLt, not_taken); 370 ccode = kCondHi; 371 break; 372 case kCondGe: 373 OpCondBranch(kCondGt, taken); 374 OpCondBranch(kCondLt, not_taken); 375 ccode = kCondUge; 376 break; 377 default: 378 LOG(FATAL) << "Unexpected ccode: " << ccode; 379 } 380 OpCmpImmBranch(ccode, low_reg, val_lo, taken); 381} 382 383void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) { 384 // It does not make sense to calculate magic and shift for zero divisor. 385 DCHECK_NE(divisor, 0); 386 387 /* According to H.S.Warren's Hacker's Delight Chapter 10 and 388 * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 389 * The magic number M and shift S can be calculated in the following way: 390 * Let nc be the most positive value of numerator(n) such that nc = kd - 1, 391 * where divisor(d) >=2. 392 * Let nc be the most negative value of numerator(n) such that nc = kd + 1, 393 * where divisor(d) <= -2. 394 * Thus nc can be calculated like: 395 * nc = 2^31 + 2^31 % d - 1, where d >= 2 396 * nc = -2^31 + (2^31 + 1) % d, where d >= 2. 397 * 398 * So the shift p is the smallest p satisfying 399 * 2^p > nc * (d - 2^p % d), where d >= 2 400 * 2^p > nc * (d + 2^p % d), where d <= -2. 401 * 402 * the magic number M is calcuated by 403 * M = (2^p + d - 2^p % d) / d, where d >= 2 404 * M = (2^p - d - 2^p % d) / d, where d <= -2. 405 * 406 * Notice that p is always bigger than or equal to 32, so we just return 32-p as 407 * the shift number S. 408 */ 409 410 int32_t p = 31; 411 const uint32_t two31 = 0x80000000U; 412 413 // Initialize the computations. 414 uint32_t abs_d = (divisor >= 0) ? 
divisor : -divisor; 415 uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31); 416 uint32_t abs_nc = tmp - 1 - tmp % abs_d; 417 uint32_t quotient1 = two31 / abs_nc; 418 uint32_t remainder1 = two31 % abs_nc; 419 uint32_t quotient2 = two31 / abs_d; 420 uint32_t remainder2 = two31 % abs_d; 421 422 /* 423 * To avoid handling both positive and negative divisor, Hacker's Delight 424 * introduces a method to handle these 2 cases together to avoid duplication. 425 */ 426 uint32_t delta; 427 do { 428 p++; 429 quotient1 = 2 * quotient1; 430 remainder1 = 2 * remainder1; 431 if (remainder1 >= abs_nc) { 432 quotient1++; 433 remainder1 = remainder1 - abs_nc; 434 } 435 quotient2 = 2 * quotient2; 436 remainder2 = 2 * remainder2; 437 if (remainder2 >= abs_d) { 438 quotient2++; 439 remainder2 = remainder2 - abs_d; 440 } 441 delta = abs_d - remainder2; 442 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); 443 444 magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); 445 shift = p - 32; 446} 447 448RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo, 449 int lit, bool is_div) { 450 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; 451 return rl_dest; 452} 453 454RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, 455 int imm, bool is_div) { 456 // Use a multiply (and fixup) to perform an int div/rem by a constant. 457 458 // We have to use fixed registers, so flush all the temps. 459 FlushAllRegs(); 460 LockCallTemps(); // Prepare for explicit register usage. 461 462 // Assume that the result will be in EDX. 463 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, 464 r2, INVALID_REG, INVALID_SREG, INVALID_SREG}; 465 466 // handle div/rem by 1 special case. 467 if (imm == 1) { 468 if (is_div) { 469 // x / 1 == x. 470 StoreValue(rl_result, rl_src); 471 } else { 472 // x % 1 == 0. 473 LoadConstantNoClobber(r0, 0); 474 // For this case, return the result in EAX. 
475 rl_result.low_reg = r0; 476 } 477 } else if (imm == -1) { // handle 0x80000000 / -1 special case. 478 if (is_div) { 479 LIR *minint_branch = 0; 480 LoadValueDirectFixed(rl_src, r0); 481 OpRegImm(kOpCmp, r0, 0x80000000); 482 minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); 483 484 // for x != MIN_INT, x / -1 == -x. 485 NewLIR1(kX86Neg32R, r0); 486 487 LIR* branch_around = NewLIR1(kX86Jmp8, 0); 488 // The target for cmp/jmp above. 489 minint_branch->target = NewLIR0(kPseudoTargetLabel); 490 // EAX already contains the right value (0x80000000), 491 branch_around->target = NewLIR0(kPseudoTargetLabel); 492 } else { 493 // x % -1 == 0. 494 LoadConstantNoClobber(r0, 0); 495 } 496 // For this case, return the result in EAX. 497 rl_result.low_reg = r0; 498 } else { 499 CHECK(imm <= -2 || imm >= 2); 500 // Use H.S.Warren's Hacker's Delight Chapter 10 and 501 // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 502 int magic, shift; 503 CalculateMagicAndShift(imm, magic, shift); 504 505 /* 506 * For imm >= 2, 507 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 508 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. 509 * For imm <= -2, 510 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 511 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. 512 * We implement this algorithm in the following way: 513 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX 514 * 2. if imm > 0 and magic < 0, add numerator to EDX 515 * if imm < 0 and magic > 0, sub numerator from EDX 516 * 3. if S !=0, SAR S bits for EDX 517 * 4. add 1 to EDX if EDX < 0 518 * 5. Thus, EDX is the quotient 519 */ 520 521 // Numerator into EAX. 522 int numerator_reg = -1; 523 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) { 524 // We will need the value later. 525 if (rl_src.location == kLocPhysReg) { 526 // We can use it directly. 
527 DCHECK(rl_src.low_reg != r0 && rl_src.low_reg != r2); 528 numerator_reg = rl_src.low_reg; 529 } else { 530 LoadValueDirectFixed(rl_src, r1); 531 numerator_reg = r1; 532 } 533 OpRegCopy(r0, numerator_reg); 534 } else { 535 // Only need this once. Just put it into EAX. 536 LoadValueDirectFixed(rl_src, r0); 537 } 538 539 // EDX = magic. 540 LoadConstantNoClobber(r2, magic); 541 542 // EDX:EAX = magic & dividend. 543 NewLIR1(kX86Imul32DaR, r2); 544 545 if (imm > 0 && magic < 0) { 546 // Add numerator to EDX. 547 DCHECK_NE(numerator_reg, -1); 548 NewLIR2(kX86Add32RR, r2, numerator_reg); 549 } else if (imm < 0 && magic > 0) { 550 DCHECK_NE(numerator_reg, -1); 551 NewLIR2(kX86Sub32RR, r2, numerator_reg); 552 } 553 554 // Do we need the shift? 555 if (shift != 0) { 556 // Shift EDX by 'shift' bits. 557 NewLIR2(kX86Sar32RI, r2, shift); 558 } 559 560 // Add 1 to EDX if EDX < 0. 561 562 // Move EDX to EAX. 563 OpRegCopy(r0, r2); 564 565 // Move sign bit to bit 0, zeroing the rest. 566 NewLIR2(kX86Shr32RI, r2, 31); 567 568 // EDX = EDX + EAX. 569 NewLIR2(kX86Add32RR, r2, r0); 570 571 // Quotient is in EDX. 572 if (!is_div) { 573 // We need to compute the remainder. 574 // Remainder is divisor - (quotient * imm). 575 DCHECK_NE(numerator_reg, -1); 576 OpRegCopy(r0, numerator_reg); 577 578 // EAX = numerator * imm. 579 OpRegRegImm(kOpMul, r2, r2, imm); 580 581 // EDX -= EAX. 582 NewLIR2(kX86Sub32RR, r0, r2); 583 584 // For this case, return the result in EAX. 585 rl_result.low_reg = r0; 586 } 587 } 588 589 return rl_result; 590} 591 592RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo, 593 int reg_hi, bool is_div) { 594 LOG(FATAL) << "Unexpected use of GenDivRem for x86"; 595 return rl_dest; 596} 597 598RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, 599 RegLocation rl_src2, bool is_div, bool check_zero) { 600 // We have to use fixed registers, so flush all the temps. 
601 FlushAllRegs(); 602 LockCallTemps(); // Prepare for explicit register usage. 603 604 // Load LHS into EAX. 605 LoadValueDirectFixed(rl_src1, r0); 606 607 // Load RHS into EBX. 608 LoadValueDirectFixed(rl_src2, r1); 609 610 // Copy LHS sign bit into EDX. 611 NewLIR0(kx86Cdq32Da); 612 613 if (check_zero) { 614 // Handle division by zero case. 615 GenImmedCheck(kCondEq, r1, 0, kThrowDivZero); 616 } 617 618 // Have to catch 0x80000000/-1 case, or we will get an exception! 619 OpRegImm(kOpCmp, r1, -1); 620 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); 621 622 // RHS is -1. 623 OpRegImm(kOpCmp, r0, 0x80000000); 624 LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); 625 626 // In 0x80000000/-1 case. 627 if (!is_div) { 628 // For DIV, EAX is already right. For REM, we need EDX 0. 629 LoadConstantNoClobber(r2, 0); 630 } 631 LIR* done = NewLIR1(kX86Jmp8, 0); 632 633 // Expected case. 634 minus_one_branch->target = NewLIR0(kPseudoTargetLabel); 635 minint_branch->target = minus_one_branch->target; 636 NewLIR1(kX86Idivmod32DaR, r1); 637 done->target = NewLIR0(kPseudoTargetLabel); 638 639 // Result is in EAX for div and EDX for rem. 640 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, 641 r0, INVALID_REG, INVALID_SREG, INVALID_SREG}; 642 if (!is_div) { 643 rl_result.low_reg = r2; 644 } 645 return rl_result; 646} 647 648bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { 649 DCHECK_EQ(cu_->instruction_set, kX86); 650 651 // Get the two arguments to the invoke and place them in GP registers. 652 RegLocation rl_src1 = info->args[0]; 653 RegLocation rl_src2 = info->args[1]; 654 rl_src1 = LoadValue(rl_src1, kCoreReg); 655 rl_src2 = LoadValue(rl_src2, kCoreReg); 656 657 RegLocation rl_dest = InlineTarget(info); 658 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 659 660 /* 661 * If the result register is the same as the second element, then we need to be careful. 
662 * The reason is that the first copy will inadvertently clobber the second element with 663 * the first one thus yielding the wrong result. Thus we do a swap in that case. 664 */ 665 if (rl_result.low_reg == rl_src2.low_reg) { 666 std::swap(rl_src1, rl_src2); 667 } 668 669 // Pick the first integer as min/max. 670 OpRegCopy(rl_result.low_reg, rl_src1.low_reg); 671 672 // If the integers are both in the same register, then there is nothing else to do 673 // because they are equal and we have already moved one into the result. 674 if (rl_src1.low_reg != rl_src2.low_reg) { 675 // It is possible we didn't pick correctly so do the actual comparison now. 676 OpRegReg(kOpCmp, rl_src1.low_reg, rl_src2.low_reg); 677 678 // Conditionally move the other integer into the destination register. 679 ConditionCode condition_code = is_min ? kCondGt : kCondLt; 680 OpCondRegReg(kOpCmov, condition_code, rl_result.low_reg, rl_src2.low_reg); 681 } 682 683 StoreValue(rl_dest, rl_result); 684 return true; 685} 686 687bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { 688 RegLocation rl_src_address = info->args[0]; // long address 689 rl_src_address.wide = 0; // ignore high half in info->args[1] 690 RegLocation rl_dest = size == kLong ? InlineTargetWide(info) : InlineTarget(info); 691 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 692 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 693 if (size == kLong) { 694 // Unaligned access is allowed on x86. 695 LoadBaseDispWide(rl_address.low_reg, 0, rl_result.low_reg, rl_result.high_reg, INVALID_SREG); 696 StoreValueWide(rl_dest, rl_result); 697 } else { 698 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 699 // Unaligned access is allowed on x86. 
700 LoadBaseDisp(rl_address.low_reg, 0, rl_result.low_reg, size, INVALID_SREG); 701 StoreValue(rl_dest, rl_result); 702 } 703 return true; 704} 705 706bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { 707 RegLocation rl_src_address = info->args[0]; // long address 708 rl_src_address.wide = 0; // ignore high half in info->args[1] 709 RegLocation rl_src_value = info->args[2]; // [size] value 710 RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); 711 if (size == kLong) { 712 // Unaligned access is allowed on x86. 713 RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg); 714 StoreBaseDispWide(rl_address.low_reg, 0, rl_value.low_reg, rl_value.high_reg); 715 } else { 716 DCHECK(size == kSignedByte || size == kSignedHalf || size == kWord); 717 // Unaligned access is allowed on x86. 718 RegLocation rl_value = LoadValue(rl_src_value, kCoreReg); 719 StoreBaseDisp(rl_address.low_reg, 0, rl_value.low_reg, size); 720 } 721 return true; 722} 723 724void X86Mir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) { 725 NewLIR5(kX86Lea32RA, rBase, reg1, reg2, scale, offset); 726} 727 728void X86Mir2Lir::OpTlsCmp(ThreadOffset offset, int val) { 729 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val); 730} 731 732bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { 733 DCHECK_EQ(cu_->instruction_set, kX86); 734 // Unused - RegLocation rl_src_unsafe = info->args[0]; 735 RegLocation rl_src_obj = info->args[1]; // Object - known non-null 736 RegLocation rl_src_offset = info->args[2]; // long low 737 rl_src_offset.wide = 0; // ignore high half in info->args[3] 738 RegLocation rl_src_expected = info->args[4]; // int, long or Object 739 // If is_long, high half is in info->args[5] 740 RegLocation rl_src_new_value = info->args[is_long ? 
6 : 5]; // int, long or Object 741 // If is_long, high half is in info->args[7] 742 743 if (is_long) { 744 FlushAllRegs(); 745 LockCallTemps(); 746 LoadValueDirectWideFixed(rl_src_expected, rAX, rDX); 747 LoadValueDirectWideFixed(rl_src_new_value, rBX, rCX); 748 NewLIR1(kX86Push32R, rDI); 749 MarkTemp(rDI); 750 LockTemp(rDI); 751 NewLIR1(kX86Push32R, rSI); 752 MarkTemp(rSI); 753 LockTemp(rSI); 754 const int push_offset = 4 /* push edi */ + 4 /* push esi */; 755 LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_obj.s_reg_low) + push_offset, rDI); 756 LoadWordDisp(TargetReg(kSp), SRegOffset(rl_src_offset.s_reg_low) + push_offset, rSI); 757 NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0); 758 FreeTemp(rSI); 759 UnmarkTemp(rSI); 760 NewLIR1(kX86Pop32R, rSI); 761 FreeTemp(rDI); 762 UnmarkTemp(rDI); 763 NewLIR1(kX86Pop32R, rDI); 764 FreeCallTemps(); 765 } else { 766 // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX. 767 FlushReg(r0); 768 LockTemp(r0); 769 770 // Release store semantics, get the barrier out of the way. TODO: revisit 771 GenMemBarrier(kStoreLoad); 772 773 RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); 774 RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg); 775 776 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { 777 // Mark card for object assuming new value is stored. 778 FreeTemp(r0); // Temporarily release EAX for MarkGCCard(). 
779 MarkGCCard(rl_new_value.low_reg, rl_object.low_reg); 780 LockTemp(r0); 781 } 782 783 RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); 784 LoadValueDirect(rl_src_expected, r0); 785 NewLIR5(kX86LockCmpxchgAR, rl_object.low_reg, rl_offset.low_reg, 0, 0, rl_new_value.low_reg); 786 787 FreeTemp(r0); 788 } 789 790 // Convert ZF to boolean 791 RegLocation rl_dest = InlineTarget(info); // boolean place for result 792 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 793 NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondZ); 794 NewLIR2(kX86Movzx8RR, rl_result.low_reg, rl_result.low_reg); 795 StoreValue(rl_dest, rl_result); 796 return true; 797} 798 799LIR* X86Mir2Lir::OpPcRelLoad(int reg, LIR* target) { 800 CHECK(base_of_code_ != nullptr); 801 802 // Address the start of the method 803 RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); 804 LoadValueDirectFixed(rl_method, reg); 805 store_method_addr_used_ = true; 806 807 // Load the proper value from the literal area. 808 // We don't know the proper offset for the value, so pick one that will force 809 // 4 byte offset. We will fix this up in the assembler later to have the right 810 // value. 
811 LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg, reg, 256, 0, 0, target); 812 res->target = target; 813 res->flags.fixup = kFixupLoad; 814 SetMemRefType(res, true, kLiteral); 815 store_method_addr_used_ = true; 816 return res; 817} 818 819LIR* X86Mir2Lir::OpVldm(int rBase, int count) { 820 LOG(FATAL) << "Unexpected use of OpVldm for x86"; 821 return NULL; 822} 823 824LIR* X86Mir2Lir::OpVstm(int rBase, int count) { 825 LOG(FATAL) << "Unexpected use of OpVstm for x86"; 826 return NULL; 827} 828 829void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, 830 RegLocation rl_result, int lit, 831 int first_bit, int second_bit) { 832 int t_reg = AllocTemp(); 833 OpRegRegImm(kOpLsl, t_reg, rl_src.low_reg, second_bit - first_bit); 834 OpRegRegReg(kOpAdd, rl_result.low_reg, rl_src.low_reg, t_reg); 835 FreeTemp(t_reg); 836 if (first_bit != 0) { 837 OpRegRegImm(kOpLsl, rl_result.low_reg, rl_result.low_reg, first_bit); 838 } 839} 840 841void X86Mir2Lir::GenDivZeroCheck(int reg_lo, int reg_hi) { 842 // We are not supposed to clobber either of the provided registers, so allocate 843 // a temporary to use for the check. 844 int t_reg = AllocTemp(); 845 846 // Doing an OR is a quick way to check if both registers are zero. This will set the flags. 847 OpRegRegReg(kOpOr, t_reg, reg_lo, reg_hi); 848 849 // In case of zero, throw ArithmeticException. 850 GenCheck(kCondEq, kThrowDivZero); 851 852 // The temp is no longer needed so free it at this time. 853 FreeTemp(t_reg); 854} 855 856// Test suspend flag, return target of taken suspend branch 857LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { 858 OpTlsCmp(Thread::ThreadFlagsOffset(), 0); 859 return OpCondBranch((target == NULL) ? 
                                        kCondNe : kCondEq, target);
}

// Decrement register and branch on condition
LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
  OpRegImm(kOpSub, reg, 1);  // SUB sets the flags the conditional branch below consumes.
  return OpCondBranch(c_code, target);
}

// Not used on x86; reaching here indicates a dispatch error in the shared code.
bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  LOG(FATAL) << "Unexpected use of smallLiteralDive in x86";
  return false;
}

// IT (if-then) blocks are a Thumb2 concept; never valid for x86.
LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) {
  LOG(FATAL) << "Unexpected use of OpIT in x86";
  return NULL;
}

// 32-bit multiply of register 'src' by constant 'val' into 'dest',
// special-casing 0 (xor clears dest) and 1 (plain register copy).
void X86Mir2Lir::GenImulRegImm(int dest, int src, int val) {
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest, dest);
      break;
    case 1:
      OpRegCopy(dest, src);
      break;
    default:
      OpRegRegImm(kOpMul, dest, src, val);
      break;
  }
}

// 32-bit multiply of an in-memory operand ('sreg', found at 'displacement' off
// the x86 stack pointer) by constant 'val' into 'dest'; same 0/1 special cases.
void X86Mir2Lir::GenImulMemImm(int dest, int sreg, int displacement, int val) {
  LIR *m;
  switch (val) {
    case 0:
      NewLIR2(kX86Xor32RR, dest, dest);
      break;
    case 1:
      LoadBaseDisp(rX86_SP, displacement, dest, kWord, sreg);
      break;
    default:
      // Use the 8-bit immediate form when the constant fits in a signed byte.
      m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest, rX86_SP,
                  displacement, val);
      AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */);
      break;
  }
}

// 64-bit multiply. The product is built from 32-bit partial products:
//   (1H:1L) * (2H:2L) = ((1H*2L + 1L*2H) << 32) + widening(1L*2L)
// using EDX:EAX for the widening MUL and ECX to accumulate the high word.
// Constant operands get strength-reduced special cases first.
void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
  if (rl_src1.is_const) {
    // Canonicalize: if either operand is constant, make it rl_src2.
    std::swap(rl_src1, rl_src2);
  }
  // Are we multiplying by a constant?
  if (rl_src2.is_const) {
    // Do special compare/branch against simple const operand
    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    if (val == 0) {
      // x * 0 == 0: just clear both result halves.
      RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
      OpRegReg(kOpXor, rl_result.low_reg, rl_result.low_reg);
      OpRegReg(kOpXor, rl_result.high_reg, rl_result.high_reg);
      StoreValueWide(rl_dest, rl_result);
      return;
    } else if (val == 1) {
      // x * 1 == x.
      StoreValueWide(rl_dest, rl_src1);
      return;
    } else if (val == 2) {
      // x * 2 == x + x.
      GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1);
      return;
    } else if (IsPowerOfTwo(val)) {
      // x * 2^k == x << k, provided src/dest don't badly overlap.
      int shift_amount = LowestSetBit(val);
      if (!BadOverlap(rl_src1, rl_dest)) {
        rl_src1 = LoadValueWide(rl_src1, kCoreReg);
        RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest,
                                                  rl_src1, shift_amount);
        StoreValueWide(rl_dest, rl_result);
        return;
      }
    }

    // Okay, just bite the bullet and do it.
    int32_t val_lo = Low32Bits(val);
    int32_t val_hi = High32Bits(val);
    FlushAllRegs();
    LockCallTemps();  // Prepare for explicit register usage.
    rl_src1 = UpdateLocWide(rl_src1);
    bool src1_in_reg = rl_src1.location == kLocPhysReg;
    int displacement = SRegOffset(rl_src1.s_reg_low);

    // ECX <- 1H * 2L
    // EAX <- 1L * 2H
    if (src1_in_reg) {
      GenImulRegImm(r1, rl_src1.high_reg, val_lo);
      GenImulRegImm(r0, rl_src1.low_reg, val_hi);
    } else {
      GenImulMemImm(r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo);
      GenImulMemImm(r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, r1, r0);

    // EAX <- 2L
    LoadConstantNoClobber(r0, val_lo);

    // EDX:EAX <- 2L * 1L (double precision)
    if (src1_in_reg) {
      NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
    } else {
      LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // EDX <- EDX + ECX (add high words)
    NewLIR2(kX86Add32RR, r2, r1);

    // Result is EDX:EAX
    RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
                             INVALID_SREG, INVALID_SREG};
    StoreValueWide(rl_dest, rl_result);
    return;
  }

  // Nope.  Do it the hard way
  // Check for V*V.  We can eliminate a multiply in that case, as 2L*1H == 2H*1L.
  bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
                   mir_graph_->SRegToVReg(rl_src2.s_reg_low);

  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.
  rl_src1 = UpdateLocWide(rl_src1);
  rl_src2 = UpdateLocWide(rl_src2);

  // At this point, the VRs are in their home locations.
  bool src1_in_reg = rl_src1.location == kLocPhysReg;
  bool src2_in_reg = rl_src2.location == kLocPhysReg;

  // ECX <- 1H
  if (src1_in_reg) {
    NewLIR2(kX86Mov32RR, r1, rl_src1.high_reg);
  } else {
    LoadBaseDisp(rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, r1,
                 kWord, GetSRegHi(rl_src1.s_reg_low));
  }

  if (is_square) {
    // Take advantage of the fact that the values are the same.
    // ECX <- ECX * 2L  (1H * 2L)
    if (src2_in_reg) {
      NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg);
    } else {
      int displacement = SRegOffset(rl_src2.s_reg_low);
      LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- 2*ECX (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, r1, r1);
  } else {
    // EAX <- 2H
    if (src2_in_reg) {
      NewLIR2(kX86Mov32RR, r0, rl_src2.high_reg);
    } else {
      LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, r0,
                   kWord, GetSRegHi(rl_src2.s_reg_low));
    }

    // EAX <- EAX * 1L  (2H * 1L)
    if (src1_in_reg) {
      NewLIR2(kX86Imul32RR, r0, rl_src1.low_reg);
    } else {
      int displacement = SRegOffset(rl_src1.s_reg_low);
      LIR *m = NewLIR3(kX86Imul32RM, r0, rX86_SP, displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- ECX * 2L  (1H * 2L)
    if (src2_in_reg) {
      NewLIR2(kX86Imul32RR, r1, rl_src2.low_reg);
    } else {
      int displacement = SRegOffset(rl_src2.s_reg_low);
      LIR *m = NewLIR3(kX86Imul32RM, r1, rX86_SP, displacement + LOWORD_OFFSET);
      AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                              true /* is_load */, true /* is_64bit */);
    }

    // ECX <- ECX + EAX  (2H * 1L) + (1H * 2L)
    NewLIR2(kX86Add32RR, r1, r0);
  }

  // EAX <- 2L
  if (src2_in_reg) {
    NewLIR2(kX86Mov32RR, r0, rl_src2.low_reg);
  } else {
    LoadBaseDisp(rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, r0,
                 kWord, rl_src2.s_reg_low);
  }

  // EDX:EAX <- 2L * 1L (double precision)
  if (src1_in_reg) {
    NewLIR1(kX86Mul32DaR, rl_src1.low_reg);
  } else {
    int displacement = SRegOffset(rl_src1.s_reg_low);
    LIR *m = NewLIR2(kX86Mul32DaM, rX86_SP, displacement + LOWORD_OFFSET);
    AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is_64bit */);
  }

  // EDX <- EDX + ECX (add high words)
  NewLIR2(kX86Add32RR, r2, r1);

  // Result is EDX:EAX
  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r2,
                           INVALID_SREG, INVALID_SREG};
  StoreValueWide(rl_dest, rl_result);
}

// Apply a 64-bit two-address ALU op (dest op= src): emit the low-word opcode
// then the matching high-word opcode (e.g. ADD then ADC), with the RHS taken
// either from registers or directly from its Dalvik frame slot.
void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
                                   Instruction::Code op) {
  DCHECK_EQ(rl_dest.location, kLocPhysReg);
  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
  if (rl_src.location == kLocPhysReg) {
    // Both operands are in registers.
    if (rl_dest.low_reg == rl_src.high_reg) {
      // The registers are the same, so we would clobber it before the use.
      int temp_reg = AllocTemp();
      OpRegCopy(temp_reg, rl_dest.low_reg);
      rl_src.high_reg = temp_reg;
    }
    NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg);

    x86op = GetOpcode(op, rl_dest, rl_src, true);
    NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg);
    FreeTemp(rl_src.low_reg);
    FreeTemp(rl_src.high_reg);
    return;
  }

  // RHS is in memory.
  DCHECK((rl_src.location == kLocDalvikFrame) ||
         (rl_src.location == kLocCompilerTemp));
  int rBase = TargetReg(kSp);
  int displacement = SRegOffset(rl_src.s_reg_low);

  // Low word first, then high word, so carry/borrow flows correctly for add/sub.
  LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);
  x86op = GetOpcode(op, rl_dest, rl_src, true);
  lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET);
  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);
}

// Two-operand (2ADDR) 64-bit arithmetic: dest op= src. Operates in registers
// when the destination already lives there, otherwise directly into its
// memory home with register RHS.
void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
  rl_dest = UpdateLocWide(rl_dest);
  if (rl_dest.location == kLocPhysReg) {
    // Ensure we are in a register pair
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);

    rl_src = UpdateLocWide(rl_src);
    GenLongRegOrMemOp(rl_result, rl_src, op);
    StoreFinalValueWide(rl_dest, rl_result);
    return;
  }

  // It wasn't in registers, so it better be in memory.
  DCHECK((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp));
  rl_src = LoadValueWide(rl_src, kCoreReg);

  // Operate directly into memory.
  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
  int rBase = TargetReg(kSp);
  int displacement = SRegOffset(rl_dest.s_reg_low);

  LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg);
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          false /* is_load */, true /* is64bit */);
  x86op = GetOpcode(op, rl_dest, rl_src, true);
  lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg);
  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                          false /* is_load */, true /* is64bit */);
  FreeTemp(rl_src.low_reg);
  FreeTemp(rl_src.high_reg);
}

// Three-operand 64-bit arithmetic: dest = src1 op src2. 2ADDR forms are
// forwarded to the two-operand overload above; 'is_commutative' lets the
// commutative ops avoid forcing src1 into a temporary when src2 already is one.
void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2, Instruction::Code op,
                              bool is_commutative) {
  // Is this really a 2 operand operation?
  switch (op) {
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::AND_LONG_2ADDR:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG_2ADDR:
      GenLongArith(rl_dest, rl_src2, op);
      return;
    default:
      break;
  }

  if (rl_dest.location == kLocPhysReg) {
    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);

    // We are about to clobber the LHS, so it needs to be a temp.
    rl_result = ForceTempWide(rl_result);

    // Perform the operation using the RHS.
    rl_src2 = UpdateLocWide(rl_src2);
    GenLongRegOrMemOp(rl_result, rl_src2, op);

    // And now record that the result is in the temp.
    StoreFinalValueWide(rl_dest, rl_result);
    return;
  }

  // It wasn't in registers, so it better be in memory.
  DCHECK((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp));
  rl_src1 = UpdateLocWide(rl_src1);
  rl_src2 = UpdateLocWide(rl_src2);

  // Get one of the source operands into temporary register.
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  if (IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg)) {
    GenLongRegOrMemOp(rl_src1, rl_src2, op);
  } else if (is_commutative) {
    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
    // We need at least one of them to be a temporary.
    if (!(IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg))) {
      rl_src1 = ForceTempWide(rl_src1);
    }
    GenLongRegOrMemOp(rl_src1, rl_src2, op);
  } else {
    // Need LHS to be the temp.
    rl_src1 = ForceTempWide(rl_src1);
    GenLongRegOrMemOp(rl_src1, rl_src2, op);
  }

  StoreFinalValueWide(rl_dest, rl_src1);
}

// The five wrappers below just select commutativity for the shared helper;
// subtraction is the only non-commutative one.
void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
}

void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit negate: -(hi:lo) computed as NEG lo; ADC hi,0; NEG hi.
void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = ForceTempWide(rl_src);
  if (rl_dest.low_reg == rl_src.high_reg) {
    // The registers are the same, so we would clobber it before the use.
    int temp_reg = AllocTemp();
    OpRegCopy(temp_reg, rl_result.low_reg);
    rl_result.high_reg = temp_reg;
  }
  OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg);    // rLow = -rLow
  OpRegImm(kOpAdc, rl_result.high_reg, 0);                   // rHigh = rHigh + CF
  OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg);  // rHigh = -rHigh
  StoreValueWide(rl_dest, rl_result);
}

// Compare or move between a register and a Thread-local slot (segment-relative
// memory operand); only kOpCmp and kOpMov are supported.
void X86Mir2Lir::OpRegThreadMem(OpKind op, int r_dest, ThreadOffset thread_offset) {
  X86OpCode opcode = kX86Bkpt;
  switch (op) {
  case kOpCmp: opcode = kX86Cmp32RT;  break;
  case kOpMov: opcode = kX86Mov32RT;  break;
  default:
    LOG(FATAL) << "Bad opcode: " << op;
    break;
  }
  NewLIR2(opcode, r_dest, thread_offset.Int32Value());
}

/*
 * Generate array load
 */
void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  RegLocation rl_result;
  rl_array = LoadValue(rl_array, kCoreReg);

  // Wide elements start after a differently-aligned header.
  int data_offset;
  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  bool constant_index = rl_index.is_const;
  int32_t constant_index_value = 0;
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  } else {
    constant_index_value = mir_graph_->ConstantValue(rl_index);
    // If index is constant, just fold it into the data offset
    data_offset += constant_index_value << scale;
    // treat as non array below
    rl_index.low_reg = INVALID_REG;
  }

  /* null object?
 */
  GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags);

  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
    if (constant_index) {
      // Constant index: compare the length field against the immediate.
      GenMemImmedCheck(kCondLs, rl_array.low_reg, len_offset,
                       constant_index_value, kThrowConstantArrayBounds);
    } else {
      GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg,
                     len_offset, kThrowArrayBounds);
    }
  }
  rl_result = EvalLoc(rl_dest, reg_class, true);
  if ((size == kLong) || (size == kDouble)) {
    LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_result.low_reg,
                        rl_result.high_reg, size, INVALID_SREG);
    StoreValueWide(rl_dest, rl_result);
  } else {
    LoadBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale,
                        data_offset, rl_result.low_reg, INVALID_REG, size,
                        INVALID_SREG);
    StoreValue(rl_dest, rl_result);
  }
}

/*
 * Generate array store
 *
 */
void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) {
  RegisterClass reg_class = oat_reg_class_by_size(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;

  if (size == kLong || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  rl_array = LoadValue(rl_array, kCoreReg);
  bool constant_index = rl_index.is_const;
  int32_t constant_index_value = 0;
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  } else {
    // If index is constant, just fold it into the data offset
    constant_index_value = mir_graph_->ConstantValue(rl_index);
    data_offset += constant_index_value << scale;
    // treat as non array below
    rl_index.low_reg = INVALID_REG;
  }

  /* null object?
 */
  GenNullCheck(rl_array.s_reg_low, rl_array.low_reg, opt_flags);

  if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) {
    if (constant_index) {
      GenMemImmedCheck(kCondLs, rl_array.low_reg, len_offset,
                       constant_index_value, kThrowConstantArrayBounds);
    } else {
      GenRegMemCheck(kCondUge, rl_index.low_reg, rl_array.low_reg,
                     len_offset, kThrowArrayBounds);
    }
  }
  if ((size == kLong) || (size == kDouble)) {
    rl_src = LoadValueWide(rl_src, reg_class);
  } else {
    rl_src = LoadValue(rl_src, reg_class);
  }
  // If the src reg can't be byte accessed, move it to a temp first.
  if ((size == kSignedByte || size == kUnsignedByte) && rl_src.low_reg >= 4) {
    int temp = AllocTemp();
    OpRegCopy(temp, rl_src.low_reg);
    StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, temp,
                         INVALID_REG, size, INVALID_SREG);
  } else {
    StoreBaseIndexedDisp(rl_array.low_reg, rl_index.low_reg, scale, data_offset, rl_src.low_reg,
                         rl_src.high_reg, size, INVALID_SREG);
  }
  if (card_mark) {
    // Free rl_index if its a temp. Ensures there are 2 free regs for card mark.
    if (!constant_index) {
      FreeTemp(rl_index.low_reg);
    }
    MarkGCCard(rl_src.low_reg, rl_array.low_reg);
  }
}

// 64-bit shift by a known constant amount; uses SHLD/SHRD for amounts < 32
// and word moves plus a 32-bit shift for amounts >= 32.
RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                          RegLocation rl_src, int shift_amount) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      DCHECK_NE(shift_amount, 1);  // Prevent a double store from happening.
      if (shift_amount == 32) {
        // Whole-word shift: high = low, low = 0.
        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
        LoadConstant(rl_result.low_reg, 0);
      } else if (shift_amount > 31) {
        OpRegCopy(rl_result.high_reg, rl_src.low_reg);
        FreeTemp(rl_src.high_reg);
        NewLIR2(kX86Sal32RI, rl_result.high_reg, shift_amount - 32);
        LoadConstant(rl_result.low_reg, 0);
      } else {
        // SHLD shifts bits from the low word into the high word.
        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
        NewLIR3(kX86Shld32RRI, rl_result.high_reg, rl_result.low_reg, shift_amount);
        NewLIR2(kX86Sal32RI, rl_result.low_reg, shift_amount);
      }
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
        NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);
      } else if (shift_amount > 31) {
        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
        NewLIR2(kX86Sar32RI, rl_result.low_reg, shift_amount - 32);
        NewLIR2(kX86Sar32RI, rl_result.high_reg, 31);  // Sign-fill the high word.
      } else {
        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
        NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
        NewLIR2(kX86Sar32RI, rl_result.high_reg, shift_amount);
      }
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      if (shift_amount == 32) {
        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
        LoadConstant(rl_result.high_reg, 0);
      } else if (shift_amount > 31) {
        OpRegCopy(rl_result.low_reg, rl_src.high_reg);
        NewLIR2(kX86Shr32RI, rl_result.low_reg, shift_amount - 32);
        LoadConstant(rl_result.high_reg, 0);
      } else {
        OpRegCopy(rl_result.low_reg, rl_src.low_reg);
        OpRegCopy(rl_result.high_reg, rl_src.high_reg);
        NewLIR3(kX86Shrd32RRI, rl_result.low_reg, rl_result.high_reg, shift_amount);
        NewLIR2(kX86Shr32RI, rl_result.high_reg, shift_amount);
      }
      break;
    default:
      LOG(FATAL) << "Unexpected case";
  }
  return rl_result;
}

// Dispatcher for long-shift-by-constant: strength-reduces trivial amounts
// before falling through to the worker overload above.
void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src, RegLocation rl_shift) {
  // Per spec, we only care about low 6 bits of shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  if (shift_amount == 0) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    StoreValueWide(rl_dest, rl_src);
    return;
  } else if (shift_amount == 1 &&
            (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) {
    // Need to handle this here to avoid calling StoreValueWide twice.
    GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src);
    return;
  }
  if (BadOverlap(rl_src, rl_dest)) {
    GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift);
    return;
  }
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount);
  StoreValueWide(rl_dest, rl_result);
}

// 64-bit arithmetic where at least one operand is a compile-time constant.
// Routes to the constant-operand helpers; for the non-commutative subtract,
// only a constant RHS can use the immediate path.
void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::AND_LONG:
    case Instruction::OR_LONG:
    case Instruction::XOR_LONG:
      if (rl_src2.is_const) {
        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
      } else {
        DCHECK(rl_src1.is_const);
        // Commutative: swap so the constant is on the RHS.
        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
      }
      break;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (rl_src2.is_const) {
        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
      } else {
        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
      }
      break;
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG_2ADDR:
    case Instruction::AND_LONG_2ADDR:
      if (rl_src2.is_const) {
        GenLongImm(rl_dest, rl_src2, opcode);
      } else {
        DCHECK(rl_src1.is_const);
        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
      }
      break;
    default:
      // Default - bail to non-const handler.
      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
      break;
  }
}

// True when applying 'op' with immediate 'value' cannot change the operand
// (AND with -1, OR/XOR with 0), so the instruction can be skipped entirely.
bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
  switch (op) {
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      return value == -1;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      return value == 0;
    default:
      return false;
  }
}

// Select the reg/reg, reg/mem or mem/reg form of the 32-bit ALU opcode for
// one half of a long operation; 'is_high_op' picks the carry-consuming
// variant (ADC/SBB) for the high word.
X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
                                bool is_high_op) {
  bool rhs_in_mem = rhs.location != kLocPhysReg;
  bool dest_in_mem = dest.location != kLocPhysReg;
  DCHECK(!rhs_in_mem || !dest_in_mem);
  switch (op) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      if (dest_in_mem) {
        return is_high_op ? kX86Adc32MR : kX86Add32MR;
      } else if (rhs_in_mem) {
        return is_high_op ? kX86Adc32RM : kX86Add32RM;
      }
      return is_high_op ? kX86Adc32RR : kX86Add32RR;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (dest_in_mem) {
        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
      } else if (rhs_in_mem) {
        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
      }
      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      if (dest_in_mem) {
        return kX86And32MR;
      }
      return rhs_in_mem ?
             kX86And32RM : kX86And32RR;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if (dest_in_mem) {
        return kX86Or32MR;
      }
      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      if (dest_in_mem) {
        return kX86Xor32MR;
      }
      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
    default:
      LOG(FATAL) << "Unexpected opcode: " << op;
      return kX86Add32RR;
  }
}

// Immediate-operand variant: pick the register or memory form of the 32-bit
// ALU opcode, preferring the short sign-extended 8-bit immediate encoding
// when 'value' fits in a signed byte.
X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
                                int32_t value) {
  bool in_mem = loc.location != kLocPhysReg;
  bool byte_imm = IS_SIMM8(value);
  DCHECK(in_mem || !IsFpReg(loc.low_reg));
  switch (op) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      if (byte_imm) {
        if (in_mem) {
          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
        }
        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
      }
      if (in_mem) {
        return is_high_op ? kX86Adc32MI : kX86Add32MI;
      }
      return is_high_op ? kX86Adc32RI : kX86Add32RI;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      if (byte_imm) {
        if (in_mem) {
          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
        }
        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
      }
      if (in_mem) {
        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
      }
      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      if (byte_imm) {
        return in_mem ? kX86And32MI8 : kX86And32RI8;
      }
      return in_mem ? kX86And32MI : kX86And32RI;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      if (byte_imm) {
        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
      }
      return in_mem ? kX86Or32MI : kX86Or32RI;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      if (byte_imm) {
        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
      }
      return in_mem ? kX86Xor32MI : kX86Xor32RI;
    default:
      LOG(FATAL) << "Unexpected opcode: " << op;
      return kX86Add32MI;
  }
}

// Two-operand long op with a constant RHS (dest op= const): applies the two
// 32-bit halves separately, skipping halves that IsNoOp proves are identity,
// and operating straight into the frame slot when dest lives in memory.
void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
  DCHECK(rl_src.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src);
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  rl_dest = UpdateLocWide(rl_dest);

  // Can we just do this into memory?
  if ((rl_dest.location == kLocDalvikFrame) ||
      (rl_dest.location == kLocCompilerTemp)) {
    int rBase = TargetReg(kSp);
    int displacement = SRegOffset(rl_dest.s_reg_low);

    if (!IsNoOp(op, val_lo)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
      LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo);
      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    }
    if (!IsNoOp(op, val_hi)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
      LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi);
      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                              false /* is_load */, true /* is64bit */);
    }
    return;
  }

  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  DCHECK_EQ(rl_result.location, kLocPhysReg);
  DCHECK(!IsFpReg(rl_result.low_reg));

  if (!IsNoOp(op, val_lo)) {
    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
    NewLIR2(x86op, rl_result.low_reg, val_lo);
  }
  if (!IsNoOp(op, val_hi)) {
    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
    NewLIR2(x86op, rl_result.high_reg, val_hi);
  }
  StoreValueWide(rl_dest, rl_result);
}

// Three-operand long op with a constant RHS (dest = src1 op const).
void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
                                RegLocation rl_src2, Instruction::Code op) {
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
  int32_t val_lo = Low32Bits(val);
  int32_t val_hi = High32Bits(val);
  rl_dest = UpdateLocWide(rl_dest);
  rl_src1 = UpdateLocWide(rl_src1);

  // Can we do this directly into the destination registers?
  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
      rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg &&
      !IsFpReg(rl_dest.low_reg)) {
    if (!IsNoOp(op, val_lo)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
      NewLIR2(x86op, rl_dest.low_reg, val_lo);
    }
    if (!IsNoOp(op, val_hi)) {
      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
      NewLIR2(x86op, rl_dest.high_reg, val_hi);
    }

    StoreFinalValueWide(rl_dest, rl_dest);
    return;
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  DCHECK_EQ(rl_src1.location, kLocPhysReg);

  // We need the values to be in a temporary
  RegLocation rl_result = ForceTempWide(rl_src1);
  if (!IsNoOp(op, val_lo)) {
    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
    NewLIR2(x86op, rl_result.low_reg, val_lo);
  }
  if (!IsNoOp(op, val_hi)) {
    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
    NewLIR2(x86op, rl_result.high_reg, val_hi);
  }

  StoreFinalValueWide(rl_dest, rl_result);
}

// For final classes there are no sub-classes to check and so we can answer the instance-of
// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86.
void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx,
                                    RegLocation rl_dest, RegLocation rl_src) {
  RegLocation object = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  int result_reg = rl_result.low_reg;

  // SETcc only works with EAX..EDX.
  if (result_reg == object.low_reg || result_reg >= 4) {
    // Pick a byte-addressable temp distinct from the object register.
    result_reg = AllocTypedTemp(false, kCoreReg);
    DCHECK_LT(result_reg, 4);
  }

  // Assume that there is no match.
  LoadConstant(result_reg, 0);
  LIR* null_branchover = OpCmpImmBranch(kCondEq, object.low_reg, 0, NULL);

  int check_class = AllocTypedTemp(false, kCoreReg);

  // If Method* is already in a register, we can save a copy.
  RegLocation rl_method = mir_graph_->GetMethodLoc();
  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
    (sizeof(mirror::Class*) * type_idx);

  if (rl_method.location == kLocPhysReg) {
    if (use_declaring_class) {
      LoadWordDisp(rl_method.low_reg,
                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                   check_class);
    } else {
      // Load the resolved-types cache, then the Class* for type_idx from it.
      LoadWordDisp(rl_method.low_reg,
                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                   check_class);
      LoadWordDisp(check_class, offset_of_type, check_class);
    }
  } else {
    LoadCurrMethodDirect(check_class);
    if (use_declaring_class) {
      LoadWordDisp(check_class,
                   mirror::ArtMethod::DeclaringClassOffset().Int32Value(),
                   check_class);
    } else {
      LoadWordDisp(check_class,
                   mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                   check_class);
      LoadWordDisp(check_class, offset_of_type, check_class);
    }
  }

  // Compare the computed class to the class in the object.
  DCHECK_EQ(object.location, kLocPhysReg);
  OpRegMem(kOpCmp, check_class, object.low_reg,
           mirror::Object::ClassOffset().Int32Value());

  // Set the low byte of the result to 0 or 1 from the compare condition code.
  NewLIR2(kX86Set8R, result_reg, kX86CondEq);

  LIR* target = NewLIR0(kPseudoTargetLabel);
  null_branchover->target = target;
  FreeTemp(check_class);
  if (IsTemp(result_reg)) {
    // Result was computed in a scratch register; move it to the real one.
    OpRegCopy(rl_result.low_reg, result_reg);
    FreeTemp(result_reg);
  }
  StoreValue(rl_dest, rl_result);
}

// General instance-of: resolves the target class (possibly via a runtime
// call), then either compares classes directly (final/known types) or calls
// the pInstanceofNonTrivial helper for a full subtype walk.
void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_known_final,
                                            bool type_known_abstract, bool use_declaring_class,
                                            bool can_assume_type_is_in_dex_cache,
                                            uint32_t type_idx, RegLocation rl_dest,
                                            RegLocation rl_src) {
  FlushAllRegs();
  // May generate a call - use explicit registers.
  LockCallTemps();
  LoadCurrMethodDirect(TargetReg(kArg1));  // kArg1 gets current Method*.
  int class_reg = TargetReg(kArg2);  // kArg2 will hold the Class*.
  // Reference must end up in kArg0.
  if (needs_access_check) {
    // Check we have access to type_idx and if not throw IllegalAccessError,
    // Caller function returns Class* in kArg0.
    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeTypeAndVerifyAccess),
                         type_idx, true);
    OpRegCopy(class_reg, TargetReg(kRet0));
    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
  } else if (use_declaring_class) {
    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
    LoadWordDisp(TargetReg(kArg1),
                 mirror::ArtMethod::DeclaringClassOffset().Int32Value(), class_reg);
  } else {
    // Load dex cache entry into class_reg (kArg2).
    LoadValueDirectFixed(rl_src, TargetReg(kArg0));
    LoadWordDisp(TargetReg(kArg1),
                 mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg);
    int32_t offset_of_type =
        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
        * type_idx);
    LoadWordDisp(class_reg, offset_of_type, class_reg);
    if (!can_assume_type_is_in_dex_cache) {
      // Need to test presence of type in dex cache at runtime.
      LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
      // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(pInitializeType), type_idx, true);
      OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
      LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
      // Rejoin code paths
      LIR* hop_target = NewLIR0(kPseudoTargetLabel);
      hop_branch->target = hop_target;
    }
  }
  /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
  RegLocation rl_result = GetReturn(false);

  // SETcc only works with EAX..EDX.
  DCHECK_LT(rl_result.low_reg, 4);

  // Is the class NULL?
  LIR* branch1 = OpCmpImmBranch(kCondEq, TargetReg(kArg0), 0, NULL);

  /* Load object->klass_. */
  DCHECK_EQ(mirror::Object::ClassOffset().Int32Value(), 0);
  LoadWordDisp(TargetReg(kArg0), mirror::Object::ClassOffset().Int32Value(), TargetReg(kArg1));
  /* kArg0 is ref, kArg1 is ref->klass_, kArg2 is class. */
  LIR* branchover = nullptr;
  if (type_known_final) {
    // Ensure top 3 bytes of result are 0.
    LoadConstant(rl_result.low_reg, 0);
    OpRegReg(kOpCmp, TargetReg(kArg1), TargetReg(kArg2));
    // Set the low byte of the result to 0 or 1 from the compare condition code.
    NewLIR2(kX86Set8R, rl_result.low_reg, kX86CondEq);
  } else {
    if (!type_known_abstract) {
      LoadConstant(rl_result.low_reg, 1);  // Assume result succeeds.
      branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
    }
    OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(pInstanceofNonTrivial));
  }
  // TODO: only clobber when type isn't final?
  ClobberCallerSave();
  /* Branch targets here.
*/ 1847 LIR* target = NewLIR0(kPseudoTargetLabel); 1848 StoreValue(rl_dest, rl_result); 1849 branch1->target = target; 1850 if (branchover != nullptr) { 1851 branchover->target = target; 1852 } 1853} 1854 1855void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, 1856 RegLocation rl_lhs, RegLocation rl_rhs) { 1857 OpKind op = kOpBkpt; 1858 bool is_div_rem = false; 1859 bool unary = false; 1860 bool shift_op = false; 1861 bool is_two_addr = false; 1862 RegLocation rl_result; 1863 switch (opcode) { 1864 case Instruction::NEG_INT: 1865 op = kOpNeg; 1866 unary = true; 1867 break; 1868 case Instruction::NOT_INT: 1869 op = kOpMvn; 1870 unary = true; 1871 break; 1872 case Instruction::ADD_INT_2ADDR: 1873 is_two_addr = true; 1874 // Fallthrough 1875 case Instruction::ADD_INT: 1876 op = kOpAdd; 1877 break; 1878 case Instruction::SUB_INT_2ADDR: 1879 is_two_addr = true; 1880 // Fallthrough 1881 case Instruction::SUB_INT: 1882 op = kOpSub; 1883 break; 1884 case Instruction::MUL_INT_2ADDR: 1885 is_two_addr = true; 1886 // Fallthrough 1887 case Instruction::MUL_INT: 1888 op = kOpMul; 1889 break; 1890 case Instruction::DIV_INT_2ADDR: 1891 is_two_addr = true; 1892 // Fallthrough 1893 case Instruction::DIV_INT: 1894 op = kOpDiv; 1895 is_div_rem = true; 1896 break; 1897 /* NOTE: returns in kArg1 */ 1898 case Instruction::REM_INT_2ADDR: 1899 is_two_addr = true; 1900 // Fallthrough 1901 case Instruction::REM_INT: 1902 op = kOpRem; 1903 is_div_rem = true; 1904 break; 1905 case Instruction::AND_INT_2ADDR: 1906 is_two_addr = true; 1907 // Fallthrough 1908 case Instruction::AND_INT: 1909 op = kOpAnd; 1910 break; 1911 case Instruction::OR_INT_2ADDR: 1912 is_two_addr = true; 1913 // Fallthrough 1914 case Instruction::OR_INT: 1915 op = kOpOr; 1916 break; 1917 case Instruction::XOR_INT_2ADDR: 1918 is_two_addr = true; 1919 // Fallthrough 1920 case Instruction::XOR_INT: 1921 op = kOpXor; 1922 break; 1923 case Instruction::SHL_INT_2ADDR: 1924 is_two_addr = true; 1925 // 
Fallthrough 1926 case Instruction::SHL_INT: 1927 shift_op = true; 1928 op = kOpLsl; 1929 break; 1930 case Instruction::SHR_INT_2ADDR: 1931 is_two_addr = true; 1932 // Fallthrough 1933 case Instruction::SHR_INT: 1934 shift_op = true; 1935 op = kOpAsr; 1936 break; 1937 case Instruction::USHR_INT_2ADDR: 1938 is_two_addr = true; 1939 // Fallthrough 1940 case Instruction::USHR_INT: 1941 shift_op = true; 1942 op = kOpLsr; 1943 break; 1944 default: 1945 LOG(FATAL) << "Invalid word arith op: " << opcode; 1946 } 1947 1948 // Can we convert to a two address instruction? 1949 if (!is_two_addr && 1950 (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == 1951 mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) { 1952 is_two_addr = true; 1953 } 1954 1955 // Get the div/rem stuff out of the way. 1956 if (is_div_rem) { 1957 rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true); 1958 StoreValue(rl_dest, rl_result); 1959 return; 1960 } 1961 1962 if (unary) { 1963 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1964 rl_result = UpdateLoc(rl_dest); 1965 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1966 OpRegReg(op, rl_result.low_reg, rl_lhs.low_reg); 1967 } else { 1968 if (shift_op) { 1969 // X86 doesn't require masking and must use ECX. 1970 int t_reg = TargetReg(kCount); // rCX 1971 LoadValueDirectFixed(rl_rhs, t_reg); 1972 if (is_two_addr) { 1973 // Can we do this directly into memory? 1974 rl_result = UpdateLoc(rl_dest); 1975 rl_rhs = LoadValue(rl_rhs, kCoreReg); 1976 if (rl_result.location != kLocPhysReg) { 1977 // Okay, we can do this into memory 1978 OpMemReg(op, rl_result, t_reg); 1979 FreeTemp(t_reg); 1980 return; 1981 } else if (!IsFpReg(rl_result.low_reg)) { 1982 // Can do this directly into the result register 1983 OpRegReg(op, rl_result.low_reg, t_reg); 1984 FreeTemp(t_reg); 1985 StoreFinalValue(rl_dest, rl_result); 1986 return; 1987 } 1988 } 1989 // Three address form, or we can't do directly. 
1990 rl_lhs = LoadValue(rl_lhs, kCoreReg); 1991 rl_result = EvalLoc(rl_dest, kCoreReg, true); 1992 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, t_reg); 1993 FreeTemp(t_reg); 1994 } else { 1995 // Multiply is 3 operand only (sort of). 1996 if (is_two_addr && op != kOpMul) { 1997 // Can we do this directly into memory? 1998 rl_result = UpdateLoc(rl_dest); 1999 if (rl_result.location == kLocPhysReg) { 2000 // Can we do this from memory directly? 2001 rl_rhs = UpdateLoc(rl_rhs); 2002 if (rl_rhs.location != kLocPhysReg) { 2003 OpRegMem(op, rl_result.low_reg, rl_rhs); 2004 StoreFinalValue(rl_dest, rl_result); 2005 return; 2006 } else if (!IsFpReg(rl_rhs.low_reg)) { 2007 OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg); 2008 StoreFinalValue(rl_dest, rl_result); 2009 return; 2010 } 2011 } 2012 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2013 if (rl_result.location != kLocPhysReg) { 2014 // Okay, we can do this into memory. 2015 OpMemReg(op, rl_result, rl_rhs.low_reg); 2016 return; 2017 } else if (!IsFpReg(rl_result.low_reg)) { 2018 // Can do this directly into the result register. 2019 OpRegReg(op, rl_result.low_reg, rl_rhs.low_reg); 2020 StoreFinalValue(rl_dest, rl_result); 2021 return; 2022 } else { 2023 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2024 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2025 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2026 } 2027 } else { 2028 // Try to use reg/memory instructions. 2029 rl_lhs = UpdateLoc(rl_lhs); 2030 rl_rhs = UpdateLoc(rl_rhs); 2031 // We can't optimize with FP registers. 2032 if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) { 2033 // Something is difficult, so fall back to the standard case. 2034 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2035 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2036 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2037 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2038 } else { 2039 // We can optimize by moving to result and using memory operands. 
2040 if (rl_rhs.location != kLocPhysReg) { 2041 // Force LHS into result. 2042 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2043 LoadValueDirect(rl_lhs, rl_result.low_reg); 2044 OpRegMem(op, rl_result.low_reg, rl_rhs); 2045 } else if (rl_lhs.location != kLocPhysReg) { 2046 // RHS is in a register; LHS is in memory. 2047 if (op != kOpSub) { 2048 // Force RHS into result and operate on memory. 2049 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2050 OpRegCopy(rl_result.low_reg, rl_rhs.low_reg); 2051 OpRegMem(op, rl_result.low_reg, rl_lhs); 2052 } else { 2053 // Subtraction isn't commutative. 2054 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2055 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2056 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2057 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2058 } 2059 } else { 2060 // Both are in registers. 2061 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2062 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2063 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2064 OpRegRegReg(op, rl_result.low_reg, rl_lhs.low_reg, rl_rhs.low_reg); 2065 } 2066 } 2067 } 2068 } 2069 } 2070 StoreValue(rl_dest, rl_result); 2071} 2072 2073bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) { 2074 // If we have non-core registers, then we can't do good things. 2075 if (rl_lhs.location == kLocPhysReg && IsFpReg(rl_lhs.low_reg)) { 2076 return false; 2077 } 2078 if (rl_rhs.location == kLocPhysReg && IsFpReg(rl_rhs.low_reg)) { 2079 return false; 2080 } 2081 2082 // Everything will be fine :-). 2083 return true; 2084} 2085} // namespace art 2086