int_x86.cc revision 9ee4519afd97121f893f82d41d23164fc6c9ed34
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* This file contains codegen for the X86 ISA */ 18 19#include "codegen_x86.h" 20#include "dex/quick/mir_to_lir-inl.h" 21#include "dex/reg_storage_eq.h" 22#include "mirror/art_method.h" 23#include "mirror/array.h" 24#include "x86_lir.h" 25 26namespace art { 27 28/* 29 * Compare two 64-bit values 30 * x = y return 0 31 * x < y return -1 32 * x > y return 1 33 */ 34void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, 35 RegLocation rl_src2) { 36 if (cu_->target64) { 37 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 38 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 39 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 40 RegStorage temp_reg = AllocTemp(); 41 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); 42 NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0 43 NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 >= src2) ? 
0 : 1 44 NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg()); 45 NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); 46 47 StoreValue(rl_dest, rl_result); 48 FreeTemp(temp_reg); 49 return; 50 } 51 52 FlushAllRegs(); 53 LockCallTemps(); // Prepare for explicit register usage 54 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); 55 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3); 56 LoadValueDirectWideFixed(rl_src1, r_tmp1); 57 LoadValueDirectWideFixed(rl_src2, r_tmp2); 58 // Compute (r1:r0) = (r1:r0) - (r3:r2) 59 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2 60 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF 61 NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 62 NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg()); 63 OpReg(kOpNeg, rs_r2); // r2 = -r2 64 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = high | low - sets ZF 65 NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 66 NewLIR2(kX86Movzx8RR, r0, r0); 67 OpRegReg(kOpOr, rs_r0, rs_r2); // r0 = r0 | r2 68 RegLocation rl_result = LocCReturn(); 69 StoreValue(rl_dest, rl_result); 70} 71 72X86ConditionCode X86ConditionEncoding(ConditionCode cond) { 73 switch (cond) { 74 case kCondEq: return kX86CondEq; 75 case kCondNe: return kX86CondNe; 76 case kCondCs: return kX86CondC; 77 case kCondCc: return kX86CondNc; 78 case kCondUlt: return kX86CondC; 79 case kCondUge: return kX86CondNc; 80 case kCondMi: return kX86CondS; 81 case kCondPl: return kX86CondNs; 82 case kCondVs: return kX86CondO; 83 case kCondVc: return kX86CondNo; 84 case kCondHi: return kX86CondA; 85 case kCondLs: return kX86CondBe; 86 case kCondGe: return kX86CondGe; 87 case kCondLt: return kX86CondL; 88 case kCondGt: return kX86CondG; 89 case kCondLe: return kX86CondLe; 90 case kCondAl: 91 case kCondNv: LOG(FATAL) << "Should not reach here"; 92 } 93 return kX86CondO; 94} 95 96LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, 
RegStorage src2, LIR* target) { 97 NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg()); 98 X86ConditionCode cc = X86ConditionEncoding(cond); 99 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , 100 cc); 101 branch->target = target; 102 return branch; 103} 104 105LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, 106 int check_value, LIR* target) { 107 if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { 108 // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode 109 NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg()); 110 } else { 111 if (reg.Is64Bit()) { 112 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value); 113 } else { 114 NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value); 115 } 116 } 117 X86ConditionCode cc = X86ConditionEncoding(cond); 118 LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); 119 branch->target = target; 120 return branch; 121} 122 123LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { 124 // If src or dest is a pair, we'll be using low reg. 125 if (r_dest.IsPair()) { 126 r_dest = r_dest.GetLow(); 127 } 128 if (r_src.IsPair()) { 129 r_src = r_src.GetLow(); 130 } 131 if (r_dest.IsFloat() || r_src.IsFloat()) 132 return OpFpRegCopy(r_dest, r_src); 133 LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? 
kX86Mov64RR : kX86Mov32RR, 134 r_dest.GetReg(), r_src.GetReg()); 135 if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { 136 res->flags.is_nop = true; 137 } 138 return res; 139} 140 141void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) { 142 if (r_dest != r_src) { 143 LIR *res = OpRegCopyNoInsert(r_dest, r_src); 144 AppendLIR(res); 145 } 146} 147 148void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { 149 if (r_dest != r_src) { 150 bool dest_fp = r_dest.IsFloat(); 151 bool src_fp = r_src.IsFloat(); 152 if (dest_fp) { 153 if (src_fp) { 154 OpRegCopy(r_dest, r_src); 155 } else { 156 // TODO: Prevent this from happening in the code. The result is often 157 // unused or could have been loaded more easily from memory. 158 if (!r_src.IsPair()) { 159 DCHECK(!r_dest.IsPair()); 160 NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg()); 161 } else { 162 NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); 163 RegStorage r_tmp = AllocTempDouble(); 164 NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); 165 NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); 166 FreeTemp(r_tmp); 167 } 168 } 169 } else { 170 if (src_fp) { 171 if (!r_dest.IsPair()) { 172 DCHECK(!r_src.IsPair()); 173 NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg()); 174 } else { 175 NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); 176 RegStorage temp_reg = AllocTempDouble(); 177 NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); 178 NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); 179 NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); 180 } 181 } else { 182 DCHECK_EQ(r_dest.IsPair(), r_src.IsPair()); 183 if (!r_src.IsPair()) { 184 // Just copy the register directly. 185 OpRegCopy(r_dest, r_src); 186 } else { 187 // Handle overlap 188 if (r_src.GetHighReg() == r_dest.GetLowReg() && 189 r_src.GetLowReg() == r_dest.GetHighReg()) { 190 // Deal with cycles. 
191 RegStorage temp_reg = AllocTemp(); 192 OpRegCopy(temp_reg, r_dest.GetHigh()); 193 OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); 194 OpRegCopy(r_dest.GetLow(), temp_reg); 195 FreeTemp(temp_reg); 196 } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { 197 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); 198 OpRegCopy(r_dest.GetLow(), r_src.GetLow()); 199 } else { 200 OpRegCopy(r_dest.GetLow(), r_src.GetLow()); 201 OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); 202 } 203 } 204 } 205 } 206 } 207} 208 209void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code, 210 int32_t true_val, int32_t false_val, RegStorage rs_dest, 211 int dest_reg_class) { 212 DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair()); 213 DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat()); 214 215 // We really need this check for correctness, otherwise we will need to do more checks in 216 // non zero/one case 217 if (true_val == false_val) { 218 LoadConstantNoClobber(rs_dest, true_val); 219 return; 220 } 221 222 const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op); 223 224 const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0); 225 if (zero_one_case && IsByteRegister(rs_dest)) { 226 if (!dest_intersect) { 227 LoadConstantNoClobber(rs_dest, 0); 228 } 229 OpRegReg(kOpCmp, left_op, right_op); 230 // Set the low byte of the result to 0 or 1 from the compare condition code. 231 NewLIR2(kX86Set8R, rs_dest.GetReg(), 232 X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code))); 233 if (dest_intersect) { 234 NewLIR2(rs_dest.Is64Bit() ? 
kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg()); 235 } 236 } else { 237 // Be careful rs_dest can be changed only after cmp because it can be the same as one of ops 238 // and it cannot use xor because it makes cc flags to be dirty 239 RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false); 240 if (temp_reg.Valid()) { 241 if (false_val == 0 && dest_intersect) { 242 code = FlipComparisonOrder(code); 243 std::swap(true_val, false_val); 244 } 245 if (!dest_intersect) { 246 LoadConstantNoClobber(rs_dest, false_val); 247 } 248 LoadConstantNoClobber(temp_reg, true_val); 249 OpRegReg(kOpCmp, left_op, right_op); 250 if (dest_intersect) { 251 LoadConstantNoClobber(rs_dest, false_val); 252 DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); 253 } 254 OpCondRegReg(kOpCmov, code, rs_dest, temp_reg); 255 FreeTemp(temp_reg); 256 } else { 257 // slow path 258 LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr); 259 LoadConstantNoClobber(rs_dest, false_val); 260 LIR* that_is_it = NewLIR1(kX86Jmp8, 0); 261 LIR* true_case = NewLIR0(kPseudoTargetLabel); 262 cmp_branch->target = true_case; 263 LoadConstantNoClobber(rs_dest, true_val); 264 LIR* end = NewLIR0(kPseudoTargetLabel); 265 that_is_it->target = end; 266 } 267 } 268} 269 270void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { 271 RegLocation rl_result; 272 RegLocation rl_src = mir_graph_->GetSrc(mir, 0); 273 RegLocation rl_dest = mir_graph_->GetDest(mir); 274 // Avoid using float regs here. 275 RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; 276 RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; 277 rl_src = LoadValue(rl_src, src_reg_class); 278 ConditionCode ccode = mir->meta.ccode; 279 280 // The kMirOpSelect has two variants, one for constants and one for moves. 
281 const bool is_constant_case = (mir->ssa_rep->num_uses == 1); 282 283 if (is_constant_case) { 284 int true_val = mir->dalvikInsn.vB; 285 int false_val = mir->dalvikInsn.vC; 286 rl_result = EvalLoc(rl_dest, result_reg_class, true); 287 288 /* 289 * For ccode == kCondEq: 290 * 291 * 1) When the true case is zero and result_reg is not same as src_reg: 292 * xor result_reg, result_reg 293 * cmp $0, src_reg 294 * mov t1, $false_case 295 * cmovnz result_reg, t1 296 * 2) When the false case is zero and result_reg is not same as src_reg: 297 * xor result_reg, result_reg 298 * cmp $0, src_reg 299 * mov t1, $true_case 300 * cmovz result_reg, t1 301 * 3) All other cases (we do compare first to set eflags): 302 * cmp $0, src_reg 303 * mov result_reg, $false_case 304 * mov t1, $true_case 305 * cmovz result_reg, t1 306 */ 307 // FIXME: depending on how you use registers you could get a false != mismatch when dealing 308 // with different views of the same underlying physical resource (i.e. solo32 vs. solo64). 309 const bool result_reg_same_as_src = 310 (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum()); 311 const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); 312 const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src); 313 const bool catch_all_case = !(true_zero_case || false_zero_case); 314 315 if (true_zero_case || false_zero_case) { 316 OpRegReg(kOpXor, rl_result.reg, rl_result.reg); 317 } 318 319 if (true_zero_case || false_zero_case || catch_all_case) { 320 OpRegImm(kOpCmp, rl_src.reg, 0); 321 } 322 323 if (catch_all_case) { 324 OpRegImm(kOpMov, rl_result.reg, false_val); 325 } 326 327 if (true_zero_case || false_zero_case || catch_all_case) { 328 ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode; 329 int immediateForTemp = true_zero_case ? 
false_val : true_val; 330 RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class); 331 OpRegImm(kOpMov, temp1_reg, immediateForTemp); 332 333 OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg); 334 335 FreeTemp(temp1_reg); 336 } 337 } else { 338 RegLocation rl_true = mir_graph_->GetSrc(mir, 1); 339 RegLocation rl_false = mir_graph_->GetSrc(mir, 2); 340 rl_true = LoadValue(rl_true, result_reg_class); 341 rl_false = LoadValue(rl_false, result_reg_class); 342 rl_result = EvalLoc(rl_dest, result_reg_class, true); 343 344 /* 345 * For ccode == kCondEq: 346 * 347 * 1) When true case is already in place: 348 * cmp $0, src_reg 349 * cmovnz result_reg, false_reg 350 * 2) When false case is already in place: 351 * cmp $0, src_reg 352 * cmovz result_reg, true_reg 353 * 3) When neither cases are in place: 354 * cmp $0, src_reg 355 * mov result_reg, false_reg 356 * cmovz result_reg, true_reg 357 */ 358 359 // kMirOpSelect is generated just for conditional cases when comparison is done with zero. 
360 OpRegImm(kOpCmp, rl_src.reg, 0); 361 362 if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { 363 OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg); 364 } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { 365 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg); 366 } else { 367 OpRegCopy(rl_result.reg, rl_false.reg); 368 OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg); 369 } 370 } 371 372 StoreValue(rl_dest, rl_result); 373} 374 375void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { 376 LIR* taken = &block_label_list_[bb->taken]; 377 RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); 378 RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); 379 ConditionCode ccode = mir->meta.ccode; 380 381 if (rl_src1.is_const) { 382 std::swap(rl_src1, rl_src2); 383 ccode = FlipComparisonOrder(ccode); 384 } 385 if (rl_src2.is_const) { 386 // Do special compare/branch against simple const operand 387 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 388 GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); 389 return; 390 } 391 392 if (cu_->target64) { 393 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 394 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 395 396 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); 397 OpCondBranch(ccode, taken); 398 return; 399 } 400 401 FlushAllRegs(); 402 LockCallTemps(); // Prepare for explicit register usage 403 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); 404 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3); 405 LoadValueDirectWideFixed(rl_src1, r_tmp1); 406 LoadValueDirectWideFixed(rl_src2, r_tmp2); 407 408 // Swap operands and condition code to prevent use of zero flag. 
409 if (ccode == kCondLe || ccode == kCondGt) { 410 // Compute (r3:r2) = (r3:r2) - (r1:r0) 411 OpRegReg(kOpSub, rs_r2, rs_r0); // r2 = r2 - r0 412 OpRegReg(kOpSbc, rs_r3, rs_r1); // r3 = r3 - r1 - CF 413 } else { 414 // Compute (r1:r0) = (r1:r0) - (r3:r2) 415 OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2 416 OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF 417 } 418 switch (ccode) { 419 case kCondEq: 420 case kCondNe: 421 OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = r0 | r1 422 break; 423 case kCondLe: 424 ccode = kCondGe; 425 break; 426 case kCondGt: 427 ccode = kCondLt; 428 break; 429 case kCondLt: 430 case kCondGe: 431 break; 432 default: 433 LOG(FATAL) << "Unexpected ccode: " << ccode; 434 } 435 OpCondBranch(ccode, taken); 436} 437 438void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, 439 int64_t val, ConditionCode ccode) { 440 int32_t val_lo = Low32Bits(val); 441 int32_t val_hi = High32Bits(val); 442 LIR* taken = &block_label_list_[bb->taken]; 443 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 444 bool is_equality_test = ccode == kCondEq || ccode == kCondNe; 445 446 if (cu_->target64) { 447 if (is_equality_test && val == 0) { 448 // We can simplify of comparing for ==, != to 0. 449 NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg()); 450 } else if (is_equality_test && val_hi == 0 && val_lo > 0) { 451 OpRegImm(kOpCmp, rl_src1.reg, val_lo); 452 } else { 453 RegStorage tmp = AllocTypedTempWide(false, kCoreReg); 454 LoadConstantWide(tmp, val); 455 OpRegReg(kOpCmp, rl_src1.reg, tmp); 456 FreeTemp(tmp); 457 } 458 OpCondBranch(ccode, taken); 459 return; 460 } 461 462 if (is_equality_test && val != 0) { 463 rl_src1 = ForceTempWide(rl_src1); 464 } 465 RegStorage low_reg = rl_src1.reg.GetLow(); 466 RegStorage high_reg = rl_src1.reg.GetHigh(); 467 468 if (is_equality_test) { 469 // We can simplify of comparing for ==, != to 0. 
470 if (val == 0) { 471 if (IsTemp(low_reg)) { 472 OpRegReg(kOpOr, low_reg, high_reg); 473 // We have now changed it; ignore the old values. 474 Clobber(rl_src1.reg); 475 } else { 476 RegStorage t_reg = AllocTemp(); 477 OpRegRegReg(kOpOr, t_reg, low_reg, high_reg); 478 FreeTemp(t_reg); 479 } 480 OpCondBranch(ccode, taken); 481 return; 482 } 483 484 // Need to compute the actual value for ==, !=. 485 OpRegImm(kOpSub, low_reg, val_lo); 486 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi); 487 OpRegReg(kOpOr, high_reg, low_reg); 488 Clobber(rl_src1.reg); 489 } else if (ccode == kCondLe || ccode == kCondGt) { 490 // Swap operands and condition code to prevent use of zero flag. 491 RegStorage tmp = AllocTypedTempWide(false, kCoreReg); 492 LoadConstantWide(tmp, val); 493 OpRegReg(kOpSub, tmp.GetLow(), low_reg); 494 OpRegReg(kOpSbc, tmp.GetHigh(), high_reg); 495 ccode = (ccode == kCondLe) ? kCondGe : kCondLt; 496 FreeTemp(tmp); 497 } else { 498 // We can use a compare for the low word to set CF. 499 OpRegImm(kOpCmp, low_reg, val_lo); 500 if (IsTemp(high_reg)) { 501 NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi); 502 // We have now changed it; ignore the old values. 503 Clobber(rl_src1.reg); 504 } else { 505 // mov temp_reg, high_reg; sbb temp_reg, high_constant 506 RegStorage t_reg = AllocTemp(); 507 OpRegCopy(t_reg, high_reg); 508 NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi); 509 FreeTemp(t_reg); 510 } 511 } 512 513 OpCondBranch(ccode, taken); 514} 515 516void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) { 517 // It does not make sense to calculate magic and shift for zero divisor. 518 DCHECK_NE(divisor, 0); 519 520 /* According to H.S.Warren's Hacker's Delight Chapter 10 and 521 * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 522 * The magic number M and shift S can be calculated in the following way: 523 * Let nc be the most positive value of numerator(n) such that nc = kd - 1, 524 * where divisor(d) >=2. 
525 * Let nc be the most negative value of numerator(n) such that nc = kd + 1, 526 * where divisor(d) <= -2. 527 * Thus nc can be calculated like: 528 * nc = 2^31 + 2^31 % d - 1, where d >= 2 529 * nc = -2^31 + (2^31 + 1) % d, where d >= 2. 530 * 531 * So the shift p is the smallest p satisfying 532 * 2^p > nc * (d - 2^p % d), where d >= 2 533 * 2^p > nc * (d + 2^p % d), where d <= -2. 534 * 535 * the magic number M is calcuated by 536 * M = (2^p + d - 2^p % d) / d, where d >= 2 537 * M = (2^p - d - 2^p % d) / d, where d <= -2. 538 * 539 * Notice that p is always bigger than or equal to 32, so we just return 32-p as 540 * the shift number S. 541 */ 542 543 int32_t p = 31; 544 const uint32_t two31 = 0x80000000U; 545 546 // Initialize the computations. 547 uint32_t abs_d = (divisor >= 0) ? divisor : -divisor; 548 uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31); 549 uint32_t abs_nc = tmp - 1 - tmp % abs_d; 550 uint32_t quotient1 = two31 / abs_nc; 551 uint32_t remainder1 = two31 % abs_nc; 552 uint32_t quotient2 = two31 / abs_d; 553 uint32_t remainder2 = two31 % abs_d; 554 555 /* 556 * To avoid handling both positive and negative divisor, Hacker's Delight 557 * introduces a method to handle these 2 cases together to avoid duplication. 558 */ 559 uint32_t delta; 560 do { 561 p++; 562 quotient1 = 2 * quotient1; 563 remainder1 = 2 * remainder1; 564 if (remainder1 >= abs_nc) { 565 quotient1++; 566 remainder1 = remainder1 - abs_nc; 567 } 568 quotient2 = 2 * quotient2; 569 remainder2 = 2 * remainder2; 570 if (remainder2 >= abs_d) { 571 quotient2++; 572 remainder2 = remainder2 - abs_d; 573 } 574 delta = abs_d - remainder2; 575 } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); 576 577 magic = (divisor > 0) ? 
(quotient2 + 1) : (-quotient2 - 1); 578 shift = p - 32; 579} 580 581RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) { 582 LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; 583 return rl_dest; 584} 585 586RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, 587 int imm, bool is_div) { 588 // Use a multiply (and fixup) to perform an int div/rem by a constant. 589 590 // We have to use fixed registers, so flush all the temps. 591 FlushAllRegs(); 592 LockCallTemps(); // Prepare for explicit register usage. 593 594 // Assume that the result will be in EDX. 595 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r2, INVALID_SREG, INVALID_SREG}; 596 597 // handle div/rem by 1 special case. 598 if (imm == 1) { 599 if (is_div) { 600 // x / 1 == x. 601 StoreValue(rl_result, rl_src); 602 } else { 603 // x % 1 == 0. 604 LoadConstantNoClobber(rs_r0, 0); 605 // For this case, return the result in EAX. 606 rl_result.reg.SetReg(r0); 607 } 608 } else if (imm == -1) { // handle 0x80000000 / -1 special case. 609 if (is_div) { 610 LIR *minint_branch = 0; 611 LoadValueDirectFixed(rl_src, rs_r0); 612 OpRegImm(kOpCmp, rs_r0, 0x80000000); 613 minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); 614 615 // for x != MIN_INT, x / -1 == -x. 616 NewLIR1(kX86Neg32R, r0); 617 618 LIR* branch_around = NewLIR1(kX86Jmp8, 0); 619 // The target for cmp/jmp above. 620 minint_branch->target = NewLIR0(kPseudoTargetLabel); 621 // EAX already contains the right value (0x80000000), 622 branch_around->target = NewLIR0(kPseudoTargetLabel); 623 } else { 624 // x % -1 == 0. 625 LoadConstantNoClobber(rs_r0, 0); 626 } 627 // For this case, return the result in EAX. 628 rl_result.reg.SetReg(r0); 629 } else { 630 CHECK(imm <= -2 || imm >= 2); 631 // Use H.S.Warren's Hacker's Delight Chapter 10 and 632 // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. 
633 int magic, shift; 634 CalculateMagicAndShift(imm, magic, shift); 635 636 /* 637 * For imm >= 2, 638 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 639 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. 640 * For imm <= -2, 641 * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 642 * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. 643 * We implement this algorithm in the following way: 644 * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX 645 * 2. if imm > 0 and magic < 0, add numerator to EDX 646 * if imm < 0 and magic > 0, sub numerator from EDX 647 * 3. if S !=0, SAR S bits for EDX 648 * 4. add 1 to EDX if EDX < 0 649 * 5. Thus, EDX is the quotient 650 */ 651 652 // Numerator into EAX. 653 RegStorage numerator_reg; 654 if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) { 655 // We will need the value later. 656 if (rl_src.location == kLocPhysReg) { 657 // We can use it directly. 658 DCHECK(rl_src.reg.GetReg() != rs_r0.GetReg() && rl_src.reg.GetReg() != rs_r2.GetReg()); 659 numerator_reg = rl_src.reg; 660 } else { 661 numerator_reg = rs_r1; 662 LoadValueDirectFixed(rl_src, numerator_reg); 663 } 664 OpRegCopy(rs_r0, numerator_reg); 665 } else { 666 // Only need this once. Just put it into EAX. 667 LoadValueDirectFixed(rl_src, rs_r0); 668 } 669 670 // EDX = magic. 671 LoadConstantNoClobber(rs_r2, magic); 672 673 // EDX:EAX = magic & dividend. 674 NewLIR1(kX86Imul32DaR, rs_r2.GetReg()); 675 676 if (imm > 0 && magic < 0) { 677 // Add numerator to EDX. 678 DCHECK(numerator_reg.Valid()); 679 NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg()); 680 } else if (imm < 0 && magic > 0) { 681 DCHECK(numerator_reg.Valid()); 682 NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg()); 683 } 684 685 // Do we need the shift? 686 if (shift != 0) { 687 // Shift EDX by 'shift' bits. 688 NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift); 689 } 690 691 // Add 1 to EDX if EDX < 0. 
692 693 // Move EDX to EAX. 694 OpRegCopy(rs_r0, rs_r2); 695 696 // Move sign bit to bit 0, zeroing the rest. 697 NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31); 698 699 // EDX = EDX + EAX. 700 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg()); 701 702 // Quotient is in EDX. 703 if (!is_div) { 704 // We need to compute the remainder. 705 // Remainder is divisor - (quotient * imm). 706 DCHECK(numerator_reg.Valid()); 707 OpRegCopy(rs_r0, numerator_reg); 708 709 // EAX = numerator * imm. 710 OpRegRegImm(kOpMul, rs_r2, rs_r2, imm); 711 712 // EDX -= EAX. 713 NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg()); 714 715 // For this case, return the result in EAX. 716 rl_result.reg.SetReg(r0); 717 } 718 } 719 720 return rl_result; 721} 722 723RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, 724 bool is_div) { 725 LOG(FATAL) << "Unexpected use of GenDivRem for x86"; 726 return rl_dest; 727} 728 729RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, 730 RegLocation rl_src2, bool is_div, bool check_zero) { 731 // We have to use fixed registers, so flush all the temps. 732 FlushAllRegs(); 733 LockCallTemps(); // Prepare for explicit register usage. 734 735 // Load LHS into EAX. 736 LoadValueDirectFixed(rl_src1, rs_r0); 737 738 // Load RHS into EBX. 739 LoadValueDirectFixed(rl_src2, rs_r1); 740 741 // Copy LHS sign bit into EDX. 742 NewLIR0(kx86Cdq32Da); 743 744 if (check_zero) { 745 // Handle division by zero case. 746 GenDivZeroCheck(rs_r1); 747 } 748 749 // Have to catch 0x80000000/-1 case, or we will get an exception! 750 OpRegImm(kOpCmp, rs_r1, -1); 751 LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); 752 753 // RHS is -1. 754 OpRegImm(kOpCmp, rs_r0, 0x80000000); 755 LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); 756 757 // In 0x80000000/-1 case. 758 if (!is_div) { 759 // For DIV, EAX is already right. For REM, we need EDX 0. 
760 LoadConstantNoClobber(rs_r2, 0); 761 } 762 LIR* done = NewLIR1(kX86Jmp8, 0); 763 764 // Expected case. 765 minus_one_branch->target = NewLIR0(kPseudoTargetLabel); 766 minint_branch->target = minus_one_branch->target; 767 NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg()); 768 done->target = NewLIR0(kPseudoTargetLabel); 769 770 // Result is in EAX for div and EDX for rem. 771 RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG}; 772 if (!is_div) { 773 rl_result.reg.SetReg(r2); 774 } 775 return rl_result; 776} 777 778bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { 779 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); 780 781 if (is_long && cu_->instruction_set == kX86) { 782 return false; 783 } 784 785 // Get the two arguments to the invoke and place them in GP registers. 786 RegLocation rl_src1 = info->args[0]; 787 RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1]; 788 rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg); 789 rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg); 790 791 RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info); 792 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 793 794 /* 795 * If the result register is the same as the second element, then we need to be careful. 796 * The reason is that the first copy will inadvertently clobber the second element with 797 * the first one thus yielding the wrong result. Thus we do a swap in that case. 798 */ 799 if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { 800 std::swap(rl_src1, rl_src2); 801 } 802 803 // Pick the first integer as min/max. 804 OpRegCopy(rl_result.reg, rl_src1.reg); 805 806 // If the integers are both in the same register, then there is nothing else to do 807 // because they are equal and we have already moved one into the result. 
808 if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) { 809 // It is possible we didn't pick correctly so do the actual comparison now. 810 OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); 811 812 // Conditionally move the other integer into the destination register. 813 ConditionCode condition_code = is_min ? kCondGt : kCondLt; 814 OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg); 815 } 816 817 if (is_long) { 818 StoreValueWide(rl_dest, rl_result); 819 } else { 820 StoreValue(rl_dest, rl_result); 821 } 822 return true; 823} 824 825bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { 826 RegLocation rl_src_address = info->args[0]; // long address 827 RegLocation rl_address; 828 if (!cu_->target64) { 829 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0] 830 rl_address = LoadValue(rl_src_address, kCoreReg); 831 } else { 832 rl_address = LoadValueWide(rl_src_address, kCoreReg); 833 } 834 RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info); 835 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 836 // Unaligned access is allowed on x86. 837 LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); 838 if (size == k64) { 839 StoreValueWide(rl_dest, rl_result); 840 } else { 841 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); 842 StoreValue(rl_dest, rl_result); 843 } 844 return true; 845} 846 847bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { 848 RegLocation rl_src_address = info->args[0]; // long address 849 RegLocation rl_address; 850 if (!cu_->target64) { 851 rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0] 852 rl_address = LoadValue(rl_src_address, kCoreReg); 853 } else { 854 rl_address = LoadValueWide(rl_src_address, kCoreReg); 855 } 856 RegLocation rl_src_value = info->args[2]; // [size] value 857 RegLocation rl_value; 858 if (size == k64) { 859 // Unaligned access is allowed on x86. 
860 rl_value = LoadValueWide(rl_src_value, kCoreReg); 861 } else { 862 DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); 863 // In 32-bit mode the only EAX..EDX registers can be used with Mov8MR. 864 if (!cu_->target64 && size == kSignedByte) { 865 rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg); 866 if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) { 867 RegStorage temp = AllocateByteRegister(); 868 OpRegCopy(temp, rl_src_value.reg); 869 rl_value.reg = temp; 870 } else { 871 rl_value = LoadValue(rl_src_value, kCoreReg); 872 } 873 } else { 874 rl_value = LoadValue(rl_src_value, kCoreReg); 875 } 876 } 877 StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); 878 return true; 879} 880 881void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) { 882 NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset); 883} 884 885void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) { 886 DCHECK_EQ(kX86, cu_->instruction_set); 887 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val); 888} 889 890void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) { 891 DCHECK_EQ(kX86_64, cu_->instruction_set); 892 NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val); 893} 894 895static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) { 896 return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home); 897} 898 899bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { 900 DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); 901 // Unused - RegLocation rl_src_unsafe = info->args[0]; 902 RegLocation rl_src_obj = info->args[1]; // Object - known non-null 903 RegLocation rl_src_offset = info->args[2]; // long low 904 if (!cu_->target64) { 905 rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] 906 } 907 RegLocation rl_src_expected = 
info->args[4]; // int, long or Object 908 // If is_long, high half is in info->args[5] 909 RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object 910 // If is_long, high half is in info->args[7] 911 912 if (is_long && cu_->target64) { 913 // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. 914 FlushReg(rs_r0q); 915 Clobber(rs_r0q); 916 LockTemp(rs_r0q); 917 918 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); 919 RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); 920 RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg); 921 LoadValueDirectWide(rl_src_expected, rs_r0q); 922 NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, 923 rl_new_value.reg.GetReg()); 924 925 // After a store we need to insert barrier in case of potential load. Since the 926 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated. 927 GenMemBarrier(kAnyAny); 928 929 FreeTemp(rs_r0q); 930 } else if (is_long) { 931 // TODO: avoid unnecessary loads of SI and DI when the values are in registers. 932 // TODO: CFI support. 933 FlushAllRegs(); 934 LockCallTemps(); 935 RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); 936 RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX); 937 LoadValueDirectWideFixed(rl_src_expected, r_tmp1); 938 LoadValueDirectWideFixed(rl_src_new_value, r_tmp2); 939 // FIXME: needs 64-bit update. 940 const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI); 941 const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI); 942 DCHECK(!obj_in_si || !obj_in_di); 943 const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI); 944 const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI); 945 DCHECK(!off_in_si || !off_in_di); 946 // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg. 947 RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? 
rs_rDI : rs_rSI; 948 RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI; 949 bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI); 950 bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI); 951 if (push_di) { 952 NewLIR1(kX86Push32R, rs_rDI.GetReg()); 953 MarkTemp(rs_rDI); 954 LockTemp(rs_rDI); 955 } 956 if (push_si) { 957 NewLIR1(kX86Push32R, rs_rSI.GetReg()); 958 MarkTemp(rs_rSI); 959 LockTemp(rs_rSI); 960 } 961 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 962 const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); 963 if (!obj_in_si && !obj_in_di) { 964 LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj); 965 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. 966 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); 967 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; 968 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); 969 } 970 if (!off_in_si && !off_in_di) { 971 LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off); 972 // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. 973 DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); 974 int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; 975 AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); 976 } 977 NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0); 978 979 // After a store we need to insert barrier to prevent reordering with either 980 // earlier or later memory accesses. Since 981 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated, 982 // and it will be associated with the cmpxchg instruction, preventing both. 
983 GenMemBarrier(kAnyAny); 984 985 if (push_si) { 986 FreeTemp(rs_rSI); 987 UnmarkTemp(rs_rSI); 988 NewLIR1(kX86Pop32R, rs_rSI.GetReg()); 989 } 990 if (push_di) { 991 FreeTemp(rs_rDI); 992 UnmarkTemp(rs_rDI); 993 NewLIR1(kX86Pop32R, rs_rDI.GetReg()); 994 } 995 FreeCallTemps(); 996 } else { 997 // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX. 998 FlushReg(rs_r0); 999 Clobber(rs_r0); 1000 LockTemp(rs_r0); 1001 1002 RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); 1003 RegLocation rl_new_value = LoadValue(rl_src_new_value); 1004 1005 if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { 1006 // Mark card for object assuming new value is stored. 1007 FreeTemp(rs_r0); // Temporarily release EAX for MarkGCCard(). 1008 MarkGCCard(rl_new_value.reg, rl_object.reg); 1009 LockTemp(rs_r0); 1010 } 1011 1012 RegLocation rl_offset; 1013 if (cu_->target64) { 1014 rl_offset = LoadValueWide(rl_src_offset, kCoreReg); 1015 } else { 1016 rl_offset = LoadValue(rl_src_offset, kCoreReg); 1017 } 1018 LoadValueDirect(rl_src_expected, rs_r0); 1019 NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, 1020 rl_new_value.reg.GetReg()); 1021 1022 // After a store we need to insert barrier to prevent reordering with either 1023 // earlier or later memory accesses. Since 1024 // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated, 1025 // and it will be associated with the cmpxchg instruction, preventing both. 1026 GenMemBarrier(kAnyAny); 1027 1028 FreeTemp(rs_r0); 1029 } 1030 1031 // Convert ZF to boolean 1032 RegLocation rl_dest = InlineTarget(info); // boolean place for result 1033 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 1034 RegStorage result_reg = rl_result.reg; 1035 1036 // For 32-bit, SETcc only works with EAX..EDX. 
1037 if (!IsByteRegister(result_reg)) { 1038 result_reg = AllocateByteRegister(); 1039 } 1040 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ); 1041 NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg()); 1042 if (IsTemp(result_reg)) { 1043 FreeTemp(result_reg); 1044 } 1045 StoreValue(rl_dest, rl_result); 1046 return true; 1047} 1048 1049LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { 1050 CHECK(base_of_code_ != nullptr); 1051 1052 // Address the start of the method 1053 RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); 1054 if (rl_method.wide) { 1055 LoadValueDirectWideFixed(rl_method, reg); 1056 } else { 1057 LoadValueDirectFixed(rl_method, reg); 1058 } 1059 store_method_addr_used_ = true; 1060 1061 // Load the proper value from the literal area. 1062 // We don't know the proper offset for the value, so pick one that will force 1063 // 4 byte offset. We will fix this up in the assembler later to have the right 1064 // value. 1065 ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); 1066 LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256, 1067 0, 0, target); 1068 res->target = target; 1069 res->flags.fixup = kFixupLoad; 1070 store_method_addr_used_ = true; 1071 return res; 1072} 1073 1074LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) { 1075 LOG(FATAL) << "Unexpected use of OpVldm for x86"; 1076 return NULL; 1077} 1078 1079LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) { 1080 LOG(FATAL) << "Unexpected use of OpVstm for x86"; 1081 return NULL; 1082} 1083 1084void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, 1085 RegLocation rl_result, int lit, 1086 int first_bit, int second_bit) { 1087 RegStorage t_reg = AllocTemp(); 1088 OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit); 1089 OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg); 1090 FreeTemp(t_reg); 1091 if (first_bit != 0) { 1092 OpRegRegImm(kOpLsl, rl_result.reg, 
rl_result.reg, first_bit); 1093 } 1094} 1095 1096void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { 1097 if (cu_->target64) { 1098 DCHECK(reg.Is64Bit()); 1099 1100 NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0); 1101 } else { 1102 DCHECK(reg.IsPair()); 1103 1104 // We are not supposed to clobber the incoming storage, so allocate a temporary. 1105 RegStorage t_reg = AllocTemp(); 1106 // Doing an OR is a quick way to check if both registers are zero. This will set the flags. 1107 OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); 1108 // The temp is no longer needed so free it at this time. 1109 FreeTemp(t_reg); 1110 } 1111 1112 // In case of zero, throw ArithmeticException. 1113 GenDivZeroCheck(kCondEq); 1114} 1115 1116void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, 1117 RegStorage array_base, 1118 int len_offset) { 1119 class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { 1120 public: 1121 ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch, 1122 RegStorage index, RegStorage array_base, int32_t len_offset) 1123 : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch), 1124 index_(index), array_base_(array_base), len_offset_(len_offset) { 1125 } 1126 1127 void Compile() OVERRIDE { 1128 m2l_->ResetRegPool(); 1129 m2l_->ResetDefTracking(); 1130 GenerateTargetLabel(kPseudoThrowTarget); 1131 1132 RegStorage new_index = index_; 1133 // Move index out of kArg1, either directly to kArg0, or to kArg2. 1134 // TODO: clean-up to check not a number but with type 1135 if (index_ == m2l_->TargetReg(kArg1, kNotWide)) { 1136 if (array_base_ == m2l_->TargetReg(kArg0, kRef)) { 1137 m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_); 1138 new_index = m2l_->TargetReg(kArg2, kNotWide); 1139 } else { 1140 m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_); 1141 new_index = m2l_->TargetReg(kArg0, kNotWide); 1142 } 1143 } 1144 // Load array length to kArg1. 
// NOTE: continues ArrayBoundsCheckSlowPath::Compile() of GenArrayBoundsCheck(RegStorage, ...).
      m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_);
      // The two calls differ only in the pointer size used to look up the entrypoint offset.
      if (cu_->target64) {
        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
                                      new_index, m2l_->TargetReg(kArg1, kNotWide), true);
      } else {
        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
                                      new_index, m2l_->TargetReg(kArg1, kNotWide), true);
      }
    }

   private:
    const RegStorage index_;       // Register holding the index being checked.
    const RegStorage array_base_;  // Register holding the array reference.
    const int32_t len_offset_;     // Offset of the length field from array_base_.
  };

  // Fast path: compare index with the array length in memory. The memory access is also
  // marked as a possible null-pointer-exception site for the array reference.
  OpRegMem(kOpCmp, index, array_base, len_offset);
  MarkPossibleNullPointerException(0);
  // Unsigned >= also catches negative indices.
  LIR* branch = OpCondBranch(kCondUge, nullptr);
  AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch,
                                                    index, array_base, len_offset));
}

/*
 * Emit an array bounds check for a constant index.
 * Compares the length stored at [array_base + len_offset] against the immediate `index` and
 * branches to a slow path that calls pThrowArrayBounds(index, length) when
 * length <= index (unsigned).
 */
void X86Mir2Lir::GenArrayBoundsCheck(int32_t index,
                                     RegStorage array_base,
                                     int32_t len_offset) {
  class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath {
   public:
    ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch,
                             int32_t index, RegStorage array_base, int32_t len_offset)
        : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch),
          index_(index), array_base_(array_base), len_offset_(len_offset) {
    }

    void Compile() OVERRIDE {
      m2l_->ResetRegPool();
      m2l_->ResetDefTracking();
      GenerateTargetLabel(kPseudoThrowTarget);

      // Load array length to kArg1.
1185 m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); 1186 m2l_->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_); 1187 if (cu_->target64) { 1188 m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds), 1189 m2l_->TargetReg(kArg0, kNotWide), 1190 m2l_->TargetReg(kArg1, kNotWide), true); 1191 } else { 1192 m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds), 1193 m2l_->TargetReg(kArg0, kNotWide), 1194 m2l_->TargetReg(kArg1, kNotWide), true); 1195 } 1196 } 1197 1198 private: 1199 const int32_t index_; 1200 const RegStorage array_base_; 1201 const int32_t len_offset_; 1202 }; 1203 1204 NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index); 1205 MarkPossibleNullPointerException(0); 1206 LIR* branch = OpCondBranch(kCondLs, nullptr); 1207 AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, 1208 index, array_base, len_offset)); 1209} 1210 1211// Test suspend flag, return target of taken suspend branch 1212LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { 1213 if (cu_->target64) { 1214 OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0); 1215 } else { 1216 OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0); 1217 } 1218 return OpCondBranch((target == NULL) ? 
kCondNe : kCondEq, target); 1219} 1220 1221// Decrement register and branch on condition 1222LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { 1223 OpRegImm(kOpSub, reg, 1); 1224 return OpCondBranch(c_code, target); 1225} 1226 1227bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, 1228 RegLocation rl_src, RegLocation rl_dest, int lit) { 1229 LOG(FATAL) << "Unexpected use of smallLiteralDive in x86"; 1230 return false; 1231} 1232 1233bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) { 1234 LOG(FATAL) << "Unexpected use of easyMultiply in x86"; 1235 return false; 1236} 1237 1238LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) { 1239 LOG(FATAL) << "Unexpected use of OpIT in x86"; 1240 return NULL; 1241} 1242 1243void X86Mir2Lir::OpEndIT(LIR* it) { 1244 LOG(FATAL) << "Unexpected use of OpEndIT in x86"; 1245} 1246 1247void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) { 1248 switch (val) { 1249 case 0: 1250 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg()); 1251 break; 1252 case 1: 1253 OpRegCopy(dest, src); 1254 break; 1255 default: 1256 OpRegRegImm(kOpMul, dest, src, val); 1257 break; 1258 } 1259} 1260 1261void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) { 1262 // All memory accesses below reference dalvik regs. 1263 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 1264 1265 LIR *m; 1266 switch (val) { 1267 case 0: 1268 NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg()); 1269 break; 1270 case 1: 1271 LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile); 1272 break; 1273 default: 1274 m = NewLIR4(IS_SIMM8(val) ? 
kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(), 1275 rs_rX86_SP.GetReg(), displacement, val); 1276 AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */); 1277 break; 1278 } 1279} 1280 1281void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, 1282 RegLocation rl_src2) { 1283 // All memory accesses below reference dalvik regs. 1284 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 1285 1286 if (cu_->target64) { 1287 if (rl_src1.is_const) { 1288 std::swap(rl_src1, rl_src2); 1289 } 1290 // Are we multiplying by a constant? 1291 if (rl_src2.is_const) { 1292 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 1293 if (val == 0) { 1294 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1295 OpRegReg(kOpXor, rl_result.reg, rl_result.reg); 1296 StoreValueWide(rl_dest, rl_result); 1297 return; 1298 } else if (val == 1) { 1299 StoreValueWide(rl_dest, rl_src1); 1300 return; 1301 } else if (val == 2) { 1302 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); 1303 return; 1304 } else if (IsPowerOfTwo(val)) { 1305 int shift_amount = LowestSetBit(val); 1306 if (!BadOverlap(rl_src1, rl_dest)) { 1307 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1308 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, 1309 rl_src1, shift_amount); 1310 StoreValueWide(rl_dest, rl_result); 1311 return; 1312 } 1313 } 1314 } 1315 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1316 rl_src2 = LoadValueWide(rl_src2, kCoreReg); 1317 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1318 if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && 1319 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { 1320 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); 1321 } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() && 1322 rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { 1323 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg()); 1324 } else if 
(rl_result.reg.GetReg() == rl_src1.reg.GetReg() && 1325 rl_result.reg.GetReg() != rl_src2.reg.GetReg()) { 1326 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); 1327 } else { 1328 OpRegCopy(rl_result.reg, rl_src1.reg); 1329 NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); 1330 } 1331 StoreValueWide(rl_dest, rl_result); 1332 return; 1333 } 1334 1335 if (rl_src1.is_const) { 1336 std::swap(rl_src1, rl_src2); 1337 } 1338 // Are we multiplying by a constant? 1339 if (rl_src2.is_const) { 1340 // Do special compare/branch against simple const operand 1341 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 1342 if (val == 0) { 1343 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1344 OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow()); 1345 OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); 1346 StoreValueWide(rl_dest, rl_result); 1347 return; 1348 } else if (val == 1) { 1349 StoreValueWide(rl_dest, rl_src1); 1350 return; 1351 } else if (val == 2) { 1352 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1); 1353 return; 1354 } else if (IsPowerOfTwo(val)) { 1355 int shift_amount = LowestSetBit(val); 1356 if (!BadOverlap(rl_src1, rl_dest)) { 1357 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 1358 RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, 1359 rl_src1, shift_amount); 1360 StoreValueWide(rl_dest, rl_result); 1361 return; 1362 } 1363 } 1364 1365 // Okay, just bite the bullet and do it. 1366 int32_t val_lo = Low32Bits(val); 1367 int32_t val_hi = High32Bits(val); 1368 FlushAllRegs(); 1369 LockCallTemps(); // Prepare for explicit register usage. 
1370 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); 1371 bool src1_in_reg = rl_src1.location == kLocPhysReg; 1372 int displacement = SRegOffset(rl_src1.s_reg_low); 1373 1374 // ECX <- 1H * 2L 1375 // EAX <- 1L * 2H 1376 if (src1_in_reg) { 1377 GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo); 1378 GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi); 1379 } else { 1380 GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo); 1381 GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi); 1382 } 1383 1384 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 1385 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg()); 1386 1387 // EAX <- 2L 1388 LoadConstantNoClobber(rs_r0, val_lo); 1389 1390 // EDX:EAX <- 2L * 1L (double precision) 1391 if (src1_in_reg) { 1392 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); 1393 } else { 1394 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET); 1395 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1396 true /* is_load */, true /* is_64bit */); 1397 } 1398 1399 // EDX <- EDX + ECX (add high words) 1400 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg()); 1401 1402 // Result is EDX:EAX 1403 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, 1404 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; 1405 StoreValueWide(rl_dest, rl_result); 1406 return; 1407 } 1408 1409 // Nope. Do it the hard way 1410 // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L. 1411 bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) == 1412 mir_graph_->SRegToVReg(rl_src2.s_reg_low); 1413 1414 FlushAllRegs(); 1415 LockCallTemps(); // Prepare for explicit register usage. 1416 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); 1417 rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg); 1418 1419 // At this point, the VRs are in their home locations. 
1420 bool src1_in_reg = rl_src1.location == kLocPhysReg; 1421 bool src2_in_reg = rl_src2.location == kLocPhysReg; 1422 1423 // ECX <- 1H 1424 if (src1_in_reg) { 1425 NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg()); 1426 } else { 1427 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32, 1428 kNotVolatile); 1429 } 1430 1431 if (is_square) { 1432 // Take advantage of the fact that the values are the same. 1433 // ECX <- ECX * 2L (1H * 2L) 1434 if (src2_in_reg) { 1435 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg()); 1436 } else { 1437 int displacement = SRegOffset(rl_src2.s_reg_low); 1438 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(), 1439 displacement + LOWORD_OFFSET); 1440 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1441 true /* is_load */, true /* is_64bit */); 1442 } 1443 1444 // ECX <- 2*ECX (2H * 1L) + (1H * 2L) 1445 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg()); 1446 } else { 1447 // EAX <- 2H 1448 if (src2_in_reg) { 1449 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg()); 1450 } else { 1451 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32, 1452 kNotVolatile); 1453 } 1454 1455 // EAX <- EAX * 1L (2H * 1L) 1456 if (src1_in_reg) { 1457 NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg()); 1458 } else { 1459 int displacement = SRegOffset(rl_src1.s_reg_low); 1460 LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(), 1461 displacement + LOWORD_OFFSET); 1462 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1463 true /* is_load */, true /* is_64bit */); 1464 } 1465 1466 // ECX <- ECX * 2L (1H * 2L) 1467 if (src2_in_reg) { 1468 NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg()); 1469 } else { 1470 int displacement = SRegOffset(rl_src2.s_reg_low); 1471 LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(), 1472 displacement + LOWORD_OFFSET); 1473 
AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1474 true /* is_load */, true /* is_64bit */); 1475 } 1476 1477 // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) 1478 NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg()); 1479 } 1480 1481 // EAX <- 2L 1482 if (src2_in_reg) { 1483 NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg()); 1484 } else { 1485 LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32, 1486 kNotVolatile); 1487 } 1488 1489 // EDX:EAX <- 2L * 1L (double precision) 1490 if (src1_in_reg) { 1491 NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); 1492 } else { 1493 int displacement = SRegOffset(rl_src1.s_reg_low); 1494 LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET); 1495 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 1496 true /* is_load */, true /* is_64bit */); 1497 } 1498 1499 // EDX <- EDX + ECX (add high words) 1500 NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg()); 1501 1502 // Result is EDX:EAX 1503 RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, 1504 RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; 1505 StoreValueWide(rl_dest, rl_result); 1506} 1507 1508void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, 1509 Instruction::Code op) { 1510 DCHECK_EQ(rl_dest.location, kLocPhysReg); 1511 X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); 1512 if (rl_src.location == kLocPhysReg) { 1513 // Both operands are in registers. 1514 // But we must ensure that rl_src is in pair 1515 if (cu_->target64) { 1516 NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg()); 1517 } else { 1518 rl_src = LoadValueWide(rl_src, kCoreReg); 1519 if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { 1520 // The registers are the same, so we would clobber it before the use. 
// NOTE: continues GenLongRegOrMemOp, inside the 32-bit register/register case where the
// destination's low register is also the source's high register.
        // NOTE(review): the copy below takes rl_dest.reg (a pair) as its source; presumably
        // the intent is to preserve the shared register's value in the temp. Confirm.
        RegStorage temp_reg = AllocTemp();
        OpRegCopy(temp_reg, rl_dest.reg);
        rl_src.reg.SetHighReg(temp_reg.GetReg());
      }
      // Low words first ...
      NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg());

      // ... then the high words, using the high-word variant of the opcode.
      x86op = GetOpcode(op, rl_dest, rl_src, true);
      NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg());
      FreeTemp(rl_src.reg);  // ???
    }
    return;
  }

  // RHS is in memory.
  DCHECK((rl_src.location == kLocDalvikFrame) ||
         (rl_src.location == kLocCompilerTemp));
  int r_base = rs_rX86_SP.GetReg();
  int displacement = SRegOffset(rl_src.s_reg_low);

  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
  // One 64-bit instruction on x86-64; on 32-bit this is the low-word half of the pair.
  LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(),
                     r_base, displacement + LOWORD_OFFSET);
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);
  if (!cu_->target64) {
    // High-word half for 32-bit targets.
    x86op = GetOpcode(op, rl_dest, rl_src, true);
    lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is64bit */);
  }
}

/*
 * Two-operand 64-bit arithmetic: rl_dest = rl_dest <op> rl_src.
 * If the destination is in registers the work is delegated to GenLongRegOrMemOp;
 * otherwise the operation is applied directly to the destination's frame slot.
 */
void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
  rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg);
  if (rl_dest.location == kLocPhysReg) {
    // Ensure we are in a register pair
    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);

    rl_src = UpdateLocWideTyped(rl_src, kCoreReg);
    GenLongRegOrMemOp(rl_result, rl_src, op);
    StoreFinalValueWide(rl_dest, rl_result);
    return;
  }

  // It wasn't in registers, so it better be in memory.
  DCHECK((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp));
  rl_src = LoadValueWide(rl_src, kCoreReg);

  // Operate directly into memory.
// NOTE: continues the two-operand GenLongArith (destination in memory).
  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
  int r_base = rs_rX86_SP.GetReg();
  int displacement = SRegOffset(rl_dest.s_reg_low);

  ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
  LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET,
                     cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg());
  // The instruction both reads and writes the destination slot, so annotate it as a load
  // and as a store.
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          true /* is_load */, true /* is64bit */);
  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
                          false /* is_load */, true /* is64bit */);
  if (!cu_->target64) {
    // High-word half for 32-bit targets.
    x86op = GetOpcode(op, rl_dest, rl_src, true);
    lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            true /* is_load */, true /* is64bit */);
    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
                            false /* is_load */, true /* is64bit */);
  }
  FreeTemp(rl_src.reg);
}

/*
 * Three-operand 64-bit arithmetic: rl_dest = rl_src1 <op> rl_src2.
 * The *_2ADDR opcodes are routed to the two-operand form when two-operand generation is
 * enabled. Otherwise one source is brought into a temporary, the operation is applied to
 * it, and the temporary is stored as the result. `is_commutative` allows either source to
 * serve as that temporary.
 */
void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2, Instruction::Code op,
                              bool is_commutative) {
  // Is this really a 2 operand operation?
  switch (op) {
    case Instruction::ADD_LONG_2ADDR:
    case Instruction::SUB_LONG_2ADDR:
    case Instruction::AND_LONG_2ADDR:
    case Instruction::OR_LONG_2ADDR:
    case Instruction::XOR_LONG_2ADDR:
      if (GenerateTwoOperandInstructions()) {
        GenLongArith(rl_dest, rl_src2, op);
        return;
      }
      break;

    default:
      break;
  }

  if (rl_dest.location == kLocPhysReg) {
    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);

    // We are about to clobber the LHS, so it needs to be a temp.
    rl_result = ForceTempWide(rl_result);

    // Perform the operation using the RHS.
    rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);
    GenLongRegOrMemOp(rl_result, rl_src2, op);

    // And now record that the result is in the temp.
    StoreFinalValueWide(rl_dest, rl_result);
    return;
  }

  // It wasn't in registers, so it better be in memory.
  DCHECK((rl_dest.location == kLocDalvikFrame) ||
         (rl_dest.location == kLocCompilerTemp));
  rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg);
  rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg);

  // Get one of the source operands into temporary register.
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  // The two branches below apply the same strategy; they differ only in how "is a temp" is
  // tested (one 64-bit register versus both halves of a register pair).
  if (cu_->target64) {
    if (IsTemp(rl_src1.reg)) {
      GenLongRegOrMemOp(rl_src1, rl_src2, op);
    } else if (is_commutative) {
      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
      // We need at least one of them to be a temporary.
      if (!IsTemp(rl_src2.reg)) {
        rl_src1 = ForceTempWide(rl_src1);
        GenLongRegOrMemOp(rl_src1, rl_src2, op);
      } else {
        // rl_src2 is already a temp: accumulate into it and store it as the result.
        GenLongRegOrMemOp(rl_src2, rl_src1, op);
        StoreFinalValueWide(rl_dest, rl_src2);
        return;
      }
    } else {
      // Need LHS to be the temp.
      rl_src1 = ForceTempWide(rl_src1);
      GenLongRegOrMemOp(rl_src1, rl_src2, op);
    }
  } else {
    if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
      GenLongRegOrMemOp(rl_src1, rl_src2, op);
    } else if (is_commutative) {
      rl_src2 = LoadValueWide(rl_src2, kCoreReg);
      // We need at least one of them to be a temporary.
      if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) {
        rl_src1 = ForceTempWide(rl_src1);
        GenLongRegOrMemOp(rl_src1, rl_src2, op);
      } else {
        // rl_src2 is already a temp: accumulate into it and store it as the result.
        GenLongRegOrMemOp(rl_src2, rl_src1, op);
        StoreFinalValueWide(rl_dest, rl_src2);
        return;
      }
    } else {
      // Need LHS to be the temp.
      rl_src1 = ForceTempWide(rl_src1);
      GenLongRegOrMemOp(rl_src1, rl_src2, op);
    }
  }

  StoreFinalValueWide(rl_dest, rl_src1);
}

// 64-bit add; forwards to GenLongArith as a commutative operation.
void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit subtract; forwards to GenLongArith as a non-commutative operation.
void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
}

// 64-bit bitwise AND; forwards to GenLongArith as a commutative operation.
void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit bitwise OR; forwards to GenLongArith as a commutative operation.
void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit bitwise XOR; forwards to GenLongArith as a commutative operation.
void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_src2) {
  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
}

// 64-bit bitwise NOT. Only implemented for x86-64; aborts on 32-bit targets.
void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
  if (cu_->target64) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    RegLocation rl_result;
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegCopy(rl_result.reg, rl_src.reg);
    OpReg(kOpNot, rl_result.reg);
    StoreValueWide(rl_dest, rl_result);
  } else {
    LOG(FATAL) << "Unexpected use GenNotLong()";
  }
}

/*
 * 64-bit signed divide (is_div) or remainder (!is_div). Only implemented for x86-64;
 * aborts on 32-bit targets. Uses fixed registers: dividend in RAX, divisor in RCX,
 * quotient in RAX and remainder in RDX. Division by zero and the
 * 0x8000000000000000 / -1 case are handled explicitly before the divide is issued.
 */
void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                               RegLocation rl_src2, bool is_div) {
  if (!cu_->target64) {
    LOG(FATAL) << "Unexpected use GenDivRemLong()";
    return;
  }

  // We have to use fixed registers, so flush all the temps.
  FlushAllRegs();
  LockCallTemps();  // Prepare for explicit register usage.
// NOTE: continues GenDivRemLong (x86-64 path, after registers were flushed and locked).

  // Load LHS into RAX.
  LoadValueDirectWideFixed(rl_src1, rs_r0q);

  // Load RHS into RCX.
  LoadValueDirectWideFixed(rl_src2, rs_r1q);

  // Copy LHS sign bit into RDX.
  NewLIR0(kx86Cqo64Da);

  // Handle division by zero case.
  GenDivZeroCheckWide(rs_r1q);

  // Have to catch 0x8000000000000000/-1 case, or we will get an exception!
  NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1);
  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // RHS is -1.
  LoadConstantWide(rs_r6q, 0x8000000000000000);
  NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg());
  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);

  // In 0x8000000000000000/-1 case.
  if (!is_div) {
    // For DIV, RAX is already right. For REM, we need RDX 0.
    NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg());
  }
  LIR* done = NewLIR1(kX86Jmp8, 0);

  // Expected case.
  // Both "divisor != -1" and "dividend != min" branches land here and do the real divide.
  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
  minint_branch->target = minus_one_branch->target;
  NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg());
  done->target = NewLIR0(kPseudoTargetLabel);

  // Result is in RAX for div and RDX for rem.
  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG};
  if (!is_div) {
    rl_result.reg.SetReg(r2q);
  }

  StoreValueWide(rl_dest, rl_result);
}

/*
 * 64-bit negate: rl_dest = -rl_src.
 * x86-64 uses a single NEG. 32-bit x86 negates the register pair in place as
 *   low = -low; high = -(high + CF)
 * where CF is the carry produced by negating the low word.
 */
void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result;
  if (cu_->target64) {
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    OpRegReg(kOpNeg, rl_result.reg, rl_src.reg);
  } else {
    rl_result = ForceTempWide(rl_src);
    if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) &&
        ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) {
      // The registers are the same, so we would clobber it before the use.
      // NOTE(review): the copy below takes rl_result.reg (a pair) as its source; confirm
      // that this preserves the intended word.
      RegStorage temp_reg = AllocTemp();
      OpRegCopy(temp_reg, rl_result.reg);
      rl_result.reg.SetHighReg(temp_reg.GetReg());
    }
    OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow());    // rLow = -rLow
    OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0);                   // rHigh = rHigh + CF
    OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh());  // rHigh = -rHigh
  }
  StoreValueWide(rl_dest, rl_result);
}

// Emit a compare or move between r_dest and the thread-local value at thread_offset
// (32-bit x86 only). Any other `op` is a fatal error.
void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) {
  DCHECK_EQ(kX86, cu_->instruction_set);
  X86OpCode opcode = kX86Bkpt;
  switch (op) {
  case kOpCmp: opcode = kX86Cmp32RT;  break;
  case kOpMov: opcode = kX86Mov32RT;  break;
  default:
    LOG(FATAL) << "Bad opcode: " << op;
    break;
  }
  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
}

// Emit a compare or move between r_dest and the thread-local value at thread_offset
// (x86-64 only). The 64-bit opcode is selected when r_dest is a 64-bit solo register,
// the 32-bit opcode otherwise. Any other `op` is a fatal error.
void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
  DCHECK_EQ(kX86_64, cu_->instruction_set);
  X86OpCode opcode = kX86Bkpt;
  if (cu_->target64 && r_dest.Is64BitSolo()) {
    switch (op) {
    case kOpCmp: opcode = kX86Cmp64RT;  break;
    case kOpMov: opcode = kX86Mov64RT;  break;
    default:
      LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op;
      break;
    }
  } else {
    switch (op) {
    case kOpCmp: opcode = kX86Cmp32RT;  break;
    case kOpMov: opcode = kX86Mov32RT;  break;
    default:
      LOG(FATAL) << "Bad opcode: " << op;
      break;
    }
  }
  NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
}

/*
 * Generate array load
 */
void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                             RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  RegLocation rl_result;
  rl_array = LoadValue(rl_array, kRefReg);

  int data_offset;
  if (size == k64 || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  bool constant_index = rl_index.is_const;
  int32_t constant_index_value = 0;
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  } else {
    constant_index_value = mir_graph_->ConstantValue(rl_index);
    // If index is constant, just fold it into the data offset
    data_offset += constant_index_value << scale;
    // treat as non array below
    rl_index.reg = RegStorage::InvalidReg();
  }

  /* null object?
*/ 1859 GenNullCheck(rl_array.reg, opt_flags); 1860 1861 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { 1862 if (constant_index) { 1863 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset); 1864 } else { 1865 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset); 1866 } 1867 } 1868 rl_result = EvalLoc(rl_dest, reg_class, true); 1869 LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size); 1870 if ((size == k64) || (size == kDouble)) { 1871 StoreValueWide(rl_dest, rl_result); 1872 } else { 1873 StoreValue(rl_dest, rl_result); 1874 } 1875} 1876 1877/* 1878 * Generate array store 1879 * 1880 */ 1881void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, 1882 RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { 1883 RegisterClass reg_class = RegClassBySize(size); 1884 int len_offset = mirror::Array::LengthOffset().Int32Value(); 1885 int data_offset; 1886 1887 if (size == k64 || size == kDouble) { 1888 data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); 1889 } else { 1890 data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); 1891 } 1892 1893 rl_array = LoadValue(rl_array, kRefReg); 1894 bool constant_index = rl_index.is_const; 1895 int32_t constant_index_value = 0; 1896 if (!constant_index) { 1897 rl_index = LoadValue(rl_index, kCoreReg); 1898 } else { 1899 // If index is constant, just fold it into the data offset 1900 constant_index_value = mir_graph_->ConstantValue(rl_index); 1901 data_offset += constant_index_value << scale; 1902 // treat as non array below 1903 rl_index.reg = RegStorage::InvalidReg(); 1904 } 1905 1906 /* null object? 
*/ 1907 GenNullCheck(rl_array.reg, opt_flags); 1908 1909 if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { 1910 if (constant_index) { 1911 GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset); 1912 } else { 1913 GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset); 1914 } 1915 } 1916 if ((size == k64) || (size == kDouble)) { 1917 rl_src = LoadValueWide(rl_src, reg_class); 1918 } else { 1919 rl_src = LoadValue(rl_src, reg_class); 1920 } 1921 // If the src reg can't be byte accessed, move it to a temp first. 1922 if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) { 1923 RegStorage temp = AllocTemp(); 1924 OpRegCopy(temp, rl_src.reg); 1925 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size); 1926 } else { 1927 StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size); 1928 } 1929 if (card_mark) { 1930 // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. 1931 if (!constant_index) { 1932 FreeTemp(rl_index.reg); 1933 } 1934 MarkGCCard(rl_src.reg, rl_array.reg); 1935 } 1936} 1937 1938RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 1939 RegLocation rl_src, int shift_amount) { 1940 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 1941 if (cu_->target64) { 1942 OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ 1943 switch (opcode) { 1944 case Instruction::SHL_LONG: 1945 case Instruction::SHL_LONG_2ADDR: 1946 op = kOpLsl; 1947 break; 1948 case Instruction::SHR_LONG: 1949 case Instruction::SHR_LONG_2ADDR: 1950 op = kOpAsr; 1951 break; 1952 case Instruction::USHR_LONG: 1953 case Instruction::USHR_LONG_2ADDR: 1954 op = kOpLsr; 1955 break; 1956 default: 1957 LOG(FATAL) << "Unexpected case"; 1958 } 1959 OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); 1960 } else { 1961 switch (opcode) { 1962 case Instruction::SHL_LONG: 1963 case Instruction::SHL_LONG_2ADDR: 1964 
DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. 1965 if (shift_amount == 32) { 1966 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); 1967 LoadConstant(rl_result.reg.GetLow(), 0); 1968 } else if (shift_amount > 31) { 1969 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); 1970 NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); 1971 LoadConstant(rl_result.reg.GetLow(), 0); 1972 } else { 1973 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); 1974 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 1975 NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), 1976 shift_amount); 1977 NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); 1978 } 1979 break; 1980 case Instruction::SHR_LONG: 1981 case Instruction::SHR_LONG_2ADDR: 1982 if (shift_amount == 32) { 1983 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 1984 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 1985 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); 1986 } else if (shift_amount > 31) { 1987 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 1988 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 1989 NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); 1990 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); 1991 } else { 1992 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); 1993 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 1994 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), 1995 shift_amount); 1996 NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); 1997 } 1998 break; 1999 case Instruction::USHR_LONG: 2000 case Instruction::USHR_LONG_2ADDR: 2001 if (shift_amount == 32) { 2002 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 2003 LoadConstant(rl_result.reg.GetHigh(), 0); 2004 } else if (shift_amount > 31) { 2005 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); 2006 
NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); 2007 LoadConstant(rl_result.reg.GetHigh(), 0); 2008 } else { 2009 OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); 2010 OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); 2011 NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), 2012 shift_amount); 2013 NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); 2014 } 2015 break; 2016 default: 2017 LOG(FATAL) << "Unexpected case"; 2018 } 2019 } 2020 return rl_result; 2021} 2022 2023void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, 2024 RegLocation rl_src, RegLocation rl_shift) { 2025 // Per spec, we only care about low 6 bits of shift amount. 2026 int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; 2027 if (shift_amount == 0) { 2028 rl_src = LoadValueWide(rl_src, kCoreReg); 2029 StoreValueWide(rl_dest, rl_src); 2030 return; 2031 } else if (shift_amount == 1 && 2032 (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { 2033 // Need to handle this here to avoid calling StoreValueWide twice. 
2034 GenAddLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src); 2035 return; 2036 } 2037 if (BadOverlap(rl_src, rl_dest)) { 2038 GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); 2039 return; 2040 } 2041 rl_src = LoadValueWide(rl_src, kCoreReg); 2042 RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount); 2043 StoreValueWide(rl_dest, rl_result); 2044} 2045 2046void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, 2047 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 2048 bool isConstSuccess = false; 2049 switch (opcode) { 2050 case Instruction::ADD_LONG: 2051 case Instruction::AND_LONG: 2052 case Instruction::OR_LONG: 2053 case Instruction::XOR_LONG: 2054 if (rl_src2.is_const) { 2055 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 2056 } else { 2057 DCHECK(rl_src1.is_const); 2058 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 2059 } 2060 break; 2061 case Instruction::SUB_LONG: 2062 case Instruction::SUB_LONG_2ADDR: 2063 if (rl_src2.is_const) { 2064 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 2065 } else { 2066 GenSubLong(opcode, rl_dest, rl_src1, rl_src2); 2067 isConstSuccess = true; 2068 } 2069 break; 2070 case Instruction::ADD_LONG_2ADDR: 2071 case Instruction::OR_LONG_2ADDR: 2072 case Instruction::XOR_LONG_2ADDR: 2073 case Instruction::AND_LONG_2ADDR: 2074 if (rl_src2.is_const) { 2075 if (GenerateTwoOperandInstructions()) { 2076 isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); 2077 } else { 2078 isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); 2079 } 2080 } else { 2081 DCHECK(rl_src1.is_const); 2082 isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); 2083 } 2084 break; 2085 default: 2086 isConstSuccess = false; 2087 break; 2088 } 2089 2090 if (!isConstSuccess) { 2091 // Default - bail to non-const handler. 
2092 GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); 2093 } 2094} 2095 2096bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { 2097 switch (op) { 2098 case Instruction::AND_LONG_2ADDR: 2099 case Instruction::AND_LONG: 2100 return value == -1; 2101 case Instruction::OR_LONG: 2102 case Instruction::OR_LONG_2ADDR: 2103 case Instruction::XOR_LONG: 2104 case Instruction::XOR_LONG_2ADDR: 2105 return value == 0; 2106 default: 2107 return false; 2108 } 2109} 2110 2111X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, 2112 bool is_high_op) { 2113 bool rhs_in_mem = rhs.location != kLocPhysReg; 2114 bool dest_in_mem = dest.location != kLocPhysReg; 2115 bool is64Bit = cu_->target64; 2116 DCHECK(!rhs_in_mem || !dest_in_mem); 2117 switch (op) { 2118 case Instruction::ADD_LONG: 2119 case Instruction::ADD_LONG_2ADDR: 2120 if (dest_in_mem) { 2121 return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR; 2122 } else if (rhs_in_mem) { 2123 return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; 2124 } 2125 return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; 2126 case Instruction::SUB_LONG: 2127 case Instruction::SUB_LONG_2ADDR: 2128 if (dest_in_mem) { 2129 return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; 2130 } else if (rhs_in_mem) { 2131 return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; 2132 } 2133 return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; 2134 case Instruction::AND_LONG_2ADDR: 2135 case Instruction::AND_LONG: 2136 if (dest_in_mem) { 2137 return is64Bit ? kX86And64MR : kX86And32MR; 2138 } 2139 if (is64Bit) { 2140 return rhs_in_mem ? kX86And64RM : kX86And64RR; 2141 } 2142 return rhs_in_mem ? kX86And32RM : kX86And32RR; 2143 case Instruction::OR_LONG: 2144 case Instruction::OR_LONG_2ADDR: 2145 if (dest_in_mem) { 2146 return is64Bit ? kX86Or64MR : kX86Or32MR; 2147 } 2148 if (is64Bit) { 2149 return rhs_in_mem ? 
kX86Or64RM : kX86Or64RR; 2150 } 2151 return rhs_in_mem ? kX86Or32RM : kX86Or32RR; 2152 case Instruction::XOR_LONG: 2153 case Instruction::XOR_LONG_2ADDR: 2154 if (dest_in_mem) { 2155 return is64Bit ? kX86Xor64MR : kX86Xor32MR; 2156 } 2157 if (is64Bit) { 2158 return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; 2159 } 2160 return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR; 2161 default: 2162 LOG(FATAL) << "Unexpected opcode: " << op; 2163 return kX86Add32RR; 2164 } 2165} 2166 2167X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, 2168 int32_t value) { 2169 bool in_mem = loc.location != kLocPhysReg; 2170 bool is64Bit = cu_->target64; 2171 bool byte_imm = IS_SIMM8(value); 2172 DCHECK(in_mem || !loc.reg.IsFloat()); 2173 switch (op) { 2174 case Instruction::ADD_LONG: 2175 case Instruction::ADD_LONG_2ADDR: 2176 if (byte_imm) { 2177 if (in_mem) { 2178 return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; 2179 } 2180 return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; 2181 } 2182 if (in_mem) { 2183 return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; 2184 } 2185 return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; 2186 case Instruction::SUB_LONG: 2187 case Instruction::SUB_LONG_2ADDR: 2188 if (byte_imm) { 2189 if (in_mem) { 2190 return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; 2191 } 2192 return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; 2193 } 2194 if (in_mem) { 2195 return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; 2196 } 2197 return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; 2198 case Instruction::AND_LONG_2ADDR: 2199 case Instruction::AND_LONG: 2200 if (byte_imm) { 2201 if (is64Bit) { 2202 return in_mem ? kX86And64MI8 : kX86And64RI8; 2203 } 2204 return in_mem ? kX86And32MI8 : kX86And32RI8; 2205 } 2206 if (is64Bit) { 2207 return in_mem ? 
kX86And64MI : kX86And64RI; 2208 } 2209 return in_mem ? kX86And32MI : kX86And32RI; 2210 case Instruction::OR_LONG: 2211 case Instruction::OR_LONG_2ADDR: 2212 if (byte_imm) { 2213 if (is64Bit) { 2214 return in_mem ? kX86Or64MI8 : kX86Or64RI8; 2215 } 2216 return in_mem ? kX86Or32MI8 : kX86Or32RI8; 2217 } 2218 if (is64Bit) { 2219 return in_mem ? kX86Or64MI : kX86Or64RI; 2220 } 2221 return in_mem ? kX86Or32MI : kX86Or32RI; 2222 case Instruction::XOR_LONG: 2223 case Instruction::XOR_LONG_2ADDR: 2224 if (byte_imm) { 2225 if (is64Bit) { 2226 return in_mem ? kX86Xor64MI8 : kX86Xor64RI8; 2227 } 2228 return in_mem ? kX86Xor32MI8 : kX86Xor32RI8; 2229 } 2230 if (is64Bit) { 2231 return in_mem ? kX86Xor64MI : kX86Xor64RI; 2232 } 2233 return in_mem ? kX86Xor32MI : kX86Xor32RI; 2234 default: 2235 LOG(FATAL) << "Unexpected opcode: " << op; 2236 return kX86Add32MI; 2237 } 2238} 2239 2240bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { 2241 DCHECK(rl_src.is_const); 2242 int64_t val = mir_graph_->ConstantValueWide(rl_src); 2243 2244 if (cu_->target64) { 2245 // We can do with imm only if it fits 32 bit 2246 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { 2247 return false; 2248 } 2249 2250 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); 2251 2252 if ((rl_dest.location == kLocDalvikFrame) || 2253 (rl_dest.location == kLocCompilerTemp)) { 2254 int r_base = rs_rX86_SP.GetReg(); 2255 int displacement = SRegOffset(rl_dest.s_reg_low); 2256 2257 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 2258 X86OpCode x86op = GetOpcode(op, rl_dest, false, val); 2259 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); 2260 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 2261 true /* is_load */, true /* is64bit */); 2262 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 2263 false /* is_load */, true /* is64bit */); 2264 return true; 2265 } 2266 2267 RegLocation rl_result = 
EvalLocWide(rl_dest, kCoreReg, true); 2268 DCHECK_EQ(rl_result.location, kLocPhysReg); 2269 DCHECK(!rl_result.reg.IsFloat()); 2270 2271 X86OpCode x86op = GetOpcode(op, rl_result, false, val); 2272 NewLIR2(x86op, rl_result.reg.GetReg(), val); 2273 2274 StoreValueWide(rl_dest, rl_result); 2275 return true; 2276 } 2277 2278 int32_t val_lo = Low32Bits(val); 2279 int32_t val_hi = High32Bits(val); 2280 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); 2281 2282 // Can we just do this into memory? 2283 if ((rl_dest.location == kLocDalvikFrame) || 2284 (rl_dest.location == kLocCompilerTemp)) { 2285 int r_base = rs_rX86_SP.GetReg(); 2286 int displacement = SRegOffset(rl_dest.s_reg_low); 2287 2288 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 2289 if (!IsNoOp(op, val_lo)) { 2290 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 2291 LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo); 2292 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 2293 true /* is_load */, true /* is64bit */); 2294 AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, 2295 false /* is_load */, true /* is64bit */); 2296 } 2297 if (!IsNoOp(op, val_hi)) { 2298 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 2299 LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi); 2300 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 2301 true /* is_load */, true /* is64bit */); 2302 AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, 2303 false /* is_load */, true /* is64bit */); 2304 } 2305 return true; 2306 } 2307 2308 RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); 2309 DCHECK_EQ(rl_result.location, kLocPhysReg); 2310 DCHECK(!rl_result.reg.IsFloat()); 2311 2312 if (!IsNoOp(op, val_lo)) { 2313 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 2314 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); 2315 } 2316 if (!IsNoOp(op, val_hi)) { 2317 X86OpCode x86op = 
GetOpcode(op, rl_result, true, val_hi); 2318 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); 2319 } 2320 StoreValueWide(rl_dest, rl_result); 2321 return true; 2322} 2323 2324bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, 2325 RegLocation rl_src2, Instruction::Code op) { 2326 DCHECK(rl_src2.is_const); 2327 int64_t val = mir_graph_->ConstantValueWide(rl_src2); 2328 2329 if (cu_->target64) { 2330 // We can do with imm only if it fits 32 bit 2331 if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { 2332 return false; 2333 } 2334 if (rl_dest.location == kLocPhysReg && 2335 rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) { 2336 X86OpCode x86op = GetOpcode(op, rl_dest, false, val); 2337 OpRegCopy(rl_dest.reg, rl_src1.reg); 2338 NewLIR2(x86op, rl_dest.reg.GetReg(), val); 2339 StoreFinalValueWide(rl_dest, rl_dest); 2340 return true; 2341 } 2342 2343 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 2344 // We need the values to be in a temporary 2345 RegLocation rl_result = ForceTempWide(rl_src1); 2346 2347 X86OpCode x86op = GetOpcode(op, rl_result, false, val); 2348 NewLIR2(x86op, rl_result.reg.GetReg(), val); 2349 2350 StoreFinalValueWide(rl_dest, rl_result); 2351 return true; 2352 } 2353 2354 int32_t val_lo = Low32Bits(val); 2355 int32_t val_hi = High32Bits(val); 2356 rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); 2357 rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); 2358 2359 // Can we do this directly into the destination registers? 
2360 if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && 2361 rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() && 2362 rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) { 2363 if (!IsNoOp(op, val_lo)) { 2364 X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); 2365 NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo); 2366 } 2367 if (!IsNoOp(op, val_hi)) { 2368 X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); 2369 NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi); 2370 } 2371 2372 StoreFinalValueWide(rl_dest, rl_dest); 2373 return true; 2374 } 2375 2376 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 2377 DCHECK_EQ(rl_src1.location, kLocPhysReg); 2378 2379 // We need the values to be in a temporary 2380 RegLocation rl_result = ForceTempWide(rl_src1); 2381 if (!IsNoOp(op, val_lo)) { 2382 X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); 2383 NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); 2384 } 2385 if (!IsNoOp(op, val_hi)) { 2386 X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); 2387 NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); 2388 } 2389 2390 StoreFinalValueWide(rl_dest, rl_result); 2391 return true; 2392} 2393 2394// For final classes there are no sub-classes to check and so we can answer the instance-of 2395// question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. 2396void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, 2397 RegLocation rl_dest, RegLocation rl_src) { 2398 RegLocation object = LoadValue(rl_src, kRefReg); 2399 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 2400 RegStorage result_reg = rl_result.reg; 2401 2402 // For 32-bit, SETcc only works with EAX..EDX. 2403 RegStorage object_32reg = object.reg.Is64Bit() ? 
As32BitReg(object.reg) : object.reg; 2404 if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) { 2405 result_reg = AllocateByteRegister(); 2406 } 2407 2408 // Assume that there is no match. 2409 LoadConstant(result_reg, 0); 2410 LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); 2411 2412 // We will use this register to compare to memory below. 2413 // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode). 2414 // For this reason, force allocation of a 32 bit register to use, so that the 2415 // compare to memory will be done using a 32 bit comparision. 2416 // The LoadRefDisp(s) below will work normally, even in 64 bit mode. 2417 RegStorage check_class = AllocTemp(); 2418 2419 // If Method* is already in a register, we can save a copy. 2420 RegLocation rl_method = mir_graph_->GetMethodLoc(); 2421 int32_t offset_of_type = mirror::Array::DataOffset( 2422 sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + 2423 (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx); 2424 2425 if (rl_method.location == kLocPhysReg) { 2426 if (use_declaring_class) { 2427 LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 2428 check_class, kNotVolatile); 2429 } else { 2430 LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 2431 check_class, kNotVolatile); 2432 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); 2433 } 2434 } else { 2435 LoadCurrMethodDirect(check_class); 2436 if (use_declaring_class) { 2437 LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), 2438 check_class, kNotVolatile); 2439 } else { 2440 LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), 2441 check_class, kNotVolatile); 2442 LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); 2443 } 2444 } 2445 2446 // Compare the computed class to the class in the object. 
2447 DCHECK_EQ(object.location, kLocPhysReg); 2448 OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value()); 2449 2450 // Set the low byte of the result to 0 or 1 from the compare condition code. 2451 NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq); 2452 2453 LIR* target = NewLIR0(kPseudoTargetLabel); 2454 null_branchover->target = target; 2455 FreeTemp(check_class); 2456 if (IsTemp(result_reg)) { 2457 OpRegCopy(rl_result.reg, result_reg); 2458 FreeTemp(result_reg); 2459 } 2460 StoreValue(rl_dest, rl_result); 2461} 2462 2463void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, 2464 RegLocation rl_lhs, RegLocation rl_rhs) { 2465 OpKind op = kOpBkpt; 2466 bool is_div_rem = false; 2467 bool unary = false; 2468 bool shift_op = false; 2469 bool is_two_addr = false; 2470 RegLocation rl_result; 2471 switch (opcode) { 2472 case Instruction::NEG_INT: 2473 op = kOpNeg; 2474 unary = true; 2475 break; 2476 case Instruction::NOT_INT: 2477 op = kOpMvn; 2478 unary = true; 2479 break; 2480 case Instruction::ADD_INT_2ADDR: 2481 is_two_addr = true; 2482 // Fallthrough 2483 case Instruction::ADD_INT: 2484 op = kOpAdd; 2485 break; 2486 case Instruction::SUB_INT_2ADDR: 2487 is_two_addr = true; 2488 // Fallthrough 2489 case Instruction::SUB_INT: 2490 op = kOpSub; 2491 break; 2492 case Instruction::MUL_INT_2ADDR: 2493 is_two_addr = true; 2494 // Fallthrough 2495 case Instruction::MUL_INT: 2496 op = kOpMul; 2497 break; 2498 case Instruction::DIV_INT_2ADDR: 2499 is_two_addr = true; 2500 // Fallthrough 2501 case Instruction::DIV_INT: 2502 op = kOpDiv; 2503 is_div_rem = true; 2504 break; 2505 /* NOTE: returns in kArg1 */ 2506 case Instruction::REM_INT_2ADDR: 2507 is_two_addr = true; 2508 // Fallthrough 2509 case Instruction::REM_INT: 2510 op = kOpRem; 2511 is_div_rem = true; 2512 break; 2513 case Instruction::AND_INT_2ADDR: 2514 is_two_addr = true; 2515 // Fallthrough 2516 case Instruction::AND_INT: 2517 op = kOpAnd; 2518 break; 2519 
case Instruction::OR_INT_2ADDR: 2520 is_two_addr = true; 2521 // Fallthrough 2522 case Instruction::OR_INT: 2523 op = kOpOr; 2524 break; 2525 case Instruction::XOR_INT_2ADDR: 2526 is_two_addr = true; 2527 // Fallthrough 2528 case Instruction::XOR_INT: 2529 op = kOpXor; 2530 break; 2531 case Instruction::SHL_INT_2ADDR: 2532 is_two_addr = true; 2533 // Fallthrough 2534 case Instruction::SHL_INT: 2535 shift_op = true; 2536 op = kOpLsl; 2537 break; 2538 case Instruction::SHR_INT_2ADDR: 2539 is_two_addr = true; 2540 // Fallthrough 2541 case Instruction::SHR_INT: 2542 shift_op = true; 2543 op = kOpAsr; 2544 break; 2545 case Instruction::USHR_INT_2ADDR: 2546 is_two_addr = true; 2547 // Fallthrough 2548 case Instruction::USHR_INT: 2549 shift_op = true; 2550 op = kOpLsr; 2551 break; 2552 default: 2553 LOG(FATAL) << "Invalid word arith op: " << opcode; 2554 } 2555 2556 // Can we convert to a two address instruction? 2557 if (!is_two_addr && 2558 (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == 2559 mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) { 2560 is_two_addr = true; 2561 } 2562 2563 if (!GenerateTwoOperandInstructions()) { 2564 is_two_addr = false; 2565 } 2566 2567 // Get the div/rem stuff out of the way. 2568 if (is_div_rem) { 2569 rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, true); 2570 StoreValue(rl_dest, rl_result); 2571 return; 2572 } 2573 2574 // If we generate any memory access below, it will reference a dalvik reg. 2575 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 2576 2577 if (unary) { 2578 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2579 rl_result = UpdateLocTyped(rl_dest, kCoreReg); 2580 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2581 OpRegReg(op, rl_result.reg, rl_lhs.reg); 2582 } else { 2583 if (shift_op) { 2584 // X86 doesn't require masking and must use ECX. 2585 RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX 2586 LoadValueDirectFixed(rl_rhs, t_reg); 2587 if (is_two_addr) { 2588 // Can we do this directly into memory? 
2589 rl_result = UpdateLocTyped(rl_dest, kCoreReg); 2590 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2591 if (rl_result.location != kLocPhysReg) { 2592 // Okay, we can do this into memory 2593 OpMemReg(op, rl_result, t_reg.GetReg()); 2594 FreeTemp(t_reg); 2595 return; 2596 } else if (!rl_result.reg.IsFloat()) { 2597 // Can do this directly into the result register 2598 OpRegReg(op, rl_result.reg, t_reg); 2599 FreeTemp(t_reg); 2600 StoreFinalValue(rl_dest, rl_result); 2601 return; 2602 } 2603 } 2604 // Three address form, or we can't do directly. 2605 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2606 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2607 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg); 2608 FreeTemp(t_reg); 2609 } else { 2610 // Multiply is 3 operand only (sort of). 2611 if (is_two_addr && op != kOpMul) { 2612 // Can we do this directly into memory? 2613 rl_result = UpdateLocTyped(rl_dest, kCoreReg); 2614 if (rl_result.location == kLocPhysReg) { 2615 // Ensure res is in a core reg 2616 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2617 // Can we do this from memory directly? 2618 rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg); 2619 if (rl_rhs.location != kLocPhysReg) { 2620 OpRegMem(op, rl_result.reg, rl_rhs); 2621 StoreFinalValue(rl_dest, rl_result); 2622 return; 2623 } else if (!rl_rhs.reg.IsFloat()) { 2624 OpRegReg(op, rl_result.reg, rl_rhs.reg); 2625 StoreFinalValue(rl_dest, rl_result); 2626 return; 2627 } 2628 } 2629 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2630 // It might happen rl_rhs and rl_dest are the same VR 2631 // in this case rl_dest is in reg after LoadValue while 2632 // rl_result is not updated yet, so do this 2633 rl_result = UpdateLocTyped(rl_dest, kCoreReg); 2634 if (rl_result.location != kLocPhysReg) { 2635 // Okay, we can do this into memory. 2636 OpMemReg(op, rl_result, rl_rhs.reg.GetReg()); 2637 return; 2638 } else if (!rl_result.reg.IsFloat()) { 2639 // Can do this directly into the result register. 
2640 OpRegReg(op, rl_result.reg, rl_rhs.reg); 2641 StoreFinalValue(rl_dest, rl_result); 2642 return; 2643 } else { 2644 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2645 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2646 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); 2647 } 2648 } else { 2649 // Try to use reg/memory instructions. 2650 rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg); 2651 rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg); 2652 // We can't optimize with FP registers. 2653 if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) { 2654 // Something is difficult, so fall back to the standard case. 2655 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2656 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2657 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2658 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); 2659 } else { 2660 // We can optimize by moving to result and using memory operands. 2661 if (rl_rhs.location != kLocPhysReg) { 2662 // Force LHS into result. 2663 // We should be careful with order here 2664 // If rl_dest and rl_lhs points to the same VR we should load first 2665 // If the are different we should find a register first for dest 2666 if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == 2667 mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { 2668 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2669 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2670 // No-op if these are the same. 2671 OpRegCopy(rl_result.reg, rl_lhs.reg); 2672 } else { 2673 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2674 LoadValueDirect(rl_lhs, rl_result.reg); 2675 } 2676 OpRegMem(op, rl_result.reg, rl_rhs); 2677 } else if (rl_lhs.location != kLocPhysReg) { 2678 // RHS is in a register; LHS is in memory. 2679 if (op != kOpSub) { 2680 // Force RHS into result and operate on memory. 2681 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2682 OpRegCopy(rl_result.reg, rl_rhs.reg); 2683 OpRegMem(op, rl_result.reg, rl_lhs); 2684 } else { 2685 // Subtraction isn't commutative. 
2686 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2687 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2688 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2689 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); 2690 } 2691 } else { 2692 // Both are in registers. 2693 rl_lhs = LoadValue(rl_lhs, kCoreReg); 2694 rl_rhs = LoadValue(rl_rhs, kCoreReg); 2695 rl_result = EvalLoc(rl_dest, kCoreReg, true); 2696 OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); 2697 } 2698 } 2699 } 2700 } 2701 } 2702 StoreValue(rl_dest, rl_result); 2703} 2704 2705bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) { 2706 // If we have non-core registers, then we can't do good things. 2707 if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) { 2708 return false; 2709 } 2710 if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) { 2711 return false; 2712 } 2713 2714 // Everything will be fine :-). 2715 return true; 2716} 2717 2718void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { 2719 if (!cu_->target64) { 2720 Mir2Lir::GenIntToLong(rl_dest, rl_src); 2721 return; 2722 } 2723 rl_src = UpdateLocTyped(rl_src, kCoreReg); 2724 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 2725 if (rl_src.location == kLocPhysReg) { 2726 NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); 2727 } else { 2728 int displacement = SRegOffset(rl_src.s_reg_low); 2729 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 2730 LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), 2731 displacement + LOWORD_OFFSET); 2732 AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, 2733 true /* is_load */, true /* is_64bit */); 2734 } 2735 StoreValueWide(rl_dest, rl_result); 2736} 2737 2738void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, 2739 RegLocation rl_src1, RegLocation rl_shift) { 2740 if (!cu_->target64) { 2741 Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); 
2742 return; 2743 } 2744 2745 bool is_two_addr = false; 2746 OpKind op = kOpBkpt; 2747 RegLocation rl_result; 2748 2749 switch (opcode) { 2750 case Instruction::SHL_LONG_2ADDR: 2751 is_two_addr = true; 2752 // Fallthrough 2753 case Instruction::SHL_LONG: 2754 op = kOpLsl; 2755 break; 2756 case Instruction::SHR_LONG_2ADDR: 2757 is_two_addr = true; 2758 // Fallthrough 2759 case Instruction::SHR_LONG: 2760 op = kOpAsr; 2761 break; 2762 case Instruction::USHR_LONG_2ADDR: 2763 is_two_addr = true; 2764 // Fallthrough 2765 case Instruction::USHR_LONG: 2766 op = kOpLsr; 2767 break; 2768 default: 2769 op = kOpBkpt; 2770 } 2771 2772 // X86 doesn't require masking and must use ECX. 2773 RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX 2774 LoadValueDirectFixed(rl_shift, t_reg); 2775 if (is_two_addr) { 2776 // Can we do this directly into memory? 2777 rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); 2778 if (rl_result.location != kLocPhysReg) { 2779 // Okay, we can do this into memory 2780 ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); 2781 OpMemReg(op, rl_result, t_reg.GetReg()); 2782 } else if (!rl_result.reg.IsFloat()) { 2783 // Can do this directly into the result register 2784 OpRegReg(op, rl_result.reg, t_reg); 2785 StoreFinalValueWide(rl_dest, rl_result); 2786 } 2787 } else { 2788 // Three address form, or we can't do directly. 2789 rl_src1 = LoadValueWide(rl_src1, kCoreReg); 2790 rl_result = EvalLocWide(rl_dest, kCoreReg, true); 2791 OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg); 2792 StoreFinalValueWide(rl_dest, rl_result); 2793 } 2794 2795 FreeTemp(t_reg); 2796} 2797 2798} // namespace art 2799