fp_x86.cc revision 6170f5576f46f64eec6cd6c125c3de001afa5bcf
1/* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "codegen_x86.h" 18#include "dex/quick/mir_to_lir-inl.h" 19#include "x86_lir.h" 20 21namespace art { 22 23void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, 24 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 25 X86OpCode op = kX86Nop; 26 RegLocation rl_result; 27 28 /* 29 * Don't attempt to optimize register usage since these opcodes call out to 30 * the handlers. 31 */ 32 switch (opcode) { 33 case Instruction::ADD_FLOAT_2ADDR: 34 case Instruction::ADD_FLOAT: 35 op = kX86AddssRR; 36 break; 37 case Instruction::SUB_FLOAT_2ADDR: 38 case Instruction::SUB_FLOAT: 39 op = kX86SubssRR; 40 break; 41 case Instruction::DIV_FLOAT_2ADDR: 42 case Instruction::DIV_FLOAT: 43 op = kX86DivssRR; 44 break; 45 case Instruction::MUL_FLOAT_2ADDR: 46 case Instruction::MUL_FLOAT: 47 op = kX86MulssRR; 48 break; 49 case Instruction::REM_FLOAT_2ADDR: 50 case Instruction::REM_FLOAT: 51 FlushAllRegs(); // Send everything to home location 52 CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(pFmodf), rl_src1, rl_src2, 53 false); 54 rl_result = GetReturn(true); 55 StoreValue(rl_dest, rl_result); 56 return; 57 case Instruction::NEG_FLOAT: 58 GenNegFloat(rl_dest, rl_src1); 59 return; 60 default: 61 LOG(FATAL) << "Unexpected opcode: " << opcode; 62 } 63 rl_src1 = LoadValue(rl_src1, kFPReg); 64 rl_src2 = LoadValue(rl_src2, kFPReg); 65 rl_result = EvalLoc(rl_dest, kFPReg, true); 66 RegStorage r_dest = rl_result.reg; 67 RegStorage r_src1 = rl_src1.reg; 68 RegStorage r_src2 = rl_src2.reg; 69 if (r_dest == r_src2) { 70 r_src2 = AllocTempFloat(); 71 OpRegCopy(r_src2, r_dest); 72 } 73 OpRegCopy(r_dest, r_src1); 74 NewLIR2(op, r_dest.GetReg(), r_src2.GetReg()); 75 StoreValue(rl_dest, rl_result); 76} 77 78void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, 79 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 80 X86OpCode op = kX86Nop; 81 RegLocation rl_result; 82 83 switch (opcode) { 84 case Instruction::ADD_DOUBLE_2ADDR: 85 case Instruction::ADD_DOUBLE: 86 op = kX86AddsdRR; 87 break; 88 case Instruction::SUB_DOUBLE_2ADDR: 89 case Instruction::SUB_DOUBLE: 90 op = kX86SubsdRR; 91 break; 92 case Instruction::DIV_DOUBLE_2ADDR: 93 case Instruction::DIV_DOUBLE: 94 op = kX86DivsdRR; 95 break; 96 case Instruction::MUL_DOUBLE_2ADDR: 97 case Instruction::MUL_DOUBLE: 98 op = kX86MulsdRR; 99 break; 100 case Instruction::REM_DOUBLE_2ADDR: 101 case Instruction::REM_DOUBLE: 102 FlushAllRegs(); // Send everything to home location 103 CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(pFmod), rl_src1, rl_src2, 104 false); 105 rl_result = GetReturnWide(true); 106 StoreValueWide(rl_dest, rl_result); 107 return; 108 case Instruction::NEG_DOUBLE: 109 GenNegDouble(rl_dest, rl_src1); 110 return; 111 default: 112 LOG(FATAL) << "Unexpected opcode: " << opcode; 113 } 114 rl_src1 = LoadValueWide(rl_src1, kFPReg); 115 DCHECK(rl_src1.wide); 116 rl_src2 = LoadValueWide(rl_src2, kFPReg); 117 DCHECK(rl_src2.wide); 118 rl_result = EvalLoc(rl_dest, kFPReg, true); 119 DCHECK(rl_dest.wide); 120 DCHECK(rl_result.wide); 121 // TODO: update with direct 64-bit reg. 122 int r_dest = S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()); 123 int r_src1 = S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()); 124 int r_src2 = S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()); 125 if (r_dest == r_src2) { 126 r_src2 = AllocTempDouble().GetLowReg() | X86_FP_DOUBLE; 127 OpRegCopy(RegStorage::Solo64(r_src2), RegStorage::Solo64(r_dest)); 128 } 129 OpRegCopy(RegStorage::Solo64(r_dest), RegStorage::Solo64(r_src1)); 130 NewLIR2(op, r_dest, r_src2); 131 StoreValueWide(rl_dest, rl_result); 132} 133 134void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) { 135 // Compute offsets to the source and destination VRs on stack 136 int src_v_reg_offset = SRegOffset(rl_src.s_reg_low); 137 int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low); 138 139 // Update the in-register state of source. 140 rl_src = UpdateLocWide(rl_src); 141 142 // If the source is in physical register, then put it in its location on stack. 143 if (rl_src.location == kLocPhysReg) { 144 RegisterInfo* lo_info = GetRegInfo(rl_src.reg.GetLowReg()); 145 146 if (lo_info != nullptr && lo_info->is_temp) { 147 // Calling FlushSpecificReg because it will only write back VR if it is dirty. 148 FlushSpecificReg(lo_info); 149 // ResetDef for low/high to prevent NullifyRange from removing stores. 150 ResetDef(rl_src.reg.GetLowReg()); 151 if (rl_src.reg.GetLowReg() != rl_src.reg.GetHighReg() && GetRegInfo(rl_src.reg.GetHighReg()) != nullptr) { 152 ResetDef(rl_src.reg.GetHighReg()); 153 } 154 } else { 155 // It must have been register promoted if it is not a temp but is still in physical 156 // register. Since we need it to be in memory to convert, we place it there now. 157 StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.reg); 158 } 159 } 160 161 // Push the source virtual register onto the x87 stack. 162 LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp).GetReg(), src_v_reg_offset + LOWORD_OFFSET); 163 AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2, 164 true /* is_load */, true /* is64bit */); 165 166 // Now pop off x87 stack and store it in the destination VR's stack location. 167 int opcode = is_double ? kX86Fstp64M : kX86Fstp32M; 168 int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset; 169 LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement); 170 AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double); 171 172 /* 173 * The result is in a physical register if it was in a temp or was register 174 * promoted. For that reason it is enough to check if it is in physical 175 * register. If it is, then we must do all of the bookkeeping necessary to 176 * invalidate temp (if needed) and load in promoted register (if needed). 177 * If the result's location is in memory, then we do not need to do anything 178 * more since the fstp has already placed the correct value in memory. 179 */ 180 RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest); 181 if (rl_result.location == kLocPhysReg) { 182 /* 183 * We already know that the result is in a physical register but do not know if it is the 184 * right class. So we call EvalLoc(Wide) first which will ensure that it will get moved to the 185 * correct register class. 186 */ 187 if (is_double) { 188 rl_result = EvalLocWide(rl_dest, kFPReg, true); 189 190 LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, INVALID_SREG); 191 192 StoreFinalValueWide(rl_dest, rl_result); 193 } else { 194 rl_result = EvalLoc(rl_dest, kFPReg, true); 195 196 LoadWordDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); 197 198 StoreFinalValue(rl_dest, rl_result); 199 } 200 } 201} 202 203void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, 204 RegLocation rl_src) { 205 RegisterClass rcSrc = kFPReg; 206 X86OpCode op = kX86Nop; 207 int src_reg; 208 RegLocation rl_result; 209 switch (opcode) { 210 case Instruction::INT_TO_FLOAT: 211 rcSrc = kCoreReg; 212 op = kX86Cvtsi2ssRR; 213 break; 214 case Instruction::DOUBLE_TO_FLOAT: 215 rcSrc = kFPReg; 216 op = kX86Cvtsd2ssRR; 217 break; 218 case Instruction::FLOAT_TO_DOUBLE: 219 rcSrc = kFPReg; 220 op = kX86Cvtss2sdRR; 221 break; 222 case Instruction::INT_TO_DOUBLE: 223 rcSrc = kCoreReg; 224 op = kX86Cvtsi2sdRR; 225 break; 226 case Instruction::FLOAT_TO_INT: { 227 rl_src = LoadValue(rl_src, kFPReg); 228 src_reg = rl_src.reg.GetReg(); 229 // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc() 230 ClobberSReg(rl_dest.s_reg_low); 231 rl_result = EvalLoc(rl_dest, kCoreReg, true); 232 int temp_reg = AllocTempFloat().GetReg(); 233 234 LoadConstant(rl_result.reg, 0x7fffffff); 235 NewLIR2(kX86Cvtsi2ssRR, temp_reg, rl_result.reg.GetReg()); 236 NewLIR2(kX86ComissRR, src_reg, temp_reg); 237 LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); 238 LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); 239 NewLIR2(kX86Cvttss2siRR, rl_result.reg.GetReg(), src_reg); 240 LIR* branch_normal = NewLIR1(kX86Jmp8, 0); 241 branch_na_n->target = NewLIR0(kPseudoTargetLabel); 242 NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); 243 branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); 244 branch_normal->target = NewLIR0(kPseudoTargetLabel); 245 StoreValue(rl_dest, rl_result); 246 return; 247 } 248 case Instruction::DOUBLE_TO_INT: { 249 rl_src = LoadValueWide(rl_src, kFPReg); 250 src_reg = rl_src.reg.GetLowReg(); 251 // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc() 252 ClobberSReg(rl_dest.s_reg_low); 253 rl_result = EvalLoc(rl_dest, kCoreReg, true); 254 int temp_reg = AllocTempDouble().GetLowReg() | X86_FP_DOUBLE; 255 256 LoadConstant(rl_result.reg, 0x7fffffff); 257 NewLIR2(kX86Cvtsi2sdRR, temp_reg, rl_result.reg.GetReg()); 258 NewLIR2(kX86ComisdRR, src_reg, temp_reg); 259 LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); 260 LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); 261 NewLIR2(kX86Cvttsd2siRR, rl_result.reg.GetReg(), src_reg); 262 LIR* branch_normal = NewLIR1(kX86Jmp8, 0); 263 branch_na_n->target = NewLIR0(kPseudoTargetLabel); 264 NewLIR2(kX86Xor32RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); 265 branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); 266 branch_normal->target = NewLIR0(kPseudoTargetLabel); 267 StoreValue(rl_dest, rl_result); 268 return; 269 } 270 case Instruction::LONG_TO_DOUBLE: 271 GenLongToFP(rl_dest, rl_src, true /* is_double */); 272 return; 273 case Instruction::LONG_TO_FLOAT: 274 GenLongToFP(rl_dest, rl_src, false /* is_double */); 275 return; 276 case Instruction::FLOAT_TO_LONG: 277 GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src); 278 return; 279 case Instruction::DOUBLE_TO_LONG: 280 GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pD2l), rl_dest, rl_src); 281 return; 282 default: 283 LOG(INFO) << "Unexpected opcode: " << opcode; 284 } 285 if (rl_src.wide) { 286 rl_src = LoadValueWide(rl_src, rcSrc); 287 src_reg = S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg()); 288 } else { 289 rl_src = LoadValue(rl_src, rcSrc); 290 src_reg = rl_src.reg.GetReg(); 291 } 292 if (rl_dest.wide) { 293 rl_result = EvalLoc(rl_dest, kFPReg, true); 294 NewLIR2(op, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), src_reg); 295 StoreValueWide(rl_dest, rl_result); 296 } else { 297 rl_result = EvalLoc(rl_dest, kFPReg, true); 298 NewLIR2(op, rl_result.reg.GetReg(), src_reg); 299 StoreValue(rl_dest, rl_result); 300 } 301} 302 303void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, 304 RegLocation rl_src1, RegLocation rl_src2) { 305 bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT); 306 bool unordered_gt = (code == Instruction::CMPG_DOUBLE) || (code == Instruction::CMPG_FLOAT); 307 int src_reg1; 308 int src_reg2; 309 if (single) { 310 rl_src1 = LoadValue(rl_src1, kFPReg); 311 src_reg1 = rl_src1.reg.GetReg(); 312 rl_src2 = LoadValue(rl_src2, kFPReg); 313 src_reg2 = rl_src2.reg.GetReg(); 314 } else { 315 rl_src1 = LoadValueWide(rl_src1, kFPReg); 316 src_reg1 = S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()); 317 rl_src2 = LoadValueWide(rl_src2, kFPReg); 318 src_reg2 = S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg()); 319 } 320 // In case result vreg is also src vreg, break association to avoid useless copy by EvalLoc() 321 ClobberSReg(rl_dest.s_reg_low); 322 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 323 LoadConstantNoClobber(rl_result.reg, unordered_gt ? 1 : 0); 324 if (single) { 325 NewLIR2(kX86UcomissRR, src_reg1, src_reg2); 326 } else { 327 NewLIR2(kX86UcomisdRR, src_reg1, src_reg2); 328 } 329 LIR* branch = NULL; 330 if (unordered_gt) { 331 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 332 } 333 // If the result reg can't be byte accessed, use a jump and move instead of a set. 334 if (rl_result.reg.GetReg() >= 4) { 335 LIR* branch2 = NULL; 336 if (unordered_gt) { 337 branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA); 338 NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x0); 339 } else { 340 branch2 = NewLIR2(kX86Jcc8, 0, kX86CondBe); 341 NewLIR2(kX86Mov32RI, rl_result.reg.GetReg(), 0x1); 342 } 343 branch2->target = NewLIR0(kPseudoTargetLabel); 344 } else { 345 NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondA /* above - unsigned > */); 346 } 347 NewLIR2(kX86Sbb32RI, rl_result.reg.GetReg(), 0); 348 if (unordered_gt) { 349 branch->target = NewLIR0(kPseudoTargetLabel); 350 } 351 StoreValue(rl_dest, rl_result); 352} 353 354void X86Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, 355 bool is_double) { 356 LIR* taken = &block_label_list_[bb->taken]; 357 LIR* not_taken = &block_label_list_[bb->fall_through]; 358 LIR* branch = NULL; 359 RegLocation rl_src1; 360 RegLocation rl_src2; 361 if (is_double) { 362 rl_src1 = mir_graph_->GetSrcWide(mir, 0); 363 rl_src2 = mir_graph_->GetSrcWide(mir, 2); 364 rl_src1 = LoadValueWide(rl_src1, kFPReg); 365 rl_src2 = LoadValueWide(rl_src2, kFPReg); 366 NewLIR2(kX86UcomisdRR, S2d(rl_src1.reg.GetLowReg(), rl_src1.reg.GetHighReg()), 367 S2d(rl_src2.reg.GetLowReg(), rl_src2.reg.GetHighReg())); 368 } else { 369 rl_src1 = mir_graph_->GetSrc(mir, 0); 370 rl_src2 = mir_graph_->GetSrc(mir, 1); 371 rl_src1 = LoadValue(rl_src1, kFPReg); 372 rl_src2 = LoadValue(rl_src2, kFPReg); 373 NewLIR2(kX86UcomissRR, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 374 } 375 ConditionCode ccode = mir->meta.ccode; 376 switch (ccode) { 377 case kCondEq: 378 if (!gt_bias) { 379 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 380 branch->target = not_taken; 381 } 382 break; 383 case kCondNe: 384 if (!gt_bias) { 385 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 386 branch->target = taken; 387 } 388 break; 389 case kCondLt: 390 if (gt_bias) { 391 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 392 branch->target = not_taken; 393 } 394 ccode = kCondUlt; 395 break; 396 case kCondLe: 397 if (gt_bias) { 398 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 399 branch->target = not_taken; 400 } 401 ccode = kCondLs; 402 break; 403 case kCondGt: 404 if (gt_bias) { 405 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 406 branch->target = taken; 407 } 408 ccode = kCondHi; 409 break; 410 case kCondGe: 411 if (gt_bias) { 412 branch = NewLIR2(kX86Jcc8, 0, kX86CondPE); 413 branch->target = taken; 414 } 415 ccode = kCondUge; 416 break; 417 default: 418 LOG(FATAL) << "Unexpected ccode: " << ccode; 419 } 420 OpCondBranch(ccode, taken); 421} 422 423void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { 424 RegLocation rl_result; 425 rl_src = LoadValue(rl_src, kCoreReg); 426 rl_result = EvalLoc(rl_dest, kCoreReg, true); 427 OpRegRegImm(kOpAdd, rl_result.reg, rl_src.reg, 0x80000000); 428 StoreValue(rl_dest, rl_result); 429} 430 431void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { 432 RegLocation rl_result; 433 rl_src = LoadValueWide(rl_src, kCoreReg); 434 rl_result = EvalLoc(rl_dest, kCoreReg, true); 435 OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); 436 OpRegCopy(rl_result.reg, rl_src.reg); 437 StoreValueWide(rl_dest, rl_result); 438} 439 440bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) { 441 RegLocation rl_src = info->args[0]; 442 RegLocation rl_dest = InlineTargetWide(info); // double place for result 443 rl_src = LoadValueWide(rl_src, kFPReg); 444 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); 445 NewLIR2(kX86SqrtsdRR, S2d(rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg()), 446 S2d(rl_src.reg.GetLowReg(), rl_src.reg.GetHighReg())); 447 StoreValueWide(rl_dest, rl_result); 448 return true; 449} 450 451 452 453} // namespace art 454