// fp_arm64.cc revision 98216e53f8d538b4386b80b896edfa20e9734827
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "arm64_lir.h" 18#include "codegen_arm64.h" 19#include "dex/quick/mir_to_lir-inl.h" 20#include "utils.h" 21 22namespace art { 23 24void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, 25 RegLocation rl_src1, RegLocation rl_src2) { 26 int op = kA64Brk1d; 27 RegLocation rl_result; 28 29 switch (opcode) { 30 case Instruction::ADD_FLOAT_2ADDR: 31 case Instruction::ADD_FLOAT: 32 op = kA64Fadd3fff; 33 break; 34 case Instruction::SUB_FLOAT_2ADDR: 35 case Instruction::SUB_FLOAT: 36 op = kA64Fsub3fff; 37 break; 38 case Instruction::DIV_FLOAT_2ADDR: 39 case Instruction::DIV_FLOAT: 40 op = kA64Fdiv3fff; 41 break; 42 case Instruction::MUL_FLOAT_2ADDR: 43 case Instruction::MUL_FLOAT: 44 op = kA64Fmul3fff; 45 break; 46 case Instruction::REM_FLOAT_2ADDR: 47 case Instruction::REM_FLOAT: 48 FlushAllRegs(); // Send everything to home location 49 CallRuntimeHelperRegLocationRegLocation(kQuickFmodf, rl_src1, rl_src2, false); 50 rl_result = GetReturn(kFPReg); 51 StoreValue(rl_dest, rl_result); 52 return; 53 case Instruction::NEG_FLOAT: 54 GenNegFloat(rl_dest, rl_src1); 55 return; 56 default: 57 LOG(FATAL) << "Unexpected opcode: " << opcode; 58 } 59 rl_src1 = LoadValue(rl_src1, kFPReg); 60 rl_src2 = LoadValue(rl_src2, kFPReg); 61 rl_result = EvalLoc(rl_dest, kFPReg, true); 62 NewLIR3(op, rl_result.reg.GetReg(), 
rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 63 StoreValue(rl_dest, rl_result); 64} 65 66void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, 67 RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { 68 int op = kA64Brk1d; 69 RegLocation rl_result; 70 71 switch (opcode) { 72 case Instruction::ADD_DOUBLE_2ADDR: 73 case Instruction::ADD_DOUBLE: 74 op = kA64Fadd3fff; 75 break; 76 case Instruction::SUB_DOUBLE_2ADDR: 77 case Instruction::SUB_DOUBLE: 78 op = kA64Fsub3fff; 79 break; 80 case Instruction::DIV_DOUBLE_2ADDR: 81 case Instruction::DIV_DOUBLE: 82 op = kA64Fdiv3fff; 83 break; 84 case Instruction::MUL_DOUBLE_2ADDR: 85 case Instruction::MUL_DOUBLE: 86 op = kA64Fmul3fff; 87 break; 88 case Instruction::REM_DOUBLE_2ADDR: 89 case Instruction::REM_DOUBLE: 90 FlushAllRegs(); // Send everything to home location 91 { 92 RegStorage r_tgt = CallHelperSetup(kQuickFmod); 93 LoadValueDirectWideFixed(rl_src1, rs_d0); 94 LoadValueDirectWideFixed(rl_src2, rs_d1); 95 ClobberCallerSave(); 96 CallHelper(r_tgt, kQuickFmod, false); 97 } 98 rl_result = GetReturnWide(kFPReg); 99 StoreValueWide(rl_dest, rl_result); 100 return; 101 case Instruction::NEG_DOUBLE: 102 GenNegDouble(rl_dest, rl_src1); 103 return; 104 default: 105 LOG(FATAL) << "Unexpected opcode: " << opcode; 106 } 107 108 rl_src1 = LoadValueWide(rl_src1, kFPReg); 109 DCHECK(rl_src1.wide); 110 rl_src2 = LoadValueWide(rl_src2, kFPReg); 111 DCHECK(rl_src2.wide); 112 rl_result = EvalLoc(rl_dest, kFPReg, true); 113 DCHECK(rl_dest.wide); 114 DCHECK(rl_result.wide); 115 NewLIR3(FWIDE(op), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 116 StoreValueWide(rl_dest, rl_result); 117} 118 119void Arm64Mir2Lir::GenConversion(Instruction::Code opcode, 120 RegLocation rl_dest, RegLocation rl_src) { 121 int op = kA64Brk1d; 122 RegLocation rl_result; 123 RegisterClass src_reg_class = kInvalidRegClass; 124 RegisterClass dst_reg_class = kInvalidRegClass; 125 126 switch (opcode) { 127 case 
Instruction::INT_TO_FLOAT: 128 op = kA64Scvtf2fw; 129 src_reg_class = kCoreReg; 130 dst_reg_class = kFPReg; 131 break; 132 case Instruction::FLOAT_TO_INT: 133 op = kA64Fcvtzs2wf; 134 src_reg_class = kFPReg; 135 dst_reg_class = kCoreReg; 136 break; 137 case Instruction::DOUBLE_TO_FLOAT: 138 op = kA64Fcvt2sS; 139 src_reg_class = kFPReg; 140 dst_reg_class = kFPReg; 141 break; 142 case Instruction::FLOAT_TO_DOUBLE: 143 op = kA64Fcvt2Ss; 144 src_reg_class = kFPReg; 145 dst_reg_class = kFPReg; 146 break; 147 case Instruction::INT_TO_DOUBLE: 148 op = FWIDE(kA64Scvtf2fw); 149 src_reg_class = kCoreReg; 150 dst_reg_class = kFPReg; 151 break; 152 case Instruction::DOUBLE_TO_INT: 153 op = FWIDE(kA64Fcvtzs2wf); 154 src_reg_class = kFPReg; 155 dst_reg_class = kCoreReg; 156 break; 157 case Instruction::LONG_TO_DOUBLE: 158 op = FWIDE(kA64Scvtf2fx); 159 src_reg_class = kCoreReg; 160 dst_reg_class = kFPReg; 161 break; 162 case Instruction::FLOAT_TO_LONG: 163 op = kA64Fcvtzs2xf; 164 src_reg_class = kFPReg; 165 dst_reg_class = kCoreReg; 166 break; 167 case Instruction::LONG_TO_FLOAT: 168 op = kA64Scvtf2fx; 169 src_reg_class = kCoreReg; 170 dst_reg_class = kFPReg; 171 break; 172 case Instruction::DOUBLE_TO_LONG: 173 op = FWIDE(kA64Fcvtzs2xf); 174 src_reg_class = kFPReg; 175 dst_reg_class = kCoreReg; 176 break; 177 default: 178 LOG(FATAL) << "Unexpected opcode: " << opcode; 179 } 180 181 DCHECK_NE(src_reg_class, kInvalidRegClass); 182 DCHECK_NE(dst_reg_class, kInvalidRegClass); 183 DCHECK_NE(op, kA64Brk1d); 184 185 if (rl_src.wide) { 186 rl_src = LoadValueWide(rl_src, src_reg_class); 187 } else { 188 rl_src = LoadValue(rl_src, src_reg_class); 189 } 190 191 rl_result = EvalLoc(rl_dest, dst_reg_class, true); 192 NewLIR2(op, rl_result.reg.GetReg(), rl_src.reg.GetReg()); 193 194 if (rl_dest.wide) { 195 StoreValueWide(rl_dest, rl_result); 196 } else { 197 StoreValue(rl_dest, rl_result); 198 } 199} 200 201void Arm64Mir2Lir::GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, 202 bool 
is_double) { 203 LIR* target = &block_label_list_[bb->taken]; 204 RegLocation rl_src1; 205 RegLocation rl_src2; 206 if (is_double) { 207 rl_src1 = mir_graph_->GetSrcWide(mir, 0); 208 rl_src2 = mir_graph_->GetSrcWide(mir, 2); 209 rl_src1 = LoadValueWide(rl_src1, kFPReg); 210 rl_src2 = LoadValueWide(rl_src2, kFPReg); 211 NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 212 } else { 213 rl_src1 = mir_graph_->GetSrc(mir, 0); 214 rl_src2 = mir_graph_->GetSrc(mir, 1); 215 rl_src1 = LoadValue(rl_src1, kFPReg); 216 rl_src2 = LoadValue(rl_src2, kFPReg); 217 NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 218 } 219 ConditionCode ccode = mir->meta.ccode; 220 switch (ccode) { 221 case kCondEq: 222 case kCondNe: 223 break; 224 case kCondLt: 225 if (gt_bias) { 226 ccode = kCondMi; 227 } 228 break; 229 case kCondLe: 230 if (gt_bias) { 231 ccode = kCondLs; 232 } 233 break; 234 case kCondGt: 235 if (gt_bias) { 236 ccode = kCondHi; 237 } 238 break; 239 case kCondGe: 240 if (gt_bias) { 241 ccode = kCondUge; 242 } 243 break; 244 default: 245 LOG(FATAL) << "Unexpected ccode: " << ccode; 246 } 247 OpCondBranch(ccode, target); 248} 249 250 251void Arm64Mir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, 252 RegLocation rl_src1, RegLocation rl_src2) { 253 bool is_double = false; 254 int default_result = -1; 255 RegLocation rl_result; 256 257 switch (opcode) { 258 case Instruction::CMPL_FLOAT: 259 is_double = false; 260 default_result = -1; 261 break; 262 case Instruction::CMPG_FLOAT: 263 is_double = false; 264 default_result = 1; 265 break; 266 case Instruction::CMPL_DOUBLE: 267 is_double = true; 268 default_result = -1; 269 break; 270 case Instruction::CMPG_DOUBLE: 271 is_double = true; 272 default_result = 1; 273 break; 274 default: 275 LOG(FATAL) << "Unexpected opcode: " << opcode; 276 } 277 if (is_double) { 278 rl_src1 = LoadValueWide(rl_src1, kFPReg); 279 rl_src2 = LoadValueWide(rl_src2, kFPReg); 280 // In case result vreg is 
also a src vreg, break association to avoid useless copy by EvalLoc() 281 ClobberSReg(rl_dest.s_reg_low); 282 rl_result = EvalLoc(rl_dest, kCoreReg, true); 283 LoadConstant(rl_result.reg, default_result); 284 NewLIR2(FWIDE(kA64Fcmp2ff), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 285 } else { 286 rl_src1 = LoadValue(rl_src1, kFPReg); 287 rl_src2 = LoadValue(rl_src2, kFPReg); 288 // In case result vreg is also a srcvreg, break association to avoid useless copy by EvalLoc() 289 ClobberSReg(rl_dest.s_reg_low); 290 rl_result = EvalLoc(rl_dest, kCoreReg, true); 291 LoadConstant(rl_result.reg, default_result); 292 NewLIR2(kA64Fcmp2ff, rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 293 } 294 DCHECK(!rl_result.reg.IsFloat()); 295 296 // TODO(Arm64): should we rather do this? 297 // csinc wD, wzr, wzr, eq 298 // csneg wD, wD, wD, le 299 // (which requires 2 instructions rather than 3) 300 301 // Rd = if cond then Rd else -Rd. 302 NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(), 303 rl_result.reg.GetReg(), (default_result == 1) ? 
kArmCondPl : kArmCondLe); 304 NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rwzr, rl_result.reg.GetReg(), 305 kArmCondEq); 306 StoreValue(rl_dest, rl_result); 307} 308 309void Arm64Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { 310 RegLocation rl_result; 311 rl_src = LoadValue(rl_src, kFPReg); 312 rl_result = EvalLoc(rl_dest, kFPReg, true); 313 NewLIR2(kA64Fneg2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg()); 314 StoreValue(rl_dest, rl_result); 315} 316 317void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { 318 RegLocation rl_result; 319 rl_src = LoadValueWide(rl_src, kFPReg); 320 rl_result = EvalLoc(rl_dest, kFPReg, true); 321 NewLIR2(FWIDE(kA64Fneg2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); 322 StoreValueWide(rl_dest, rl_result); 323} 324 325static RegisterClass RegClassForAbsFP(RegLocation rl_src, RegLocation rl_dest) { 326 // If src is in a core reg or, unlikely, dest has been promoted to a core reg, use core reg. 327 if ((rl_src.location == kLocPhysReg && !rl_src.reg.IsFloat()) || 328 (rl_dest.location == kLocPhysReg && !rl_dest.reg.IsFloat())) { 329 return kCoreReg; 330 } 331 // If src is in an fp reg or dest has been promoted to an fp reg, use fp reg. 332 if (rl_src.location == kLocPhysReg || rl_dest.location == kLocPhysReg) { 333 return kFPReg; 334 } 335 // With both src and dest in the stack frame we have to perform load+abs+store. Whether this 336 // is faster using a core reg or fp reg depends on the particular CPU. For example, on A53 337 // it's faster using core reg while on A57 it's faster with fp reg, the difference being 338 // bigger on the A53. Without further investigation and testing we prefer core register. 339 // (If the result is subsequently used in another fp operation, the dalvik reg will probably 340 // get promoted and that should be handled by the cases above.) 
341 return kCoreReg; 342} 343 344bool Arm64Mir2Lir::GenInlinedAbsFloat(CallInfo* info) { 345 if (info->result.location == kLocInvalid) { 346 return true; // Result is unused: inlining successful, no code generated. 347 } 348 RegLocation rl_dest = info->result; 349 RegLocation rl_src = UpdateLoc(info->args[0]); 350 RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest); 351 rl_src = LoadValue(rl_src, reg_class); 352 RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); 353 if (reg_class == kFPReg) { 354 NewLIR2(kA64Fabs2ff, rl_result.reg.GetReg(), rl_src.reg.GetReg()); 355 } else { 356 NewLIR4(kA64Ubfm4rrdd, rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 30); 357 } 358 StoreValue(rl_dest, rl_result); 359 return true; 360} 361 362bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { 363 if (info->result.location == kLocInvalid) { 364 return true; // Result is unused: inlining successful, no code generated. 365 } 366 RegLocation rl_dest = info->result; 367 RegLocation rl_src = UpdateLocWide(info->args[0]); 368 RegisterClass reg_class = RegClassForAbsFP(rl_src, rl_dest); 369 rl_src = LoadValueWide(rl_src, reg_class); 370 RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); 371 if (reg_class == kFPReg) { 372 NewLIR2(FWIDE(kA64Fabs2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); 373 } else { 374 NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62); 375 } 376 StoreValueWide(rl_dest, rl_result); 377 return true; 378} 379 380bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { 381 RegLocation rl_src = info->args[0]; 382 RegLocation rl_dest = InlineTargetWide(info); // double place for result 383 rl_src = LoadValueWide(rl_src, kFPReg); 384 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); 385 NewLIR2(FWIDE(kA64Fsqrt2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); 386 StoreValueWide(rl_dest, rl_result); 387 return true; 388} 389 390bool Arm64Mir2Lir::GenInlinedCeil(CallInfo* info) { 391 RegLocation rl_src = 
info->args[0]; 392 RegLocation rl_dest = InlineTargetWide(info); 393 rl_src = LoadValueWide(rl_src, kFPReg); 394 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); 395 NewLIR2(FWIDE(kA64Frintp2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); 396 StoreValueWide(rl_dest, rl_result); 397 return true; 398} 399 400bool Arm64Mir2Lir::GenInlinedFloor(CallInfo* info) { 401 RegLocation rl_src = info->args[0]; 402 RegLocation rl_dest = InlineTargetWide(info); 403 rl_src = LoadValueWide(rl_src, kFPReg); 404 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); 405 NewLIR2(FWIDE(kA64Frintm2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); 406 StoreValueWide(rl_dest, rl_result); 407 return true; 408} 409 410bool Arm64Mir2Lir::GenInlinedRint(CallInfo* info) { 411 RegLocation rl_src = info->args[0]; 412 RegLocation rl_dest = InlineTargetWide(info); 413 rl_src = LoadValueWide(rl_src, kFPReg); 414 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); 415 NewLIR2(FWIDE(kA64Frintn2ff), rl_result.reg.GetReg(), rl_src.reg.GetReg()); 416 StoreValueWide(rl_dest, rl_result); 417 return true; 418} 419 420bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) { 421 int32_t encoded_imm = EncodeImmSingle(bit_cast<float, uint32_t>(0.5f)); 422 ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0); 423 RegLocation rl_src = info->args[0]; 424 RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info); 425 rl_src = (is_double) ? LoadValueWide(rl_src, kFPReg) : LoadValue(rl_src, kFPReg); 426 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); 427 RegStorage r_imm_point5 = (is_double) ? AllocTempDouble() : AllocTempSingle(); 428 RegStorage r_tmp = (is_double) ? AllocTempDouble() : AllocTempSingle(); 429 // 0.5f and 0.5d are encoded in the same way. 430 NewLIR2(kA64Fmov2fI | wide, r_imm_point5.GetReg(), encoded_imm); 431 NewLIR3(kA64Fadd3fff | wide, r_tmp.GetReg(), rl_src.reg.GetReg(), r_imm_point5.GetReg()); 432 NewLIR2((is_double) ? 
kA64Fcvtms2xS : kA64Fcvtms2ws, rl_result.reg.GetReg(), r_tmp.GetReg()); 433 (is_double) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result); 434 return true; 435} 436 437bool Arm64Mir2Lir::GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double) { 438 DCHECK_EQ(cu_->instruction_set, kArm64); 439 int op = (is_min) ? kA64Fmin3fff : kA64Fmax3fff; 440 ArmOpcode wide = (is_double) ? FWIDE(0) : FUNWIDE(0); 441 RegLocation rl_src1 = info->args[0]; 442 RegLocation rl_src2 = (is_double) ? info->args[2] : info->args[1]; 443 rl_src1 = (is_double) ? LoadValueWide(rl_src1, kFPReg) : LoadValue(rl_src1, kFPReg); 444 rl_src2 = (is_double) ? LoadValueWide(rl_src2, kFPReg) : LoadValue(rl_src2, kFPReg); 445 RegLocation rl_dest = (is_double) ? InlineTargetWide(info) : InlineTarget(info); 446 RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); 447 NewLIR3(op | wide, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), rl_src2.reg.GetReg()); 448 (is_double) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result); 449 return true; 450} 451 452} // namespace art 453