//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel final : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
      : FastISel(funcInfo, libInfo) {
    Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  bool fastSelectInstruction(const Instruction *I) override;

  /// \brief The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction. If possible,
  /// try to fold the load as an operand to the instruction, returning true on
  /// success.
  bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                           const LoadInst *LI) override;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
                          DebugLoc DL);

  bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
                       unsigned &ResultReg, unsigned Alignment = 1);

  bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);
  bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                        X86AddressMode &AM,
                        MachineMemOperand *MMO = nullptr, bool Aligned = false);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);

  bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
                               const TargetRegisterClass *RC);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);
  bool X86SelectSIToFP(const Instruction *I);

  const X86InstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  bool handleConstantAddresses(const Value *V, X86AddressMode &AM);

  unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
  unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
  unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
  unsigned fastMaterializeConstant(const Constant *C) override;

  unsigned fastMaterializeAlloca(const AllocaInst *C) override;

  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal when SSE2 is available
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is legal when SSE1 is available
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);

  bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                            const Value *Cond);

  const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
                                            X86AddressMode &AM);
};

} // end anonymous namespace.
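
/// \brief Map an IR comparison predicate onto the X86 condition code used to
/// implement it. The returned bool indicates that the compare operands must
/// be swapped to use that condition code; FCMP_OEQ, FCMP_UNE, and any
/// predicate not listed below yield X86::COND_INVALID.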
static std::pair<X86::CondCode, bool>
getX86ConditionCode(CmpInst::Predicate Predicate) {
  X86::CondCode CC = X86::COND_INVALID;
  bool NeedSwap = false;
  switch (Predicate) {
  default: break;
  // Floating-point Predicates
  case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
  case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
  case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
  case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
  case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
  case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
  case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
  case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
  case CmpInst::FCMP_OEQ: // fall-through
  case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;

  // Integer Predicates
  case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
  case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
  case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
  case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
  case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
  case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
  case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
  case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
  case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
  case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
  }

  return std::make_pair(CC, NeedSwap);
}

static std::pair<unsigned, bool>
getX86SSEConditionCode(CmpInst::Predicate Predicate) {
  unsigned CC;
  bool NeedSwap = false;

  // SSE Condition code mapping:
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  switch (Predicate) {
  default: llvm_unreachable("Unexpected predicate");
  case CmpInst::FCMP_OEQ: CC = 0;          break;
  case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLT: CC = 1;          break;
  case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_OLE: CC = 2;          break;
  case CmpInst::FCMP_UNO: CC = 3;          break;
  case CmpInst::FCMP_UNE: CC = 4;          break;
  case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGE: CC = 5;          break;
  case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
  case CmpInst::FCMP_UGT: CC = 6;          break;
  case CmpInst::FCMP_ORD: CC = 7;          break;
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_ONE: CC = 8;          break;
  }

  return std::make_pair(CC, NeedSwap);
}

/// \brief Adds a complex addressing mode to the given machine instr builder.
/// Note, this will constrain the index register. If it's not possible to
/// constrain the given index register, then a new one will be created. The
/// IndexReg field of the addressing mode will be updated to match in this case.
const MachineInstrBuilder &
X86FastISel::addFullAddress(const MachineInstrBuilder &MIB,
                            X86AddressMode &AM) {
  // First constrain the index register. It needs to be a GR64_NOSP.
  AM.IndexReg = constrainOperandRegClass(MIB->getDesc(), AM.IndexReg,
                                         MIB->getNumOperands() +
                                         X86::AddrIndexReg);
  return ::addFullAddress(MIB, AM);
}

/// \brief Check if it is possible to fold the condition from the XALU intrinsic
/// into the user.
/// The condition code will only be updated on success.
bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
                                       const Value *Cond) {
  if (!isa<ExtractValueInst>(Cond))
    return false;

  const auto *EV = cast<ExtractValueInst>(Cond);
  if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    return false;

  const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
  MVT RetVT;
  const Function *Callee = II->getCalledFunction();
  Type *RetTy =
    cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
  if (!isTypeLegal(RetTy, RetVT))
    return false;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return false;

  X86::CondCode TmpCC;
  switch (II->getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
  }

  // Check if both instructions are in the same basic block.
  if (II->getParent() != I->getParent())
    return false;

  // Make sure nothing is in the way.
  BasicBlock::const_iterator Start(I);
  BasicBlock::const_iterator End(II);
  for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    // We only expect extractvalue instructions between the intrinsic and the
    // instruction to be selected.
    if (!isa<ExtractValueInst>(Itr))
      return false;

    // Check that the extractvalue operand comes from the intrinsic.
    const auto *EVI = cast<ExtractValueInst>(Itr);
    if (EVI->getAggregateOperand() != II)
      return false;
  }

  CC = TmpCC;
  return true;
}

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
                                  MachineMemOperand *MMO, unsigned &ResultReg,
                                  unsigned Alignment) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = &X86::GR8RegClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = &X86::GR16RegClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = &X86::GR32RegClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = &X86::GR64RegClass;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  case MVT::v4f32:
    if (Alignment >= 16)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
    RC  = &X86::VR128RegClass;
    break;
  case MVT::v2f64:
    if (Alignment >= 16)
      Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
    RC  = &X86::VR128RegClass;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Alignment >= 16)
      Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
    else
      Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
    RC  = &X86::VR128RegClass;
    break;
  }

  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  addFullAddress(MIB, AM);
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr Ptr
/// and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  bool HasSSE2 = Subtarget->hasSSE2();
  bool HasSSE4A = Subtarget->hasSSE4A();
  bool HasAVX = Subtarget->hasAVX();
  bool IsNonTemporal = MMO && MMO->isNonTemporal();

  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(X86::AND8ri), AndResult)
      .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
    ValReg = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32:
    Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = (IsNonTemporal && HasSSE2) ?
          X86::MOVNTI_64mr : X86::MOV64mr;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSS;
      else
        Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
    } else
      Opc = X86::ST_Fp32m;
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      if (IsNonTemporal && HasSSE4A)
        Opc = X86::MOVNTSD;
      else
        Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
    } else
      Opc = X86::ST_Fp64m;
    break;
  case MVT::v4f32:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
      else
        Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
    } else
      Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
    break;
  case MVT::v2f64:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
      else
        Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
    } else
      Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    if (Aligned) {
      if (IsNonTemporal)
        Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
      else
        Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
    } else
      Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
    break;
  }

  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
  addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
  if (MMO)
    MIB->addMemOperand(*FuncInfo.MF, MMO);

  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   X86AddressMode &AM,
                                   MachineMemOperand *MMO, bool Aligned) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:  Signed = false; // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      MachineInstrBuilder MIB =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
      addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
                                            : CI->getZExtValue());
      if (MMO)
        MIB->addMemOperand(*FuncInfo.MF, MMO);
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  bool ValKill = hasTrivialKill(Val);
  return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (GV->isThreadLocal())
      return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can. Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub. If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = nullptr;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy(DL) == MVT::i64) {
          Opc = X86::MOV64rm;
          RC  = &X86::GR64RegClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC  = &X86::GR32RegClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = nullptr;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  SmallVector<const Value *, 32> GEPs;
redo_gep:
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = DL.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size. See if we can push the scale into immediates.
      uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (canFoldAddIntoGEP(U, Op)) {
          // A compatible add with a constant operand. Fold the constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }

    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;

    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    GEPs.push_back(V);

    if (const GetElementPtrInst *GEP =
          dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
      // Ok, the GEP indices were covered by constant-offset and scaled-index
      // addressing. Update the address state and move on to examining the base.
      V = GEP;
      goto redo_gep;
    } else if (X86SelectAddress(U->getOperand(0), AM)) {
      return true;
    }

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;

    for (SmallVectorImpl<const Value *>::reverse_iterator
           I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
      if (handleConstantAddresses(*I, AM))
        return true;

    return false;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  return handleConstantAddresses(V, AM);
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  const Instruction *I = dyn_cast<Instruction>(V);
  // Record if the value is defined in the same basic block.
  //
  // This information is crucial to know whether or not folding an
  // operand is valid.
  // Indeed, FastISel generates or reuses a virtual register for all
  // operands of all instructions it selects. Obviously, the definition and
  // its uses must use the same virtual register otherwise the produced
  // code is incorrect.
  // Before instruction selection, FunctionLoweringInfo::set sets the virtual
  // registers for values that are alive across basic blocks. This ensures
  // that the values are consistent across basic blocks, even if different
  // instruction selection mechanisms are used (e.g., a mix of
  // SDISel and FastISel).
  // For values local to a basic block, the instruction selection process
  // generates these virtual registers with whatever method is appropriate
  // for its needs. In particular, FastISel and SDISel do not share the way
  // local virtual registers are set.
  // Therefore, it is impossible (or at least unsafe) to share values
  // between basic blocks unless they use the same instruction selection
  // method, which is not guaranteed for X86.
  // Moreover, queries like hasOneUse would not be accurate if we allowed
  // references to values across basic blocks when they were not originally
  // alive across basic blocks.
  bool InMBB = true;
  if (I) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle DLL Import.
    if (GV->hasDLLImportStorageClass())
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can. Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  const Value *Val = S->getValueOperand();
  const Value *Ptr = S->getPointerOperand();

  MVT VT;
  if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    return false;

  unsigned Alignment = S->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;
  bool Aligned = Alignment >= ABIAlignment;

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall &&
      CC != CallingConv::X86_64_SysV)
    return false;

  if (Subtarget->isCallingConvWin64(CC))
    return false;

  // Don't handle popping bytes on return for now.
  if (X86MFInfo->getBytesToPopOnReturn() != 0)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(DL, RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax. We also do the same with %eax for Win32.
  if (F.hasStructRetAttr() &&
      (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
    unsigned Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  const LoadInst *LI = cast<LoadInst>(I);

  // Atomic loads need special handling.
  if (LI->isAtomic())
    return false;

  MVT VT;
  if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
    return false;

  const Value *Ptr = LI->getPointerOperand();

  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  unsigned Alignment = LI->getAlignment();
  unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
  if (Alignment == 0) // Ensure that codegen never sees alignment 0
    Alignment = ABIAlignment;

  unsigned ResultReg = 0;
  if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
                       Alignment))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX = Subtarget->hasAVX();
  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
  case MVT::f64:
    return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
  }
}

/// If we have a comparison whose RHS is the constant integer RHSC, return an
/// opcode that can fold the immediate into the compare (e.g. CMP32ri);
/// otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  int64_t Val = RHSC->getSExtValue();
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default:
    return 0;
  case MVT::i8:
    return X86::CMP8ri;
  case MVT::i16:
    if (isInt<8>(Val))
      return X86::CMP16ri8;
    return X86::CMP16ri;
  case MVT::i32:
    if (isInt<8>(Val))
      return X86::CMP32ri8;
    return X86::CMP32ri;
  case MVT::i64:
    if (isInt<8>(Val))
      return X86::CMP64ri8;
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if (isInt<32>(Val))
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT, DebugLoc CurDbgLoc) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
        .addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
    .addReg(Op0Reg)
    .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Try to optimize or fold the cmp.
  CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
  unsigned ResultReg = 0;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_FALSE: {
    ResultReg = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
            ResultReg);
    ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
                                           X86::sub_8bit);
    if (!ResultReg)
      return false;
    break;
  }
  case CmpInst::FCMP_TRUE: {
    ResultReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
            ResultReg).addImm(1);
    break;
  }
  }

  if (ResultReg) {
    updateValueMap(I, ResultReg);
    return true;
  }

  const Value *LHS = CI->getOperand(0);
  const Value *RHS = CI->getOperand(1);

  // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
  // We don't have to materialize a zero constant for this case and can just
  // use %x again on the RHS.
  if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
    const auto *RHSC = dyn_cast<ConstantFP>(RHS);
    if (RHSC && RHSC->isNullValue())
      RHS = LHS;
  }

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static unsigned SETFOpcTable[2][3] = {
    { X86::SETEr,  X86::SETNPr, X86::AND8rr },
    { X86::SETNEr, X86::SETPr,  X86::OR8rr }
  };
  unsigned *SETFOpc = nullptr;
  switch (Predicate) {
  default: break;
  case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
  case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
  }

  ResultReg = createResultReg(&X86::GR8RegClass);
  if (SETFOpc) {
    if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
      return false;

    unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
    unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
            FlagReg1);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
            FlagReg2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
            ResultReg).addReg(FlagReg1).addReg(FlagReg2);
    updateValueMap(I, ResultReg);
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
  unsigned Opc = X86::getSETFromCond(CC);

  if (SwapArgs)
    std::swap(LHS, RHS);

  // Emit a compare of LHS/RHS.
  if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  EVT DstVT = TLI.getValueType(DL, I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Handle zero-extension from i1 to i8, which is common.
  MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
  if (SrcVT.SimpleTy == MVT::i1) {
    // Set the high bits to zero.
    ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    SrcVT = MVT::i8;

    if (ResultReg == 0)
      return false;
  }

  if (DstVT == MVT::i64) {
    // Handle extension to 64-bits via sub-register shenanigans.
    unsigned MovInst;

    switch (SrcVT.SimpleTy) {
    case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
    case MVT::i16: MovInst = X86::MOVZX32rr16; break;
    case MVT::i32: MovInst = X86::MOV32rr;     break;
    default: llvm_unreachable("Unexpected zext to i64 source type");
    }

    unsigned Result32 = createResultReg(&X86::GR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
      .addReg(ResultReg);

    ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
            ResultReg)
      .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
  } else if (DstVT != MVT::i8) {
    ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  X86::CondCode CC;
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(DL, CI->getOperand(0)->getType());

      // Try to optimize or fold the cmp.
      CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
      case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, DbgLoc); return true;
      }

      const Value *CmpLHS = CI->getOperand(0);
      const Value *CmpRHS = CI->getOperand(1);

      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
      // 0.0.
      // We don't have to materialize a zero constant for this case and can
      // just use %x again on the RHS.
      if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
        const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
        if (CmpRHSC && CmpRHSC->isNullValue())
          CmpRHS = CmpLHS;
      }

      // Try to take advantage of fallthrough opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
      // code check. Instead two branch instructions are required to check all
      // the flags. First we change the predicate to a supported condition code,
      // which will be the first branch. Later on we will emit the second
      // branch.
      bool NeedExtraBranch = false;
      switch (Predicate) {
      default: break;
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB); // fall-through
      case CmpInst::FCMP_UNE:
        NeedExtraBranch = true;
        Predicate = CmpInst::FCMP_ONE;
        break;
      }

      bool SwapArgs;
      unsigned BranchOpc;
      std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
      assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

      BranchOpc = X86::GetCondBranchFromCond(CC);
      if (SwapArgs)
        std::swap(CmpLHS, CmpRHS);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      // X86 requires a second branch to handle UNE (and OEQ, which is mapped
      // to UNE above).
      if (NeedExtraBranch) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
          .addMBB(TrueMBB);
      }

      finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri; break;
      case MVT::i16: TestOpc = X86::TEST16ri; break;
      case MVT::i32: TestOpc = X86::TEST32ri; break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        unsigned OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        unsigned JmpOpc = X86::JNE_1;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpOpc = X86::JE_1;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
          .addMBB(TrueMBB);

        finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
        return true;
      }
    }
  } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
    // Fake request the condition, otherwise the intrinsic might be completely
    // optimized away.
    unsigned TmpReg = getRegForValue(BI->getCondition());
    if (TmpReg == 0)
      return false;

    unsigned BranchOpc = X86::GetCondBranchFromCond(CC);

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
      .addMBB(TrueMBB);
    finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
    return true;
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
    .addReg(OpReg).addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
    .addMBB(TrueMBB);
  finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
  return true;
}

bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  const TargetRegisterClass *RC = nullptr;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
    .addReg(Op0Reg);
  updateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 DIV/IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended or zero-extended
  // into highreg. The exception is i8, where the dividend is defined
  // as a single register rather than a register pair, and we
  // therefore directly sign-extend or zero-extend the dividend into
  // lowreg, instead of copying, and ignore the highreg.
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    const TargetRegisterClass *RC;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
1626 } ResultTable[NumOps]; 1627 } OpTable[NumTypes] = { 1628 { &X86::GR8RegClass, X86::AX, 0, { 1629 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S }, // SDiv 1630 { X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S }, // SRem 1631 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U }, // UDiv 1632 { X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U }, // URem 1633 } 1634 }, // i8 1635 { &X86::GR16RegClass, X86::AX, X86::DX, { 1636 { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv 1637 { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem 1638 { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv 1639 { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem 1640 } 1641 }, // i16 1642 { &X86::GR32RegClass, X86::EAX, X86::EDX, { 1643 { X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S }, // SDiv 1644 { X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S }, // SRem 1645 { X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U }, // UDiv 1646 { X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U }, // URem 1647 } 1648 }, // i32 1649 { &X86::GR64RegClass, X86::RAX, X86::RDX, { 1650 { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv 1651 { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem 1652 { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv 1653 { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem 1654 } 1655 }, // i64 1656 }; 1657 1658 MVT VT; 1659 if (!isTypeLegal(I->getType(), VT)) 1660 return false; 1661 1662 unsigned TypeIndex, OpIndex; 1663 switch (VT.SimpleTy) { 1664 default: return false; 1665 case MVT::i8: TypeIndex = 0; break; 1666 case MVT::i16: TypeIndex = 1; break; 1667 case MVT::i32: TypeIndex = 2; break; 1668 case MVT::i64: TypeIndex = 3; 1669 if (!Subtarget->is64Bit()) 1670 return false; 1671 break; 1672 } 1673 1674 switch (I->getOpcode()) { 1675 default: llvm_unreachable("Unexpected div/rem opcode"); 1676 case Instruction::SDiv: OpIndex = 0; break; 1677 case Instruction::SRem: OpIndex = 1; break; 1678 case Instruction::UDiv: OpIndex = 2; break; 1679 case Instruction::URem: OpIndex = 3; break; 1680 } 1681 1682 const DivRemEntry &TypeEntry = OpTable[TypeIndex]; 1683 const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex]; 1684 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 1685 if (Op0Reg == 0) 1686 return false; 1687 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1688 if (Op1Reg == 0) 1689 return false; 1690 1691 // Move op0 into low-order input register. 1692 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1693 TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg); 1694 // Zero-extend or sign-extend into high-order input register. 1695 if (OpEntry.OpSignExtend) { 1696 if (OpEntry.IsOpSigned) 1697 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1698 TII.get(OpEntry.OpSignExtend)); 1699 else { 1700 unsigned Zero32 = createResultReg(&X86::GR32RegClass); 1701 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1702 TII.get(X86::MOV32r0), Zero32); 1703 1704 // Copy the zero into the appropriate sub/super/identical physical 1705 // register. Unfortunately the operations needed are not uniform enough 1706 // to fit neatly into the table above. 
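      // Illustrative sketch (not emitted verbatim): an unsigned 'udiv i32'
      // ends up roughly as
      //   %eax = COPY %op0      ; OpCopy into the low input register
      //   %zero = MOV32r0       ; the unsigned "extend" is just a zero
      //   %edx = COPY %zero     ; this if/else chain picks the right copy
      //   DIV32r %op1           ; quotient in EAX, remainder in EDX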
1707 if (VT.SimpleTy == MVT::i16) {
1708 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1709 TII.get(Copy), TypeEntry.HighInReg)
1710 .addReg(Zero32, 0, X86::sub_16bit);
1711 } else if (VT.SimpleTy == MVT::i32) {
1712 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1713 TII.get(Copy), TypeEntry.HighInReg)
1714 .addReg(Zero32);
1715 } else if (VT.SimpleTy == MVT::i64) {
1716 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1717 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
1718 .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
1719 }
1720 }
1721 }
1722 // Generate the DIV/IDIV instruction.
1723 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1724 TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1725 // For i8 remainder, we can't reference AH directly, as we'll end
1726 // up with bogus copies like %R9B = COPY %AH. Reference AX
1727 // instead to prevent AH references in a REX instruction.
1728 //
1729 // The current assumption of the fast register allocator is that isel
1730 // won't generate explicit references to the GPR8_NOREX registers. If
1731 // the allocator and/or the backend get enhanced to be more robust in
1732 // that regard, this can be, and should be, removed.
1733 unsigned ResultReg = 0;
1734 if ((I->getOpcode() == Instruction::SRem ||
1735 I->getOpcode() == Instruction::URem) &&
1736 OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
1737 unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
1738 unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
1739 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1740 TII.get(Copy), SourceSuperReg).addReg(X86::AX);
1741
1742 // Shift AX right by 8 bits instead of using AH.
1743 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
1744 ResultSuperReg).addReg(SourceSuperReg).addImm(8);
1745
1746 // Now reference the 8-bit subreg of the result.
1747 ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
1748 /*Kill=*/true, X86::sub_8bit);
1749 }
1750 // Copy the result out of the physreg if we haven't already.
1751 if (!ResultReg) {
1752 ResultReg = createResultReg(TypeEntry.RC);
1753 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
1754 .addReg(OpEntry.DivRemResultReg);
1755 }
1756 updateValueMap(I, ResultReg);
1757
1758 return true;
1759}
1760
1761/// \brief Emit a conditional move instruction (if they are supported) to lower
1762/// the select.
1763bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
1764 // Check if the subtarget supports these instructions.
1765 if (!Subtarget->hasCMov())
1766 return false;
1767
1768 // FIXME: Add support for i8.
1769 if (RetVT < MVT::i16 || RetVT > MVT::i64)
1770 return false;
1771
1772 const Value *Cond = I->getOperand(0);
1773 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1774 bool NeedTest = true;
1775 X86::CondCode CC = X86::COND_NE;
1776
1777 // Optimize conditions coming from a compare if both instructions are in the
1778 // same basic block (values defined in other basic blocks may not have
1779 // initialized registers).
1780 const auto *CI = dyn_cast<CmpInst>(Cond);
1781 if (CI && (CI->getParent() == I->getParent())) {
1782 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1783
1784 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
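    // For example (illustrative): OEQ requires ZF set and PF clear, so it is
    // lowered as SETE + SETNP whose results are combined with TEST8rr; UNE
    // requires ZF clear or PF set, so it uses SETNE + SETP combined with
    // OR8rr. The merged value is then consumed through the NE condition.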
1785 static unsigned SETFOpcTable[2][3] = {
1786 { X86::SETNPr, X86::SETEr , X86::TEST8rr },
1787 { X86::SETPr, X86::SETNEr, X86::OR8rr }
1788 };
1789 unsigned *SETFOpc = nullptr;
1790 switch (Predicate) {
1791 default: break;
1792 case CmpInst::FCMP_OEQ:
1793 SETFOpc = &SETFOpcTable[0][0];
1794 Predicate = CmpInst::ICMP_NE;
1795 break;
1796 case CmpInst::FCMP_UNE:
1797 SETFOpc = &SETFOpcTable[1][0];
1798 Predicate = CmpInst::ICMP_NE;
1799 break;
1800 }
1801
1802 bool NeedSwap;
1803 std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
1804 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
1805
1806 const Value *CmpLHS = CI->getOperand(0);
1807 const Value *CmpRHS = CI->getOperand(1);
1808 if (NeedSwap)
1809 std::swap(CmpLHS, CmpRHS);
1810
1811 EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType());
1812 // Emit a compare of the LHS and RHS, setting the flags.
1813 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
1814 return false;
1815
1816 if (SETFOpc) {
1817 unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
1818 unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
1819 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
1820 FlagReg1);
1821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
1822 FlagReg2);
1823 auto const &II = TII.get(SETFOpc[2]);
1824 if (II.getNumDefs()) {
1825 unsigned TmpReg = createResultReg(&X86::GR8RegClass);
1826 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
1827 .addReg(FlagReg2).addReg(FlagReg1);
1828 } else {
1829 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
1830 .addReg(FlagReg2).addReg(FlagReg1);
1831 }
1832 }
1833 NeedTest = false;
1834 } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
1835 // Fake-request the condition; otherwise the intrinsic might be completely
1836 // optimized away.
1837 unsigned TmpReg = getRegForValue(Cond);
1838 if (TmpReg == 0)
1839 return false;
1840
1841 NeedTest = false;
1842 }
1843
1844 if (NeedTest) {
1845 // Selects operate on i1; however, CondReg is 8 bits wide and may contain
1846 // garbage. Only its least significant bit is guaranteed to be accurate, so
1847 // reading more than the LSB could observe a non-zero value even when the
1848 // condition is actually false. Therefore, truncate CondReg to i1 for the
1849 // select by emitting a TEST against 1.
1850 unsigned CondReg = getRegForValue(Cond);
1851 if (CondReg == 0)
1852 return false;
1853 bool CondIsKill = hasTrivialKill(Cond);
1854
1855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
1856 .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
1857 }
1858
1859 const Value *LHS = I->getOperand(1);
1860 const Value *RHS = I->getOperand(2);
1861
1862 unsigned RHSReg = getRegForValue(RHS);
1863 bool RHSIsKill = hasTrivialKill(RHS);
1864
1865 unsigned LHSReg = getRegForValue(LHS);
1866 bool LHSIsKill = hasTrivialKill(LHS);
1867
1868 if (!LHSReg || !RHSReg)
1869 return false;
1870
1871 unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
1872 unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
1873 LHSReg, LHSIsKill);
1874 updateValueMap(I, ResultReg);
1875 return true;
1876}
1877
1878/// \brief Emit SSE or AVX instructions to lower the select.
1879///
1880/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
1881/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
1882/// SSE instructions are available. If AVX is available, try to use a VBLENDV.
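/// For example (roughly), 'select (fcmp olt float %a, %b), float %t, float %f'
/// without AVX becomes a CMPSS producing an all-ones/all-zeros mask, an ANDPS
/// of the mask with %t, an ANDNPS of the mask with %f, and an ORPS merging the
/// two; with AVX this collapses to VCMPSS followed by VBLENDVPS.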
1883bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
1884 // Optimize conditions coming from a compare if both instructions are in the
1885 // same basic block (values defined in other basic blocks may not have
1886 // initialized registers).
1887 const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
1888 if (!CI || (CI->getParent() != I->getParent()))
1889 return false;
1890
1891 if (I->getType() != CI->getOperand(0)->getType() ||
1892 !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
1893 (Subtarget->hasSSE2() && RetVT == MVT::f64)))
1894 return false;
1895
1896 const Value *CmpLHS = CI->getOperand(0);
1897 const Value *CmpRHS = CI->getOperand(1);
1898 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
1899
1900 // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
1901 // We don't have to materialize a zero constant for this case and can just use
1902 // %x again on the RHS.
1903 if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
1904 const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
1905 if (CmpRHSC && CmpRHSC->isNullValue())
1906 CmpRHS = CmpLHS;
1907 }
1908
1909 unsigned CC;
1910 bool NeedSwap;
1911 std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
1912 if (CC > 7)
1913 return false;
1914
1915 if (NeedSwap)
1916 std::swap(CmpLHS, CmpRHS);
1917
1918 // Choose the SSE instruction sequence based on data type (float or double).
1919 static unsigned OpcTable[2][4] = {
1920 { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
1921 { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
1922 };
1923
1924 unsigned *Opc = nullptr;
1925 switch (RetVT.SimpleTy) {
1926 default: return false;
1927 case MVT::f32: Opc = &OpcTable[0][0]; break;
1928 case MVT::f64: Opc = &OpcTable[1][0]; break;
1929 }
1930
1931 const Value *LHS = I->getOperand(1);
1932 const Value *RHS = I->getOperand(2);
1933
1934 unsigned LHSReg = getRegForValue(LHS);
1935 bool LHSIsKill = hasTrivialKill(LHS);
1936
1937 unsigned RHSReg = getRegForValue(RHS);
1938 bool RHSIsKill = hasTrivialKill(RHS);
1939
1940 unsigned CmpLHSReg = getRegForValue(CmpLHS);
1941 bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
1942
1943 unsigned CmpRHSReg = getRegForValue(CmpRHS);
1944 bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
1945
1946 if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
1947 return false;
1948
1949 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
1950 unsigned ResultReg;
1951
1952 if (Subtarget->hasAVX()) {
1953 const TargetRegisterClass *FR32 = &X86::FR32RegClass;
1954 const TargetRegisterClass *VR128 = &X86::VR128RegClass;
1955
1956 // If we have AVX, create 1 blendv instead of 3 logic instructions.
1957 // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
1958 // uses XMM0 as the selection register. That may need just as many
1959 // instructions as the AND/ANDN/OR sequence due to register moves, so
1960 // don't bother.
1961 unsigned CmpOpcode =
1962 (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
1963 unsigned BlendOpcode =
1964 (RetVT.SimpleTy == MVT::f32) ?
X86::VBLENDVPSrr : X86::VBLENDVPDrr; 1965 1966 unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill, 1967 CmpRHSReg, CmpRHSIsKill, CC); 1968 unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, 1969 LHSReg, LHSIsKill, CmpReg, true); 1970 ResultReg = createResultReg(RC); 1971 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1972 TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); 1973 } else { 1974 unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, 1975 CmpRHSReg, CmpRHSIsKill, CC); 1976 unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, 1977 LHSReg, LHSIsKill); 1978 unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, 1979 RHSReg, RHSIsKill); 1980 ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, 1981 AndReg, /*IsKill=*/true); 1982 } 1983 updateValueMap(I, ResultReg); 1984 return true; 1985} 1986 1987bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { 1988 // These are pseudo CMOV instructions and will be later expanded into control- 1989 // flow. 1990 unsigned Opc; 1991 switch (RetVT.SimpleTy) { 1992 default: return false; 1993 case MVT::i8: Opc = X86::CMOV_GR8; break; 1994 case MVT::i16: Opc = X86::CMOV_GR16; break; 1995 case MVT::i32: Opc = X86::CMOV_GR32; break; 1996 case MVT::f32: Opc = X86::CMOV_FR32; break; 1997 case MVT::f64: Opc = X86::CMOV_FR64; break; 1998 } 1999 2000 const Value *Cond = I->getOperand(0); 2001 X86::CondCode CC = X86::COND_NE; 2002 2003 // Optimize conditions coming from a compare if both instructions are in the 2004 // same basic block (values defined in other basic blocks may not have 2005 // initialized registers). 2006 const auto *CI = dyn_cast<CmpInst>(Cond); 2007 if (CI && (CI->getParent() == I->getParent())) { 2008 bool NeedSwap; 2009 std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); 2010 if (CC > X86::LAST_VALID_COND) 2011 return false; 2012 2013 const Value *CmpLHS = CI->getOperand(0); 2014 const Value *CmpRHS = CI->getOperand(1); 2015 2016 if (NeedSwap) 2017 std::swap(CmpLHS, CmpRHS); 2018 2019 EVT CmpVT = TLI.getValueType(DL, CmpLHS->getType()); 2020 if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc())) 2021 return false; 2022 } else { 2023 unsigned CondReg = getRegForValue(Cond); 2024 if (CondReg == 0) 2025 return false; 2026 bool CondIsKill = hasTrivialKill(Cond); 2027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) 2028 .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); 2029 } 2030 2031 const Value *LHS = I->getOperand(1); 2032 const Value *RHS = I->getOperand(2); 2033 2034 unsigned LHSReg = getRegForValue(LHS); 2035 bool LHSIsKill = hasTrivialKill(LHS); 2036 2037 unsigned RHSReg = getRegForValue(RHS); 2038 bool RHSIsKill = hasTrivialKill(RHS); 2039 2040 if (!LHSReg || !RHSReg) 2041 return false; 2042 2043 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 2044 2045 unsigned ResultReg = 2046 fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC); 2047 updateValueMap(I, ResultReg); 2048 return true; 2049} 2050 2051bool X86FastISel::X86SelectSelect(const Instruction *I) { 2052 MVT RetVT; 2053 if (!isTypeLegal(I->getType(), RetVT)) 2054 return false; 2055 2056 // Check if we can fold the select. 
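  // For example, 'select (fcmp false ...), %t, %f' is simply %f and
  // 'select (fcmp true ...), %t, %f' is simply %t, so a plain COPY suffices.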
2057 if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) { 2058 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2059 const Value *Opnd = nullptr; 2060 switch (Predicate) { 2061 default: break; 2062 case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break; 2063 case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break; 2064 } 2065 // No need for a select anymore - this is an unconditional move. 2066 if (Opnd) { 2067 unsigned OpReg = getRegForValue(Opnd); 2068 if (OpReg == 0) 2069 return false; 2070 bool OpIsKill = hasTrivialKill(Opnd); 2071 const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); 2072 unsigned ResultReg = createResultReg(RC); 2073 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2074 TII.get(TargetOpcode::COPY), ResultReg) 2075 .addReg(OpReg, getKillRegState(OpIsKill)); 2076 updateValueMap(I, ResultReg); 2077 return true; 2078 } 2079 } 2080 2081 // First try to use real conditional move instructions. 2082 if (X86FastEmitCMoveSelect(RetVT, I)) 2083 return true; 2084 2085 // Try to use a sequence of SSE instructions to simulate a conditional move. 2086 if (X86FastEmitSSESelect(RetVT, I)) 2087 return true; 2088 2089 // Fall-back to pseudo conditional move instructions, which will be later 2090 // converted to control-flow. 2091 if (X86FastEmitPseudoSelect(RetVT, I)) 2092 return true; 2093 2094 return false; 2095} 2096 2097bool X86FastISel::X86SelectSIToFP(const Instruction *I) { 2098 // The target-independent selection algorithm in FastISel already knows how 2099 // to select a SINT_TO_FP if the target is SSE but not AVX. 2100 // Early exit if the subtarget doesn't have AVX. 2101 if (!Subtarget->hasAVX()) 2102 return false; 2103 2104 if (!I->getOperand(0)->getType()->isIntegerTy(32)) 2105 return false; 2106 2107 // Select integer to float/double conversion. 2108 unsigned OpReg = getRegForValue(I->getOperand(0)); 2109 if (OpReg == 0) 2110 return false; 2111 2112 const TargetRegisterClass *RC = nullptr; 2113 unsigned Opcode; 2114 2115 if (I->getType()->isDoubleTy()) { 2116 // sitofp int -> double 2117 Opcode = X86::VCVTSI2SDrr; 2118 RC = &X86::FR64RegClass; 2119 } else if (I->getType()->isFloatTy()) { 2120 // sitofp int -> float 2121 Opcode = X86::VCVTSI2SSrr; 2122 RC = &X86::FR32RegClass; 2123 } else 2124 return false; 2125 2126 unsigned ImplicitDefReg = createResultReg(RC); 2127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2128 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); 2129 unsigned ResultReg = 2130 fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false); 2131 updateValueMap(I, ResultReg); 2132 return true; 2133} 2134 2135// Helper method used by X86SelectFPExt and X86SelectFPTrunc. 
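// Note: the AVX conversion instructions used here (e.g. VCVTSS2SDrr and
// VCVTSD2SSrr) take an extra source register whose upper elements pass
// through unchanged, which is why the operand register is added twice below;
// the plain SSE forms take a single source.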
2136bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I, 2137 unsigned TargetOpc, 2138 const TargetRegisterClass *RC) { 2139 assert((I->getOpcode() == Instruction::FPExt || 2140 I->getOpcode() == Instruction::FPTrunc) && 2141 "Instruction must be an FPExt or FPTrunc!"); 2142 2143 unsigned OpReg = getRegForValue(I->getOperand(0)); 2144 if (OpReg == 0) 2145 return false; 2146 2147 unsigned ResultReg = createResultReg(RC); 2148 MachineInstrBuilder MIB; 2149 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc), 2150 ResultReg); 2151 if (Subtarget->hasAVX()) 2152 MIB.addReg(OpReg); 2153 MIB.addReg(OpReg); 2154 updateValueMap(I, ResultReg); 2155 return true; 2156} 2157 2158bool X86FastISel::X86SelectFPExt(const Instruction *I) { 2159 if (X86ScalarSSEf64 && I->getType()->isDoubleTy() && 2160 I->getOperand(0)->getType()->isFloatTy()) { 2161 // fpext from float to double. 2162 unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr; 2163 return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass); 2164 } 2165 2166 return false; 2167} 2168 2169bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { 2170 if (X86ScalarSSEf64 && I->getType()->isFloatTy() && 2171 I->getOperand(0)->getType()->isDoubleTy()) { 2172 // fptrunc from double to float. 2173 unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr; 2174 return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass); 2175 } 2176 2177 return false; 2178} 2179 2180bool X86FastISel::X86SelectTrunc(const Instruction *I) { 2181 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); 2182 EVT DstVT = TLI.getValueType(DL, I->getType()); 2183 2184 // This code only handles truncation to byte. 2185 if (DstVT != MVT::i8 && DstVT != MVT::i1) 2186 return false; 2187 if (!TLI.isTypeLegal(SrcVT)) 2188 return false; 2189 2190 unsigned InputReg = getRegForValue(I->getOperand(0)); 2191 if (!InputReg) 2192 // Unhandled operand. Halt "fast" selection and bail. 2193 return false; 2194 2195 if (SrcVT == MVT::i8) { 2196 // Truncate from i8 to i1; no code needed. 2197 updateValueMap(I, InputReg); 2198 return true; 2199 } 2200 2201 bool KillInputReg = false; 2202 if (!Subtarget->is64Bit()) { 2203 // If we're on x86-32; we can't extract an i8 from a general register. 2204 // First issue a copy to GR16_ABCD or GR32_ABCD. 2205 const TargetRegisterClass *CopyRC = 2206 (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass; 2207 unsigned CopyReg = createResultReg(CopyRC); 2208 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2209 TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg); 2210 InputReg = CopyReg; 2211 KillInputReg = true; 2212 } 2213 2214 // Issue an extract_subreg. 2215 unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8, 2216 InputReg, KillInputReg, 2217 X86::sub_8bit); 2218 if (!ResultReg) 2219 return false; 2220 2221 updateValueMap(I, ResultReg); 2222 return true; 2223} 2224 2225bool X86FastISel::IsMemcpySmall(uint64_t Len) { 2226 return Len <= (Subtarget->is64Bit() ? 32 : 16); 2227} 2228 2229bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, 2230 X86AddressMode SrcAM, uint64_t Len) { 2231 2232 // Make sure we don't bloat code by inlining very large memcpy's. 2233 if (!IsMemcpySmall(Len)) 2234 return false; 2235 2236 bool i64Legal = Subtarget->is64Bit(); 2237 2238 // We don't care about alignment here since we just emit integer accesses. 
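  // For example, a 13-byte copy on x86-64 is emitted as an i64, an i32, and
  // an i8 load/store pair (13 = 8 + 4 + 1).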
2239 while (Len) { 2240 MVT VT; 2241 if (Len >= 8 && i64Legal) 2242 VT = MVT::i64; 2243 else if (Len >= 4) 2244 VT = MVT::i32; 2245 else if (Len >= 2) 2246 VT = MVT::i16; 2247 else 2248 VT = MVT::i8; 2249 2250 unsigned Reg; 2251 bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); 2252 RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); 2253 assert(RV && "Failed to emit load or store??"); 2254 2255 unsigned Size = VT.getSizeInBits()/8; 2256 Len -= Size; 2257 DestAM.Disp += Size; 2258 SrcAM.Disp += Size; 2259 } 2260 2261 return true; 2262} 2263 2264bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 2265 // FIXME: Handle more intrinsics. 2266 switch (II->getIntrinsicID()) { 2267 default: return false; 2268 case Intrinsic::convert_from_fp16: 2269 case Intrinsic::convert_to_fp16: { 2270 if (Subtarget->useSoftFloat() || !Subtarget->hasF16C()) 2271 return false; 2272 2273 const Value *Op = II->getArgOperand(0); 2274 unsigned InputReg = getRegForValue(Op); 2275 if (InputReg == 0) 2276 return false; 2277 2278 // F16C only allows converting from float to half and from half to float. 2279 bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16; 2280 if (IsFloatToHalf) { 2281 if (!Op->getType()->isFloatTy()) 2282 return false; 2283 } else { 2284 if (!II->getType()->isFloatTy()) 2285 return false; 2286 } 2287 2288 unsigned ResultReg = 0; 2289 const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16); 2290 if (IsFloatToHalf) { 2291 // 'InputReg' is implicitly promoted from register class FR32 to 2292 // register class VR128 by method 'constrainOperandRegClass' which is 2293 // directly called by 'fastEmitInst_ri'. 2294 // Instruction VCVTPS2PHrr takes an extra immediate operand which is 2295 // used to provide rounding control. 2296 InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0); 2297 2298 // Move the lower 32-bits of ResultReg to another register of class GR32. 2299 ResultReg = createResultReg(&X86::GR32RegClass); 2300 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2301 TII.get(X86::VMOVPDI2DIrr), ResultReg) 2302 .addReg(InputReg, RegState::Kill); 2303 2304 // The result value is in the lower 16-bits of ResultReg. 2305 unsigned RegIdx = X86::sub_16bit; 2306 ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx); 2307 } else { 2308 assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!"); 2309 // Explicitly sign-extend the input to 32-bit. 2310 InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg, 2311 /*Kill=*/false); 2312 2313 // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr. 2314 InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR, 2315 InputReg, /*Kill=*/true); 2316 2317 InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true); 2318 2319 // The result value is in the lower 32-bits of ResultReg. 2320 // Emit an explicit copy from register class VR128 to register class FR32. 
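      // (In total the half->float path is roughly: a MOVSX of the i16 input
      // to i32, a VMOVDI2PDIrr to place it in an XMM register, VCVTPH2PSrr,
      // and finally this VR128 -> FR32 copy of the low element.)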
2321 ResultReg = createResultReg(&X86::FR32RegClass); 2322 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2323 TII.get(TargetOpcode::COPY), ResultReg) 2324 .addReg(InputReg, RegState::Kill); 2325 } 2326 2327 updateValueMap(II, ResultReg); 2328 return true; 2329 } 2330 case Intrinsic::frameaddress: { 2331 MachineFunction *MF = FuncInfo.MF; 2332 if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI()) 2333 return false; 2334 2335 Type *RetTy = II->getCalledFunction()->getReturnType(); 2336 2337 MVT VT; 2338 if (!isTypeLegal(RetTy, VT)) 2339 return false; 2340 2341 unsigned Opc; 2342 const TargetRegisterClass *RC = nullptr; 2343 2344 switch (VT.SimpleTy) { 2345 default: llvm_unreachable("Invalid result type for frameaddress."); 2346 case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break; 2347 case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break; 2348 } 2349 2350 // This needs to be set before we call getPtrSizedFrameRegister, otherwise 2351 // we get the wrong frame register. 2352 MachineFrameInfo *MFI = MF->getFrameInfo(); 2353 MFI->setFrameAddressIsTaken(true); 2354 2355 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 2356 unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF); 2357 assert(((FrameReg == X86::RBP && VT == MVT::i64) || 2358 (FrameReg == X86::EBP && VT == MVT::i32)) && 2359 "Invalid Frame Register!"); 2360 2361 // Always make a copy of the frame register to to a vreg first, so that we 2362 // never directly reference the frame register (the TwoAddressInstruction- 2363 // Pass doesn't like that). 2364 unsigned SrcReg = createResultReg(RC); 2365 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2366 TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg); 2367 2368 // Now recursively load from the frame address. 2369 // movq (%rbp), %rax 2370 // movq (%rax), %rax 2371 // movq (%rax), %rax 2372 // ... 2373 unsigned DestReg; 2374 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 2375 while (Depth--) { 2376 DestReg = createResultReg(RC); 2377 addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2378 TII.get(Opc), DestReg), SrcReg); 2379 SrcReg = DestReg; 2380 } 2381 2382 updateValueMap(II, SrcReg); 2383 return true; 2384 } 2385 case Intrinsic::memcpy: { 2386 const MemCpyInst *MCI = cast<MemCpyInst>(II); 2387 // Don't handle volatile or variable length memcpys. 2388 if (MCI->isVolatile()) 2389 return false; 2390 2391 if (isa<ConstantInt>(MCI->getLength())) { 2392 // Small memcpy's are common enough that we want to do them 2393 // without a call if possible. 2394 uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue(); 2395 if (IsMemcpySmall(Len)) { 2396 X86AddressMode DestAM, SrcAM; 2397 if (!X86SelectAddress(MCI->getRawDest(), DestAM) || 2398 !X86SelectAddress(MCI->getRawSource(), SrcAM)) 2399 return false; 2400 TryEmitSmallMemcpy(DestAM, SrcAM, Len); 2401 return true; 2402 } 2403 } 2404 2405 unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32; 2406 if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth)) 2407 return false; 2408 2409 if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255) 2410 return false; 2411 2412 return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2); 2413 } 2414 case Intrinsic::memset: { 2415 const MemSetInst *MSI = cast<MemSetInst>(II); 2416 2417 if (MSI->isVolatile()) 2418 return false; 2419 2420 unsigned SizeWidth = Subtarget->is64Bit() ? 
64 : 32; 2421 if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth)) 2422 return false; 2423 2424 if (MSI->getDestAddressSpace() > 255) 2425 return false; 2426 2427 return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); 2428 } 2429 case Intrinsic::stackprotector: { 2430 // Emit code to store the stack guard onto the stack. 2431 EVT PtrTy = TLI.getPointerTy(DL); 2432 2433 const Value *Op1 = II->getArgOperand(0); // The guard's value. 2434 const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1)); 2435 2436 MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]); 2437 2438 // Grab the frame index. 2439 X86AddressMode AM; 2440 if (!X86SelectAddress(Slot, AM)) return false; 2441 if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; 2442 return true; 2443 } 2444 case Intrinsic::dbg_declare: { 2445 const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); 2446 X86AddressMode AM; 2447 assert(DI->getAddress() && "Null address should be checked earlier!"); 2448 if (!X86SelectAddress(DI->getAddress(), AM)) 2449 return false; 2450 const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); 2451 // FIXME may need to add RegState::Debug to any registers produced, 2452 // although ESP/EBP should be the only ones at the moment. 2453 assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && 2454 "Expected inlined-at fields to agree"); 2455 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM) 2456 .addImm(0) 2457 .addMetadata(DI->getVariable()) 2458 .addMetadata(DI->getExpression()); 2459 return true; 2460 } 2461 case Intrinsic::trap: { 2462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); 2463 return true; 2464 } 2465 case Intrinsic::sqrt: { 2466 if (!Subtarget->hasSSE1()) 2467 return false; 2468 2469 Type *RetTy = II->getCalledFunction()->getReturnType(); 2470 2471 MVT VT; 2472 if (!isTypeLegal(RetTy, VT)) 2473 return false; 2474 2475 // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT 2476 // is not generated by FastISel yet. 2477 // FIXME: Update this code once tablegen can handle it. 
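    // llvm.sqrt.f32 therefore maps to (V)SQRTSSr and llvm.sqrt.f64 to
    // (V)SQRTSDr; with AVX an IMPLICIT_DEF is fed in first to satisfy the
    // extra source operand of the VEX form.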
2478 static const unsigned SqrtOpc[2][2] = { 2479 {X86::SQRTSSr, X86::VSQRTSSr}, 2480 {X86::SQRTSDr, X86::VSQRTSDr} 2481 }; 2482 bool HasAVX = Subtarget->hasAVX(); 2483 unsigned Opc; 2484 const TargetRegisterClass *RC; 2485 switch (VT.SimpleTy) { 2486 default: return false; 2487 case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; 2488 case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; 2489 } 2490 2491 const Value *SrcVal = II->getArgOperand(0); 2492 unsigned SrcReg = getRegForValue(SrcVal); 2493 2494 if (SrcReg == 0) 2495 return false; 2496 2497 unsigned ImplicitDefReg = 0; 2498 if (HasAVX) { 2499 ImplicitDefReg = createResultReg(RC); 2500 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2501 TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); 2502 } 2503 2504 unsigned ResultReg = createResultReg(RC); 2505 MachineInstrBuilder MIB; 2506 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), 2507 ResultReg); 2508 2509 if (ImplicitDefReg) 2510 MIB.addReg(ImplicitDefReg); 2511 2512 MIB.addReg(SrcReg); 2513 2514 updateValueMap(II, ResultReg); 2515 return true; 2516 } 2517 case Intrinsic::sadd_with_overflow: 2518 case Intrinsic::uadd_with_overflow: 2519 case Intrinsic::ssub_with_overflow: 2520 case Intrinsic::usub_with_overflow: 2521 case Intrinsic::smul_with_overflow: 2522 case Intrinsic::umul_with_overflow: { 2523 // This implements the basic lowering of the xalu with overflow intrinsics 2524 // into add/sub/mul followed by either seto or setb. 2525 const Function *Callee = II->getCalledFunction(); 2526 auto *Ty = cast<StructType>(Callee->getReturnType()); 2527 Type *RetTy = Ty->getTypeAtIndex(0U); 2528 Type *CondTy = Ty->getTypeAtIndex(1); 2529 2530 MVT VT; 2531 if (!isTypeLegal(RetTy, VT)) 2532 return false; 2533 2534 if (VT < MVT::i8 || VT > MVT::i64) 2535 return false; 2536 2537 const Value *LHS = II->getArgOperand(0); 2538 const Value *RHS = II->getArgOperand(1); 2539 2540 // Canonicalize immediate to the RHS. 2541 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 2542 isCommutativeIntrinsic(II)) 2543 std::swap(LHS, RHS); 2544 2545 bool UseIncDec = false; 2546 if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne()) 2547 UseIncDec = true; 2548 2549 unsigned BaseOpc, CondOpc; 2550 switch (II->getIntrinsicID()) { 2551 default: llvm_unreachable("Unexpected intrinsic!"); 2552 case Intrinsic::sadd_with_overflow: 2553 BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD); 2554 CondOpc = X86::SETOr; 2555 break; 2556 case Intrinsic::uadd_with_overflow: 2557 BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; 2558 case Intrinsic::ssub_with_overflow: 2559 BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB); 2560 CondOpc = X86::SETOr; 2561 break; 2562 case Intrinsic::usub_with_overflow: 2563 BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; 2564 case Intrinsic::smul_with_overflow: 2565 BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break; 2566 case Intrinsic::umul_with_overflow: 2567 BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break; 2568 } 2569 2570 unsigned LHSReg = getRegForValue(LHS); 2571 if (LHSReg == 0) 2572 return false; 2573 bool LHSIsKill = hasTrivialKill(LHS); 2574 2575 unsigned ResultReg = 0; 2576 // Check if we have an immediate version. 
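    // For example, 'llvm.sadd.with.overflow.i32(%x, 1)' becomes a single
    // INC32r; other constant RHS values go through fastEmit_ri (e.g. an
    // ADD32ri), and if no immediate pattern exists we fall through to the
    // register-register path below.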
2577 if (const auto *CI = dyn_cast<ConstantInt>(RHS)) { 2578 static const unsigned Opc[2][4] = { 2579 { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r }, 2580 { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } 2581 }; 2582 2583 if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) { 2584 ResultReg = createResultReg(TLI.getRegClassFor(VT)); 2585 bool IsDec = BaseOpc == X86ISD::DEC; 2586 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2587 TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg) 2588 .addReg(LHSReg, getKillRegState(LHSIsKill)); 2589 } else 2590 ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill, 2591 CI->getZExtValue()); 2592 } 2593 2594 unsigned RHSReg; 2595 bool RHSIsKill; 2596 if (!ResultReg) { 2597 RHSReg = getRegForValue(RHS); 2598 if (RHSReg == 0) 2599 return false; 2600 RHSIsKill = hasTrivialKill(RHS); 2601 ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg, 2602 RHSIsKill); 2603 } 2604 2605 // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit 2606 // it manually. 2607 if (BaseOpc == X86ISD::UMUL && !ResultReg) { 2608 static const unsigned MULOpc[] = 2609 { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r }; 2610 static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX }; 2611 // First copy the first operand into RAX, which is an implicit input to 2612 // the X86::MUL*r instruction. 2613 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2614 TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8]) 2615 .addReg(LHSReg, getKillRegState(LHSIsKill)); 2616 ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8], 2617 TLI.getRegClassFor(VT), RHSReg, RHSIsKill); 2618 } else if (BaseOpc == X86ISD::SMUL && !ResultReg) { 2619 static const unsigned MULOpc[] = 2620 { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr }; 2621 if (VT == MVT::i8) { 2622 // Copy the first operand into AL, which is an implicit input to the 2623 // X86::IMUL8r instruction. 
2624 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2625 TII.get(TargetOpcode::COPY), X86::AL) 2626 .addReg(LHSReg, getKillRegState(LHSIsKill)); 2627 ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg, 2628 RHSIsKill); 2629 } else 2630 ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8], 2631 TLI.getRegClassFor(VT), LHSReg, LHSIsKill, 2632 RHSReg, RHSIsKill); 2633 } 2634 2635 if (!ResultReg) 2636 return false; 2637 2638 unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy); 2639 assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); 2640 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), 2641 ResultReg2); 2642 2643 updateValueMap(II, ResultReg, 2); 2644 return true; 2645 } 2646 case Intrinsic::x86_sse_cvttss2si: 2647 case Intrinsic::x86_sse_cvttss2si64: 2648 case Intrinsic::x86_sse2_cvttsd2si: 2649 case Intrinsic::x86_sse2_cvttsd2si64: { 2650 bool IsInputDouble; 2651 switch (II->getIntrinsicID()) { 2652 default: llvm_unreachable("Unexpected intrinsic."); 2653 case Intrinsic::x86_sse_cvttss2si: 2654 case Intrinsic::x86_sse_cvttss2si64: 2655 if (!Subtarget->hasSSE1()) 2656 return false; 2657 IsInputDouble = false; 2658 break; 2659 case Intrinsic::x86_sse2_cvttsd2si: 2660 case Intrinsic::x86_sse2_cvttsd2si64: 2661 if (!Subtarget->hasSSE2()) 2662 return false; 2663 IsInputDouble = true; 2664 break; 2665 } 2666 2667 Type *RetTy = II->getCalledFunction()->getReturnType(); 2668 MVT VT; 2669 if (!isTypeLegal(RetTy, VT)) 2670 return false; 2671 2672 static const unsigned CvtOpc[2][2][2] = { 2673 { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr }, 2674 { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } }, 2675 { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr }, 2676 { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } } 2677 }; 2678 bool HasAVX = Subtarget->hasAVX(); 2679 unsigned Opc; 2680 switch (VT.SimpleTy) { 2681 default: llvm_unreachable("Unexpected result type."); 2682 case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break; 2683 case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break; 2684 } 2685 2686 // Check if we can fold insertelement instructions into the convert. 2687 const Value *Op = II->getArgOperand(0); 2688 while (auto *IE = dyn_cast<InsertElementInst>(Op)) { 2689 const Value *Index = IE->getOperand(2); 2690 if (!isa<ConstantInt>(Index)) 2691 break; 2692 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); 2693 2694 if (Idx == 0) { 2695 Op = IE->getOperand(1); 2696 break; 2697 } 2698 Op = IE->getOperand(0); 2699 } 2700 2701 unsigned Reg = getRegForValue(Op); 2702 if (Reg == 0) 2703 return false; 2704 2705 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 2706 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2707 .addReg(Reg); 2708 2709 updateValueMap(II, ResultReg); 2710 return true; 2711 } 2712 } 2713} 2714 2715bool X86FastISel::fastLowerArguments() { 2716 if (!FuncInfo.CanLowerReturn) 2717 return false; 2718 2719 const Function *F = FuncInfo.Fn; 2720 if (F->isVarArg()) 2721 return false; 2722 2723 CallingConv::ID CC = F->getCallingConv(); 2724 if (CC != CallingConv::C) 2725 return false; 2726 2727 if (Subtarget->isCallingConvWin64(CC)) 2728 return false; 2729 2730 if (!Subtarget->is64Bit()) 2731 return false; 2732 2733 // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. 2734 unsigned GPRCnt = 0; 2735 unsigned FPRCnt = 0; 2736 unsigned Idx = 0; 2737 for (auto const &Arg : F->args()) { 2738 // The first argument is at index 1. 
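    // Arguments with byval/inreg/sret/nest attributes, aggregate or vector
    // types, more than six integer values, or more than eight FP values make
    // us bail out and leave the lowering to SelectionDAG.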
2739 ++Idx; 2740 if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || 2741 F->getAttributes().hasAttribute(Idx, Attribute::InReg) || 2742 F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || 2743 F->getAttributes().hasAttribute(Idx, Attribute::Nest)) 2744 return false; 2745 2746 Type *ArgTy = Arg.getType(); 2747 if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) 2748 return false; 2749 2750 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2751 if (!ArgVT.isSimple()) return false; 2752 switch (ArgVT.getSimpleVT().SimpleTy) { 2753 default: return false; 2754 case MVT::i32: 2755 case MVT::i64: 2756 ++GPRCnt; 2757 break; 2758 case MVT::f32: 2759 case MVT::f64: 2760 if (!Subtarget->hasSSE1()) 2761 return false; 2762 ++FPRCnt; 2763 break; 2764 } 2765 2766 if (GPRCnt > 6) 2767 return false; 2768 2769 if (FPRCnt > 8) 2770 return false; 2771 } 2772 2773 static const MCPhysReg GPR32ArgRegs[] = { 2774 X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D 2775 }; 2776 static const MCPhysReg GPR64ArgRegs[] = { 2777 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 2778 }; 2779 static const MCPhysReg XMMArgRegs[] = { 2780 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 2781 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 2782 }; 2783 2784 unsigned GPRIdx = 0; 2785 unsigned FPRIdx = 0; 2786 for (auto const &Arg : F->args()) { 2787 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2788 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 2789 unsigned SrcReg; 2790 switch (VT.SimpleTy) { 2791 default: llvm_unreachable("Unexpected value type."); 2792 case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break; 2793 case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break; 2794 case MVT::f32: // fall-through 2795 case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break; 2796 } 2797 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2798 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2799 // Without this, EmitLiveInCopies may eliminate the livein if its only 2800 // use is a bitcast (which isn't turned into an instruction). 2801 unsigned ResultReg = createResultReg(RC); 2802 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2803 TII.get(TargetOpcode::COPY), ResultReg) 2804 .addReg(DstReg, getKillRegState(true)); 2805 updateValueMap(&Arg, ResultReg); 2806 } 2807 return true; 2808} 2809 2810static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget, 2811 CallingConv::ID CC, 2812 ImmutableCallSite *CS) { 2813 if (Subtarget->is64Bit()) 2814 return 0; 2815 if (Subtarget->getTargetTriple().isOSMSVCRT()) 2816 return 0; 2817 if (CC == CallingConv::Fast || CC == CallingConv::GHC || 2818 CC == CallingConv::HiPE) 2819 return 0; 2820 2821 if (CS) 2822 if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) || 2823 CS->paramHasAttr(1, Attribute::InReg)) 2824 return 0; 2825 2826 return 4; 2827} 2828 2829bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { 2830 auto &OutVals = CLI.OutVals; 2831 auto &OutFlags = CLI.OutFlags; 2832 auto &OutRegs = CLI.OutRegs; 2833 auto &Ins = CLI.Ins; 2834 auto &InRegs = CLI.InRegs; 2835 CallingConv::ID CC = CLI.CallConv; 2836 bool &IsTailCall = CLI.IsTailCall; 2837 bool IsVarArg = CLI.IsVarArg; 2838 const Value *Callee = CLI.Callee; 2839 MCSymbol *Symbol = CLI.Symbol; 2840 2841 bool Is64Bit = Subtarget->is64Bit(); 2842 bool IsWin64 = Subtarget->isCallingConvWin64(CC); 2843 2844 // Handle only C, fastcc, and webkit_js calling conventions for now. 
2845 switch (CC) { 2846 default: return false; 2847 case CallingConv::C: 2848 case CallingConv::Fast: 2849 case CallingConv::WebKit_JS: 2850 case CallingConv::X86_FastCall: 2851 case CallingConv::X86_64_Win64: 2852 case CallingConv::X86_64_SysV: 2853 break; 2854 } 2855 2856 // Allow SelectionDAG isel to handle tail calls. 2857 if (IsTailCall) 2858 return false; 2859 2860 // fastcc with -tailcallopt is intended to provide a guaranteed 2861 // tail call optimization. Fastisel doesn't know how to do that. 2862 if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt) 2863 return false; 2864 2865 // Don't know how to handle Win64 varargs yet. Nothing special needed for 2866 // x86-32. Special handling for x86-64 is implemented. 2867 if (IsVarArg && IsWin64) 2868 return false; 2869 2870 // Don't know about inalloca yet. 2871 if (CLI.CS && CLI.CS->hasInAllocaArgument()) 2872 return false; 2873 2874 // Fast-isel doesn't know about callee-pop yet. 2875 if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg, 2876 TM.Options.GuaranteedTailCallOpt)) 2877 return false; 2878 2879 SmallVector<MVT, 16> OutVTs; 2880 SmallVector<unsigned, 16> ArgRegs; 2881 2882 // If this is a constant i1/i8/i16 argument, promote to i32 to avoid an extra 2883 // instruction. This is safe because it is common to all FastISel supported 2884 // calling conventions on x86. 2885 for (int i = 0, e = OutVals.size(); i != e; ++i) { 2886 Value *&Val = OutVals[i]; 2887 ISD::ArgFlagsTy Flags = OutFlags[i]; 2888 if (auto *CI = dyn_cast<ConstantInt>(Val)) { 2889 if (CI->getBitWidth() < 32) { 2890 if (Flags.isSExt()) 2891 Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext())); 2892 else 2893 Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext())); 2894 } 2895 } 2896 2897 // Passing bools around ends up doing a trunc to i1 and passing it. 2898 // Codegen this as an argument + "and 1". 2899 MVT VT; 2900 auto *TI = dyn_cast<TruncInst>(Val); 2901 unsigned ResultReg; 2902 if (TI && TI->getType()->isIntegerTy(1) && CLI.CS && 2903 (TI->getParent() == CLI.CS->getInstruction()->getParent()) && 2904 TI->hasOneUse()) { 2905 Value *PrevVal = TI->getOperand(0); 2906 ResultReg = getRegForValue(PrevVal); 2907 2908 if (!ResultReg) 2909 return false; 2910 2911 if (!isTypeLegal(PrevVal->getType(), VT)) 2912 return false; 2913 2914 ResultReg = 2915 fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1); 2916 } else { 2917 if (!isTypeLegal(Val->getType(), VT)) 2918 return false; 2919 ResultReg = getRegForValue(Val); 2920 } 2921 2922 if (!ResultReg) 2923 return false; 2924 2925 ArgRegs.push_back(ResultReg); 2926 OutVTs.push_back(VT); 2927 } 2928 2929 // Analyze operands of the call, assigning locations to each operand. 2930 SmallVector<CCValAssign, 16> ArgLocs; 2931 CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext()); 2932 2933 // Allocate shadow area for Win64 2934 if (IsWin64) 2935 CCInfo.AllocateStack(32, 8); 2936 2937 CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86); 2938 2939 // Get a count of how many bytes are to be pushed on the stack. 2940 unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); 2941 2942 // Issue CALLSEQ_START 2943 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 2944 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 2945 .addImm(NumBytes).addImm(0); 2946 2947 // Walk the register/memloc assignments, inserting copies/loads. 
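  // A register-assigned argument simply gets a COPY into its physical
  // register; a stack-assigned one is stored at [stack pointer + offset],
  // after any sign-, zero-, or any-extension required by its CCValAssign.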
2948 const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 2949 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2950 CCValAssign const &VA = ArgLocs[i]; 2951 const Value *ArgVal = OutVals[VA.getValNo()]; 2952 MVT ArgVT = OutVTs[VA.getValNo()]; 2953 2954 if (ArgVT == MVT::x86mmx) 2955 return false; 2956 2957 unsigned ArgReg = ArgRegs[VA.getValNo()]; 2958 2959 // Promote the value if needed. 2960 switch (VA.getLocInfo()) { 2961 case CCValAssign::Full: break; 2962 case CCValAssign::SExt: { 2963 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 2964 "Unexpected extend"); 2965 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, 2966 ArgVT, ArgReg); 2967 assert(Emitted && "Failed to emit a sext!"); (void)Emitted; 2968 ArgVT = VA.getLocVT(); 2969 break; 2970 } 2971 case CCValAssign::ZExt: { 2972 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 2973 "Unexpected extend"); 2974 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, 2975 ArgVT, ArgReg); 2976 assert(Emitted && "Failed to emit a zext!"); (void)Emitted; 2977 ArgVT = VA.getLocVT(); 2978 break; 2979 } 2980 case CCValAssign::AExt: { 2981 assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() && 2982 "Unexpected extend"); 2983 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg, 2984 ArgVT, ArgReg); 2985 if (!Emitted) 2986 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg, 2987 ArgVT, ArgReg); 2988 if (!Emitted) 2989 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg, 2990 ArgVT, ArgReg); 2991 2992 assert(Emitted && "Failed to emit a aext!"); (void)Emitted; 2993 ArgVT = VA.getLocVT(); 2994 break; 2995 } 2996 case CCValAssign::BCvt: { 2997 ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg, 2998 /*TODO: Kill=*/false); 2999 assert(ArgReg && "Failed to emit a bitcast!"); 3000 ArgVT = VA.getLocVT(); 3001 break; 3002 } 3003 case CCValAssign::VExt: 3004 // VExt has not been implemented, so this should be impossible to reach 3005 // for now. However, fallback to Selection DAG isel once implemented. 3006 return false; 3007 case CCValAssign::AExtUpper: 3008 case CCValAssign::SExtUpper: 3009 case CCValAssign::ZExtUpper: 3010 case CCValAssign::FPExt: 3011 llvm_unreachable("Unexpected loc info!"); 3012 case CCValAssign::Indirect: 3013 // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully 3014 // support this. 3015 return false; 3016 } 3017 3018 if (VA.isRegLoc()) { 3019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3020 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3021 OutRegs.push_back(VA.getLocReg()); 3022 } else { 3023 assert(VA.isMemLoc()); 3024 3025 // Don't emit stores for undef values. 
3026 if (isa<UndefValue>(ArgVal)) 3027 continue; 3028 3029 unsigned LocMemOffset = VA.getLocMemOffset(); 3030 X86AddressMode AM; 3031 AM.Base.Reg = RegInfo->getStackRegister(); 3032 AM.Disp = LocMemOffset; 3033 ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; 3034 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 3035 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3036 MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset), 3037 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3038 if (Flags.isByVal()) { 3039 X86AddressMode SrcAM; 3040 SrcAM.Base.Reg = ArgReg; 3041 if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize())) 3042 return false; 3043 } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) { 3044 // If this is a really simple value, emit this with the Value* version 3045 // of X86FastEmitStore. If it isn't simple, we don't want to do this, 3046 // as it can cause us to reevaluate the argument. 3047 if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO)) 3048 return false; 3049 } else { 3050 bool ValIsKill = hasTrivialKill(ArgVal); 3051 if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO)) 3052 return false; 3053 } 3054 } 3055 } 3056 3057 // ELF / PIC requires GOT in the EBX register before function calls via PLT 3058 // GOT pointer. 3059 if (Subtarget->isPICStyleGOT()) { 3060 unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 3061 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3062 TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base); 3063 } 3064 3065 if (Is64Bit && IsVarArg && !IsWin64) { 3066 // From AMD64 ABI document: 3067 // For calls that may call functions that use varargs or stdargs 3068 // (prototype-less calls or calls to functions containing ellipsis (...) in 3069 // the declaration) %al is used as hidden argument to specify the number 3070 // of SSE registers used. The contents of %al do not need to match exactly 3071 // the number of registers, but must be an ubound on the number of SSE 3072 // registers used and is in the range 0 - 8 inclusive. 3073 3074 // Count the number of XMM registers allocated. 3075 static const MCPhysReg XMMArgRegs[] = { 3076 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, 3077 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 3078 }; 3079 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); 3080 assert((Subtarget->hasSSE1() || !NumXMMRegs) 3081 && "SSE registers cannot be used when SSE is disabled"); 3082 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), 3083 X86::AL).addImm(NumXMMRegs); 3084 } 3085 3086 // Materialize callee address in a register. FIXME: GV address can be 3087 // handled with a CALLpcrel32 instead. 3088 X86AddressMode CalleeAM; 3089 if (!X86SelectCallAddress(Callee, CalleeAM)) 3090 return false; 3091 3092 unsigned CalleeOp = 0; 3093 const GlobalValue *GV = nullptr; 3094 if (CalleeAM.GV != nullptr) { 3095 GV = CalleeAM.GV; 3096 } else if (CalleeAM.Base.Reg != 0) { 3097 CalleeOp = CalleeAM.Base.Reg; 3098 } else 3099 return false; 3100 3101 // Issue the call. 3102 MachineInstrBuilder MIB; 3103 if (CalleeOp) { 3104 // Register-indirect call. 3105 unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r; 3106 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)) 3107 .addReg(CalleeOp); 3108 } else { 3109 // Direct call. 3110 assert(GV && "Not a direct call"); 3111 unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; 3112 3113 // See if we need any target-specific flags on the GV operand. 
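    // For example, a call to an external function under ELF PIC typically
    // gets MO_PLT, while calls on older Darwin targets may be routed through
    // a $stub with MO_DARWIN_STUB.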
3114 unsigned char OpFlags = 0; 3115 3116 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to 3117 // external symbols most go through the PLT in PIC mode. If the symbol 3118 // has hidden or protected visibility, or if it is static or local, then 3119 // we don't need to use the PLT - we can directly call it. 3120 if (Subtarget->isTargetELF() && 3121 TM.getRelocationModel() == Reloc::PIC_ && 3122 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { 3123 OpFlags = X86II::MO_PLT; 3124 } else if (Subtarget->isPICStyleStubAny() && 3125 !GV->isStrongDefinitionForLinker() && 3126 (!Subtarget->getTargetTriple().isMacOSX() || 3127 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { 3128 // PC-relative references to external symbols should go through $stub, 3129 // unless we're building with the leopard linker or later, which 3130 // automatically synthesizes these stubs. 3131 OpFlags = X86II::MO_DARWIN_STUB; 3132 } 3133 3134 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc)); 3135 if (Symbol) 3136 MIB.addSym(Symbol, OpFlags); 3137 else 3138 MIB.addGlobalAddress(GV, 0, OpFlags); 3139 } 3140 3141 // Add a register mask operand representing the call-preserved registers. 3142 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3143 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3144 3145 // Add an implicit use GOT pointer in EBX. 3146 if (Subtarget->isPICStyleGOT()) 3147 MIB.addReg(X86::EBX, RegState::Implicit); 3148 3149 if (Is64Bit && IsVarArg && !IsWin64) 3150 MIB.addReg(X86::AL, RegState::Implicit); 3151 3152 // Add implicit physical register uses to the call. 3153 for (auto Reg : OutRegs) 3154 MIB.addReg(Reg, RegState::Implicit); 3155 3156 // Issue CALLSEQ_END 3157 unsigned NumBytesForCalleeToPop = 3158 computeBytesPoppedByCallee(Subtarget, CC, CLI.CS); 3159 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3160 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3161 .addImm(NumBytes).addImm(NumBytesForCalleeToPop); 3162 3163 // Now handle call return values. 3164 SmallVector<CCValAssign, 16> RVLocs; 3165 CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, 3166 CLI.RetTy->getContext()); 3167 CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86); 3168 3169 // Copy all of the result registers out of their specified physreg. 3170 unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3171 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3172 CCValAssign &VA = RVLocs[i]; 3173 EVT CopyVT = VA.getValVT(); 3174 unsigned CopyReg = ResultReg + i; 3175 3176 // If this is x86-64, and we disabled SSE, we can't return FP values 3177 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && 3178 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { 3179 report_fatal_error("SSE register return with SSE disabled"); 3180 } 3181 3182 // If we prefer to use the value in xmm registers, copy it out as f80 and 3183 // use a truncate to move it from fp stack reg to xmm reg. 3184 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && 3185 isScalarFPTypeInSSEReg(VA.getValVT())) { 3186 CopyVT = MVT::f80; 3187 CopyReg = createResultReg(&X86::RFP80RegClass); 3188 } 3189 3190 // Copy out the result. 3191 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3192 TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg()); 3193 InRegs.push_back(VA.getLocReg()); 3194 3195 // Round the f80 to the right size, which also moves it to the appropriate 3196 // xmm register. 
This is accomplished by storing the f80 value in memory
3197 // and then loading it back.
3198 if (CopyVT != VA.getValVT()) {
3199 EVT ResVT = VA.getValVT();
3200 unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
3201 unsigned MemSize = ResVT.getSizeInBits()/8;
3202 int FI = MFI.CreateStackObject(MemSize, MemSize, false);
3203 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3204 TII.get(Opc)), FI)
3205 .addReg(CopyReg);
3206 Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
3207 addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3208 TII.get(Opc), ResultReg + i), FI);
3209 }
3210 }
3211
3212 CLI.ResultReg = ResultReg;
3213 CLI.NumResultRegs = RVLocs.size();
3214 CLI.Call = MIB;
3215
3216 return true;
3217}
3218
3219bool
3220X86FastISel::fastSelectInstruction(const Instruction *I) {
3221 switch (I->getOpcode()) {
3222 default: break;
3223 case Instruction::Load:
3224 return X86SelectLoad(I);
3225 case Instruction::Store:
3226 return X86SelectStore(I);
3227 case Instruction::Ret:
3228 return X86SelectRet(I);
3229 case Instruction::ICmp:
3230 case Instruction::FCmp:
3231 return X86SelectCmp(I);
3232 case Instruction::ZExt:
3233 return X86SelectZExt(I);
3234 case Instruction::Br:
3235 return X86SelectBranch(I);
3236 case Instruction::LShr:
3237 case Instruction::AShr:
3238 case Instruction::Shl:
3239 return X86SelectShift(I);
3240 case Instruction::SDiv:
3241 case Instruction::UDiv:
3242 case Instruction::SRem:
3243 case Instruction::URem:
3244 return X86SelectDivRem(I);
3245 case Instruction::Select:
3246 return X86SelectSelect(I);
3247 case Instruction::Trunc:
3248 return X86SelectTrunc(I);
3249 case Instruction::FPExt:
3250 return X86SelectFPExt(I);
3251 case Instruction::FPTrunc:
3252 return X86SelectFPTrunc(I);
3253 case Instruction::SIToFP:
3254 return X86SelectSIToFP(I);
3255 case Instruction::IntToPtr: // Deliberate fall-through.
3256 case Instruction::PtrToInt: {
3257 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3258 EVT DstVT = TLI.getValueType(DL, I->getType());
3259 if (DstVT.bitsGT(SrcVT))
3260 return X86SelectZExt(I);
3261 if (DstVT.bitsLT(SrcVT))
3262 return X86SelectTrunc(I);
3263 unsigned Reg = getRegForValue(I->getOperand(0));
3264 if (Reg == 0) return false;
3265 updateValueMap(I, Reg);
3266 return true;
3267 }
3268 case Instruction::BitCast: {
3269 // Select SSE2/AVX bitcasts between 128/256-bit vector types.
3270 if (!Subtarget->hasSSE2())
3271 return false;
3272
3273 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
3274 EVT DstVT = TLI.getValueType(DL, I->getType());
3275
3276 if (!SrcVT.isSimple() || !DstVT.isSimple())
3277 return false;
3278
3279 if (!SrcVT.is128BitVector() &&
3280 !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
3281 return false;
3282
3283 unsigned Reg = getRegForValue(I->getOperand(0));
3284 if (Reg == 0)
3285 return false;
3286
3287 // No instruction is needed for conversion. Reuse the register used by
3288 // the first operand.
    updateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  uint64_t Imm = CI->getZExtValue();
  if (Imm == 0) {
    unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
    switch (VT.SimpleTy) {
    default: llvm_unreachable("Unexpected value type");
    case MVT::i1:
    case MVT::i8:
      return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
                                        X86::sub_8bit);
    case MVT::i16:
      return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
                                        X86::sub_16bit);
    case MVT::i32:
      return SrcReg;
    case MVT::i64: {
      unsigned ResultReg = createResultReg(&X86::GR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
          .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
      return ResultReg;
    }
    }
  }

  unsigned Opc = 0;
  switch (VT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type");
  case MVT::i1:  VT = MVT::i8; // fall-through
  case MVT::i8:  Opc = X86::MOV8ri;  break;
  case MVT::i16: Opc = X86::MOV16ri; break;
  case MVT::i32: Opc = X86::MOV32ri; break;
  case MVT::i64: {
    if (isUInt<32>(Imm))
      Opc = X86::MOV32ri;
    else if (isInt<32>(Imm))
      Opc = X86::MOV64ri32;
    else
      Opc = X86::MOV64ri;
    break;
  }
  }
  if (VT == MVT::i64 && Opc == X86::MOV32ri) {
    unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
    unsigned ResultReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
        .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
    return ResultReg;
  }
  return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
}

unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = DL.getTypeAllocSize(CFP->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
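  // Decide how the constant-pool entry will be addressed: through the
  // function's global base register (MO_PIC_BASE_OFFSET or MO_GOTOFF) for the
  // 32-bit PIC styles, or RIP-relative on x86-64 with the small code model.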
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }

  // Create the load from the constant pool.
  unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
  unsigned ResultReg = createResultReg(RC);

  if (CM == CodeModel::Large) {
    unsigned AddrReg = createResultReg(&X86::GR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
            AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);
    MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                      TII.get(Opc), ResultReg);
    addDirectMem(MIB, AddrReg);
    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getConstantPool(*FuncInfo.MF),
        MachineMemOperand::MOLoad, DL.getPointerSize(), Align);
    MIB->addMemOperand(*FuncInfo.MF, MMO);
    return ResultReg;
  }

  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                   TII.get(Opc), ResultReg),
                           CPI, PICBase, OpFlag);
  return ResultReg;
}

unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return 0;

  // Materialize addresses with LEA/MOV instructions.
  X86AddressMode AM;
  if (X86SelectAddress(GV, AM)) {
    // If the expression is just a basereg, then we're done, otherwise we need
    // to emit an LEA.
    if (AM.BaseType == X86AddressMode::RegBase &&
        AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
      return AM.Base.Reg;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    if (TM.getRelocationModel() == Reloc::Static &&
        TLI.getPointerTy(DL) == MVT::i64) {
      // The displacement could be more than 32 bits away, so we need to use
      // an instruction with a 64-bit immediate.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
              ResultReg)
          .addGlobalAddress(GV);
    } else {
      unsigned Opc =
          TLI.getPointerTy(DL) == MVT::i32
              ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
              : X86::LEA64r;
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                             TII.get(Opc), ResultReg), AM);
    }
    return ResultReg;
  }
  return 0;
}

unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return X86MaterializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return X86MaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return X86MaterializeGV(GV, VT);

  return 0;
}

unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but fastMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and fastMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;
  assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc =
      TLI.getPointerTy(DL) == MVT::i32
          ? (Subtarget->isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r)
          : X86::LEA64r;
  const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL));
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
  MVT VT;
  if (!isTypeLegal(CF->getType(), VT))
    return 0;

  // Get opcode and regclass for the given zero.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  switch (VT.SimpleTy) {
  default: return 0;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = X86::FsFLD0SS;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp032;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = X86::FsFLD0SD;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp064;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
  return ResultReg;
}


bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  const Value *Ptr = LI->getPointerOperand();
  X86AddressMode AM;
  if (!X86SelectAddress(Ptr, AM))
    return false;

  const X86InstrInfo &XII = (const X86InstrInfo &)TII;

  unsigned Size = DL.getTypeAllocSize(LI->getType());
  unsigned Alignment = LI->getAlignment();

  if (Alignment == 0) // Ensure that codegen never sees alignment 0.
    Alignment = DL.getABITypeAlignment(LI->getType());

  SmallVector<MachineOperand, 8> AddrOps;
  AM.getFullAddress(AddrOps);

  MachineInstr *Result = XII.foldMemoryOperandImpl(
      *FuncInfo.MF, MI, OpNo, AddrOps, FuncInfo.InsertPt, Size, Alignment,
      /*AllowCommute=*/true);
  if (!Result)
    return false;

  // The index register could be in the wrong register class. Unfortunately,
  // foldMemoryOperandImpl could have commuted the instruction, so it's not
  // enough to just look at OpNo + the offset to the index register. We
  // actually need to scan the instruction to find the index register and see
  // whether it has the correct register class.
  unsigned OperandNo = 0;
  for (MachineInstr::mop_iterator I = Result->operands_begin(),
       E = Result->operands_end(); I != E; ++I, ++OperandNo) {
    MachineOperand &MO = *I;
    if (!MO.isReg() || MO.isDef() || MO.getReg() != AM.IndexReg)
      continue;
    // Found the index register; now try to rewrite it.
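    // The folded memory form typically requires a *_NOSP index class (ESP/RSP
    // cannot be used as an index), so constrain the virtual register to the
    // class the folded instruction expects and rewrite the operand if that
    // produced a different register.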
    unsigned IndexReg = constrainOperandRegClass(Result->getDesc(),
                                                 MO.getReg(), OperandNo);
    if (IndexReg == MO.getReg())
      continue;
    MO.setReg(IndexReg);
  }

  Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
  MI->eraseFromParent();
  return true;
}


namespace llvm {
  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
                                const TargetLibraryInfo *libInfo) {
    return new X86FastISel(funcInfo, libInfo);
  }
}