SelectionDAGISel.cpp revision 2bbd81064a6998496a71ff7ae8160b3caada64fa
//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by the LLVM research group and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the SelectionDAGISel class.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "isel"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineDebugInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Target/MRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include <map>
#include <set>
#include <iostream>
#include <algorithm>
using namespace llvm;

#ifndef NDEBUG
static cl::opt<bool>
ViewISelDAGs("view-isel-dags", cl::Hidden,
          cl::desc("Pop up a window to show isel dags as they are selected"));
static cl::opt<bool>
ViewSchedDAGs("view-sched-dags", cl::Hidden,
          cl::desc("Pop up a window to show sched dags as they are processed"));
#else
static const bool ViewISelDAGs = 0;
static const bool ViewSchedDAGs = 0;
#endif

// Scheduling heuristics
enum SchedHeuristics {
  defaultScheduling,      // Let the target specify its preference.
  noScheduling,           // No scheduling, emit breadth first sequence.
  simpleScheduling,       // Two pass, min. critical path, max. utilization.
  simpleNoItinScheduling, // Same as above except using generic latency.
  listSchedulingBURR,     // Bottom up reg reduction list scheduling.
  listSchedulingTD        // Top-down list scheduler.
};

namespace {
  cl::opt<SchedHeuristics>
  ISHeuristic(
    "sched",
    cl::desc("Choose scheduling style"),
    cl::init(defaultScheduling),
    cl::values(
      clEnumValN(defaultScheduling, "default",
                 "Target preferred scheduling style"),
      clEnumValN(noScheduling, "none",
                 "No scheduling: breadth first sequencing"),
      clEnumValN(simpleScheduling, "simple",
                 "Simple two pass scheduling: minimize critical path "
                 "and maximize processor utilization"),
      clEnumValN(simpleNoItinScheduling, "simple-noitin",
                 "Simple two pass scheduling: Same as simple "
                 "except using generic latency"),
      clEnumValN(listSchedulingBURR, "list-burr",
                 "Bottom up register reduction list scheduling"),
      clEnumValN(listSchedulingTD, "list-td",
                 "Top-down list scheduler"),
      clEnumValEnd));
} // namespace

namespace {
  /// RegsForValue - This struct represents the physical registers that a
  /// particular value is assigned and the type information about the value.
  /// This is needed because a value can be promoted into a larger register or
  /// expanded into multiple smaller registers than its original type requires.
  struct RegsForValue {
    /// Regs - This list holds the register (for legal and promoted values)
    /// or register set (for expanded values) that the value should be assigned
    /// to.
    std::vector<unsigned> Regs;

    /// RegVT - The value type of each register.
    ///
    MVT::ValueType RegVT;

    /// ValueVT - The value type of the LLVM value, which may be promoted from
    /// RegVT or made from merging the two expanded parts.
    MVT::ValueType ValueVT;

    RegsForValue() : RegVT(MVT::Other), ValueVT(MVT::Other) {}

    RegsForValue(unsigned Reg, MVT::ValueType regvt, MVT::ValueType valuevt)
      : RegVT(regvt), ValueVT(valuevt) {
      Regs.push_back(Reg);
    }
    RegsForValue(const std::vector<unsigned> &regs,
                 MVT::ValueType regvt, MVT::ValueType valuevt)
      : Regs(regs), RegVT(regvt), ValueVT(valuevt) {
    }

    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
    /// this value and returns the result as a ValueVT value.  This uses
    /// Chain/Flag as the input and updates them for the output Chain/Flag.
    SDOperand getCopyFromRegs(SelectionDAG &DAG,
                              SDOperand &Chain, SDOperand &Flag) const;

    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
    /// specified value into the registers specified by this object.  This uses
    /// Chain/Flag as the input and updates them for the output Chain/Flag.
    void getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
                       SDOperand &Chain, SDOperand &Flag) const;

    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
    /// operand list.  This adds the code marker and includes the number of
    /// values added into it.
    void AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
                              std::vector<SDOperand> &Ops) const;
  };
}
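
// As an illustrative sketch (assuming a 32-bit target where i32 is legal,
// i16 is promoted, and i64 is expanded), the three constructors above would
// be used roughly like this:
//
//   RegsForValue Legal(R0, MVT::i32, MVT::i32);     // i32 lives in one reg.
//   RegsForValue Promoted(R1, MVT::i32, MVT::i16);  // i16 widened to i32.
//   std::vector<unsigned> Pair;                     // i64 split across two
//   Pair.push_back(R2); Pair.push_back(R3);         // consecutive i32 regs.
//   RegsForValue Expanded(Pair, MVT::i32, MVT::i64);
//
// R0..R3 here are hypothetical virtual register numbers, not names defined
// anywhere in this file.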

namespace llvm {
  //===--------------------------------------------------------------------===//
  /// FunctionLoweringInfo - This contains information that is global to a
  /// function that is used when lowering a region of the function.
  class FunctionLoweringInfo {
  public:
    TargetLowering &TLI;
    Function &Fn;
    MachineFunction &MF;
    SSARegMap *RegMap;

    FunctionLoweringInfo(TargetLowering &TLI, Function &Fn, MachineFunction &MF);

    /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
    std::map<const BasicBlock*, MachineBasicBlock *> MBBMap;

    /// ValueMap - Since we emit code for the function a basic block at a time,
    /// we must remember which virtual registers hold the values for
    /// cross-basic-block values.
    std::map<const Value*, unsigned> ValueMap;

    /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
    /// the entry block.  This allows the allocas to be efficiently referenced
    /// anywhere in the function.
    std::map<const AllocaInst*, int> StaticAllocaMap;

    unsigned MakeReg(MVT::ValueType VT) {
      return RegMap->createVirtualRegister(TLI.getRegClassFor(VT));
    }

    unsigned CreateRegForValue(const Value *V);

    unsigned InitializeRegForValue(const Value *V) {
      unsigned &R = ValueMap[V];
      assert(R == 0 && "Already initialized this value register!");
      return R = CreateRegForValue(V);
    }
  };
}

/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
/// PHI nodes or outside of the basic block that defines it, or used by a
/// switch instruction, which may expand to multiple basic blocks.
static bool isUsedOutsideOfDefiningBlock(Instruction *I) {
  if (isa<PHINode>(I)) return true;
  BasicBlock *BB = I->getParent();
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI)
    if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI) ||
        isa<SwitchInst>(*UI))
      return true;
  return false;
}

/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true.  This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(Argument *A) {
  BasicBlock *Entry = A->getParent()->begin();
  for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI)
    if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
      return false;  // Use not in entry block.
  return true;
}

FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli,
                                           Function &fn, MachineFunction &mf)
    : TLI(tli), Fn(fn), MF(mf), RegMap(MF.getSSARegMap()) {

  // Create a vreg for each argument register that is not dead and is used
  // outside of the entry block for the function.
  for (Function::arg_iterator AI = Fn.arg_begin(), E = Fn.arg_end();
       AI != E; ++AI)
    if (!isOnlyUsedInEntryBlock(AI))
      InitializeRegForValue(AI);

  // Initialize the mapping of values to registers.  This is only set up for
  // instruction values that are used outside of the block that defines
  // them.
  Function::iterator BB = Fn.begin(), EB = Fn.end();
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      if (ConstantUInt *CUI = dyn_cast<ConstantUInt>(AI->getArraySize())) {
        const Type *Ty = AI->getAllocatedType();
        uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
        unsigned Align =
          std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
                   AI->getAlignment());

        // If the alignment of the value is smaller than the size of the value,
        // and if the size of the value is particularly small (<= 8 bytes),
        // round up to the size of the value for potentially better performance.
        //
        // FIXME: This could be made better with a preferred alignment hook in
        // TargetData.  It serves primarily to 8-byte align doubles for X86.
        if (Align < TySize && TySize <= 8) Align = TySize;
        TySize *= CUI->getValue();   // Get total allocated size.
        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
        StaticAllocaMap[AI] =
          MF.getFrameInfo()->CreateStackObject((unsigned)TySize, Align);
      }

  for (; BB != EB; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I))
        if (!isa<AllocaInst>(I) ||
            !StaticAllocaMap.count(cast<AllocaInst>(I)))
          InitializeRegForValue(I);

  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F.  This
  // also creates the initial PHI MachineInstrs, though none of the input
  // operands are populated.
  for (BB = Fn.begin(), EB = Fn.end(); BB != EB; ++BB) {
    MachineBasicBlock *MBB = new MachineBasicBlock(BB);
    MBBMap[BB] = MBB;
    MF.getBasicBlockList().push_back(MBB);

    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
    // appropriate.
    PHINode *PN;
    for (BasicBlock::iterator I = BB->begin();
         (PN = dyn_cast<PHINode>(I)); ++I)
      if (!PN->use_empty()) {
        unsigned NumElements =
          TLI.getNumElements(TLI.getValueType(PN->getType()));
        unsigned PHIReg = ValueMap[PN];
        assert(PHIReg && "PHI node does not have an assigned virtual register!");
        for (unsigned i = 0; i != NumElements; ++i)
          BuildMI(MBB, TargetInstrInfo::PHI, PN->getNumOperands(), PHIReg+i);
      }
  }
}

/// CreateRegForValue - Allocate the appropriate number of virtual registers of
/// the correctly promoted or expanded types.  Assign these registers
/// consecutive vreg numbers and return the first assigned number.
unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
  MVT::ValueType VT = TLI.getValueType(V->getType());

  // The number of multiples of registers that we need, e.g., to split up
  // a <2 x int64> into 4 x i32 registers.
  unsigned NumVectorRegs = 1;

  // If this is a packed type, figure out what type it will decompose into
  // and how many of the elements it will use.
  if (VT == MVT::Vector) {
    const PackedType *PTy = cast<PackedType>(V->getType());
    unsigned NumElts = PTy->getNumElements();
    MVT::ValueType EltTy = TLI.getValueType(PTy->getElementType());

    // Divide the input until we get to a supported size.  This will always
    // end with a scalar if the target doesn't support vectors.
    while (NumElts > 1 && !TLI.isTypeLegal(getVectorType(EltTy, NumElts))) {
      NumElts >>= 1;
      NumVectorRegs <<= 1;
    }
    if (NumElts == 1)
      VT = EltTy;
    else
      VT = getVectorType(EltTy, NumElts);
  }

  // The common case is that we will only create one register for this
  // value.  If we have that case, create and return the virtual register.
  unsigned NV = TLI.getNumElements(VT);
  if (NV == 1) {
    // If we are promoting this value, pick the next largest supported type.
    MVT::ValueType PromotedType = TLI.getTypeToTransformTo(VT);
    unsigned Reg = MakeReg(PromotedType);
    // If this is a vector of supported or promoted types (e.g. 4 x i16),
    // create all of the registers.
    for (unsigned i = 1; i != NumVectorRegs; ++i)
      MakeReg(PromotedType);
    return Reg;
  }

  // If this value is represented with multiple target registers, make sure
  // to create enough consecutive registers of the right (smaller) type.
  unsigned NT = VT-1;  // Find the type to use.
  while (TLI.getNumElements((MVT::ValueType)NT) != 1)
    --NT;

  unsigned R = MakeReg((MVT::ValueType)NT);
  for (unsigned i = 1; i != NV*NumVectorRegs; ++i)
    MakeReg((MVT::ValueType)NT);
  return R;
}
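
// A worked example of the logic above, assuming a hypothetical 32-bit target
// with no vector support:
//
//   - An i64 value: NV == 2, so two consecutive i32 vregs are created and
//     the first vreg number is returned.
//   - A <2 x int64> packed value: the element count is halved until a legal
//     type is found, bottoming out at the scalar i64 with NumVectorRegs == 2;
//     the i64 is itself expanded (NV == 2), so 2*2 == 4 consecutive i32 vregs
//     result, matching the comment at the top of the function.
//
// The exact register counts depend on which types the target marks legal.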

//===----------------------------------------------------------------------===//
/// SelectionDAGLowering - This is the common target-independent lowering
/// implementation that is parameterized by a TargetLowering object.
/// Also, targets can overload any lowering method.
///
namespace llvm {
class SelectionDAGLowering {
  MachineBasicBlock *CurMBB;

  std::map<const Value*, SDOperand> NodeMap;

  /// PendingLoads - Loads are not emitted to the program immediately.  We bunch
  /// them up and then emit token factor nodes when possible.  This allows us to
  /// get simple disambiguation between loads without worrying about alias
  /// analysis.
  std::vector<SDOperand> PendingLoads;

  /// Case - A pair of values to record the Value for a switch case, and the
  /// case's target basic block.
  typedef std::pair<Constant*, MachineBasicBlock*> Case;
  typedef std::vector<Case>::iterator CaseItr;
  typedef std::pair<CaseItr, CaseItr> CaseRange;

  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
  /// of conditional branches.
  struct CaseRec {
    CaseRec(MachineBasicBlock *bb, Constant *lt, Constant *ge, CaseRange r) :
      CaseBB(bb), LT(lt), GE(ge), Range(r) {}

    /// CaseBB - The MBB in which to emit the compare and branch
    MachineBasicBlock *CaseBB;
    /// LT, GE - If non-null, we know the current case value must be less-than
    /// or greater-than-or-equal-to these Constants.
    Constant *LT;
    Constant *GE;
    /// Range - A pair of iterators representing the range of case values to be
    /// processed at this point in the binary search tree.
    CaseRange Range;
  };

  /// The comparison function for sorting Case values.
  struct CaseCmp {
    bool operator () (const Case& C1, const Case& C2) {
      if (const ConstantUInt* U1 = dyn_cast<const ConstantUInt>(C1.first))
        return U1->getValue() < cast<const ConstantUInt>(C2.first)->getValue();

      const ConstantSInt* S1 = dyn_cast<const ConstantSInt>(C1.first);
      return S1->getValue() < cast<const ConstantSInt>(C2.first)->getValue();
    }
  };

public:
  // TLI - This is information that describes the available target features we
  // need for lowering.  This indicates when operations are unavailable,
  // implemented with a libcall, etc.
  TargetLowering &TLI;
  SelectionDAG &DAG;
  const TargetData &TD;

  /// SwitchCases - Vector of CaseBlock structures used to communicate
  /// SwitchInst code generation information.
  std::vector<SelectionDAGISel::CaseBlock> SwitchCases;

  /// FuncInfo - Information about the function as a whole.
  ///
  FunctionLoweringInfo &FuncInfo;

  SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli,
                       FunctionLoweringInfo &funcinfo)
    : TLI(tli), DAG(dag), TD(DAG.getTarget().getTargetData()),
      FuncInfo(funcinfo) {
  }

  /// getRoot - Return the current virtual root of the Selection DAG.
  ///
  SDOperand getRoot() {
    if (PendingLoads.empty())
      return DAG.getRoot();

    if (PendingLoads.size() == 1) {
      SDOperand Root = PendingLoads[0];
      DAG.setRoot(Root);
      PendingLoads.clear();
      return Root;
    }

    // Otherwise, we have to make a token factor node.
    SDOperand Root = DAG.getNode(ISD::TokenFactor, MVT::Other, PendingLoads);
    PendingLoads.clear();
    DAG.setRoot(Root);
    return Root;
  }
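
  // For example, if three non-volatile loads have been visited since the
  // last time the root was taken, getRoot() would return, roughly,
  //
  //   TokenFactor(Load0.chain, Load1.chain, Load2.chain)
  //
  // and set it as the DAG root, so that later chained nodes depend on all
  // three loads without imposing any ordering between the loads themselves.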

  void visit(Instruction &I) { visit(I.getOpcode(), I); }

  void visit(unsigned Opcode, User &I) {
    switch (Opcode) {
    default: assert(0 && "Unknown instruction type encountered!");
             abort();
      // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: return visit##OPCODE((CLASS&)I);
#include "llvm/Instruction.def"
    }
  }

  void setCurrentBasicBlock(MachineBasicBlock *MBB) { CurMBB = MBB; }

  SDOperand getLoadFrom(const Type *Ty, SDOperand Ptr,
                        SDOperand SrcValue, SDOperand Root,
                        bool isVolatile);

  SDOperand getIntPtrConstant(uint64_t Val) {
    return DAG.getConstant(Val, TLI.getPointerTy());
  }

  SDOperand getValue(const Value *V);

  const SDOperand &setValue(const Value *V, SDOperand NewN) {
    SDOperand &N = NodeMap[V];
    assert(N.Val == 0 && "Already set a value for this node!");
    return N = NewN;
  }

  RegsForValue GetRegistersForValue(const std::string &ConstrCode,
                                    MVT::ValueType VT,
                                    bool OutReg, bool InReg,
                                    std::set<unsigned> &OutputRegs,
                                    std::set<unsigned> &InputRegs);

  // Terminator instructions.
  void visitRet(ReturnInst &I);
  void visitBr(BranchInst &I);
  void visitSwitch(SwitchInst &I);
  void visitUnreachable(UnreachableInst &I) { /* noop */ }

  // Helper for visitSwitch
  void visitSwitchCase(SelectionDAGISel::CaseBlock &CB);

  // These all get lowered before this pass.
  void visitInvoke(InvokeInst &I) { assert(0 && "TODO"); }
  void visitUnwind(UnwindInst &I) { assert(0 && "TODO"); }

  void visitBinary(User &I, unsigned IntOp, unsigned FPOp, unsigned VecOp);
  void visitShift(User &I, unsigned Opcode);
  void visitAdd(User &I) {
    visitBinary(I, ISD::ADD, ISD::FADD, ISD::VADD);
  }
  void visitSub(User &I);
  void visitMul(User &I) {
    visitBinary(I, ISD::MUL, ISD::FMUL, ISD::VMUL);
  }
  void visitDiv(User &I) {
    const Type *Ty = I.getType();
    visitBinary(I,
                Ty->isSigned() ? ISD::SDIV : ISD::UDIV, ISD::FDIV,
                Ty->isSigned() ? ISD::VSDIV : ISD::VUDIV);
  }
  void visitRem(User &I) {
    const Type *Ty = I.getType();
    visitBinary(I, Ty->isSigned() ? ISD::SREM : ISD::UREM, ISD::FREM, 0);
  }
  void visitAnd(User &I) { visitBinary(I, ISD::AND, 0, ISD::VAND); }
  void visitOr (User &I) { visitBinary(I, ISD::OR,  0, ISD::VOR); }
  void visitXor(User &I) { visitBinary(I, ISD::XOR, 0, ISD::VXOR); }
  void visitShl(User &I) { visitShift(I, ISD::SHL); }
  void visitShr(User &I) {
    visitShift(I, I.getType()->isUnsigned() ? ISD::SRL : ISD::SRA);
  }

  void visitSetCC(User &I, ISD::CondCode SignedOpc, ISD::CondCode UnsignedOpc);
  void visitSetEQ(User &I) { visitSetCC(I, ISD::SETEQ, ISD::SETEQ); }
  void visitSetNE(User &I) { visitSetCC(I, ISD::SETNE, ISD::SETNE); }
  void visitSetLE(User &I) { visitSetCC(I, ISD::SETLE, ISD::SETULE); }
  void visitSetGE(User &I) { visitSetCC(I, ISD::SETGE, ISD::SETUGE); }
  void visitSetLT(User &I) { visitSetCC(I, ISD::SETLT, ISD::SETULT); }
  void visitSetGT(User &I) { visitSetCC(I, ISD::SETGT, ISD::SETUGT); }

  void visitExtractElement(User &I);
  void visitInsertElement(User &I);

  void visitGetElementPtr(User &I);
  void visitCast(User &I);
  void visitSelect(User &I);

  void visitMalloc(MallocInst &I);
  void visitFree(FreeInst &I);
  void visitAlloca(AllocaInst &I);
  void visitLoad(LoadInst &I);
  void visitStore(StoreInst &I);
  void visitPHI(PHINode &I) { } // PHI nodes are handled specially.
  void visitCall(CallInst &I);
  void visitInlineAsm(CallInst &I);
  const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic);
  void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic);

  void visitVAStart(CallInst &I);
  void visitVAArg(VAArgInst &I);
  void visitVAEnd(CallInst &I);
  void visitVACopy(CallInst &I);
  void visitFrameReturnAddress(CallInst &I, bool isFrameAddress);

  void visitMemIntrinsic(CallInst &I, unsigned Op);

  void visitUserOp1(Instruction &I) {
    assert(0 && "UserOp1 should not exist at instruction selection time!");
    abort();
  }
  void visitUserOp2(Instruction &I) {
    assert(0 && "UserOp2 should not exist at instruction selection time!");
    abort();
  }
};
} // end namespace llvm

SDOperand SelectionDAGLowering::getValue(const Value *V) {
  SDOperand &N = NodeMap[V];
  if (N.Val) return N;

  const Type *VTy = V->getType();
  MVT::ValueType VT = TLI.getValueType(VTy);
  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      assert(N.Val && "visit didn't populate the ValueMap!");
      return N;
    } else if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) {
      return N = DAG.getGlobalAddress(GV, VT);
    } else if (isa<ConstantPointerNull>(C)) {
      return N = DAG.getConstant(0, TLI.getPointerTy());
    } else if (isa<UndefValue>(C)) {
      if (!isa<PackedType>(VTy))
        return N = DAG.getNode(ISD::UNDEF, VT);

      // Create a VBUILD_VECTOR of undef nodes.
      const PackedType *PTy = cast<PackedType>(VTy);
      unsigned NumElements = PTy->getNumElements();
      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());

      std::vector<SDOperand> Ops;
      Ops.assign(NumElements, DAG.getNode(ISD::UNDEF, PVT));

      // Create a VBUILD_VECTOR node with generic Vector type.
      Ops.push_back(DAG.getConstant(NumElements, MVT::i32));
      Ops.push_back(DAG.getValueType(PVT));
      return N = DAG.getNode(ISD::VBUILD_VECTOR, MVT::Vector, Ops);
    } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
      return N = DAG.getConstantFP(CFP->getValue(), VT);
    } else if (const PackedType *PTy = dyn_cast<PackedType>(VTy)) {
      unsigned NumElements = PTy->getNumElements();
      MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());

      // Now that we know the number and type of the elements, push a
      // Constant or ConstantFP node onto the ops list for each element of
      // the packed constant.
      std::vector<SDOperand> Ops;
      if (ConstantPacked *CP = dyn_cast<ConstantPacked>(C)) {
        for (unsigned i = 0; i != NumElements; ++i)
          Ops.push_back(getValue(CP->getOperand(i)));
      } else {
        assert(isa<ConstantAggregateZero>(C) && "Unknown packed constant!");
        SDOperand Op;
        if (MVT::isFloatingPoint(PVT))
          Op = DAG.getConstantFP(0, PVT);
        else
          Op = DAG.getConstant(0, PVT);
        Ops.assign(NumElements, Op);
      }

      // Create a VBUILD_VECTOR node with generic Vector type.
      Ops.push_back(DAG.getConstant(NumElements, MVT::i32));
      Ops.push_back(DAG.getValueType(PVT));
      return N = DAG.getNode(ISD::VBUILD_VECTOR, MVT::Vector, Ops);
    } else {
      // Canonicalize all constant ints to be unsigned.
      return N = DAG.getConstant(cast<ConstantIntegral>(C)->getRawValue(), VT);
    }
  }

  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    std::map<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
  }

  std::map<const Value*, unsigned>::const_iterator VMI =
    FuncInfo.ValueMap.find(V);
  assert(VMI != FuncInfo.ValueMap.end() && "Value not in map!");

  unsigned InReg = VMI->second;

  // If this type is not legal, make it so now.
  if (VT == MVT::Vector) {
    // FIXME: We only handle legal vectors right now.  We need a VBUILD_VECTOR
    const PackedType *PTy = cast<PackedType>(VTy);
    unsigned NumElements = PTy->getNumElements();
    MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
    MVT::ValueType TVT = MVT::getVectorType(PVT, NumElements);
    assert(TLI.isTypeLegal(TVT) &&
           "FIXME: Cannot handle illegal vector types here yet!");
    VT = TVT;
  }

  MVT::ValueType DestVT = TLI.getTypeToTransformTo(VT);

  N = DAG.getCopyFromReg(DAG.getEntryNode(), InReg, DestVT);
  if (DestVT < VT) {
    // Source must be expanded.  This input value is actually coming from the
    // register pair VMI->second and VMI->second+1.
    N = DAG.getNode(ISD::BUILD_PAIR, VT, N,
                    DAG.getCopyFromReg(DAG.getEntryNode(), InReg+1, DestVT));
  } else {
    if (DestVT > VT) { // Promotion case
      if (MVT::isFloatingPoint(VT))
        N = DAG.getNode(ISD::FP_ROUND, VT, N);
      else
        N = DAG.getNode(ISD::TRUNCATE, VT, N);
    }
  }

  return N;
}


void SelectionDAGLowering::visitRet(ReturnInst &I) {
  if (I.getNumOperands() == 0) {
    DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, getRoot()));
    return;
  }
  std::vector<SDOperand> NewValues;
  NewValues.push_back(getRoot());
  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
    SDOperand RetOp = getValue(I.getOperand(i));

    // If this is an integer return value, we need to promote it ourselves to
    // the full width of a register, since LegalizeOp will use ANY_EXTEND rather
    // than sign/zero.
    if (MVT::isInteger(RetOp.getValueType()) &&
        RetOp.getValueType() < MVT::i64) {
      MVT::ValueType TmpVT;
      if (TLI.getTypeAction(MVT::i32) == TargetLowering::Promote)
        TmpVT = TLI.getTypeToTransformTo(MVT::i32);
      else
        TmpVT = MVT::i32;

      if (I.getOperand(i)->getType()->isSigned())
        RetOp = DAG.getNode(ISD::SIGN_EXTEND, TmpVT, RetOp);
      else
        RetOp = DAG.getNode(ISD::ZERO_EXTEND, TmpVT, RetOp);
    }
    NewValues.push_back(RetOp);
  }
  DAG.setRoot(DAG.getNode(ISD::RET, MVT::Other, NewValues));
}

void SelectionDAGLowering::visitBr(BranchInst &I) {
  // Update machine-CFG edges.
  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
  CurMBB->addSuccessor(Succ0MBB);

  // Figure out which block is immediately after the current one.
  MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = CurMBB;
  if (++BBI != CurMBB->getParent()->end())
    NextBlock = BBI;

  if (I.isUnconditional()) {
    // If this is not a fall-through branch, emit the branch.
    if (Succ0MBB != NextBlock)
      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
                              DAG.getBasicBlock(Succ0MBB)));
  } else {
    MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
    CurMBB->addSuccessor(Succ1MBB);

    SDOperand Cond = getValue(I.getCondition());
    if (Succ1MBB == NextBlock) {
      // If the condition is false, fall through.  This means we should branch
      // if the condition is true to Succ #0.
      DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
                              Cond, DAG.getBasicBlock(Succ0MBB)));
    } else if (Succ0MBB == NextBlock) {
      // If the condition is true, fall through.  This means we should branch if
      // the condition is false to Succ #1.  Invert the condition first.
      SDOperand True = DAG.getConstant(1, Cond.getValueType());
      Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
      DAG.setRoot(DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(),
                              Cond, DAG.getBasicBlock(Succ1MBB)));
    } else {
      std::vector<SDOperand> Ops;
      Ops.push_back(getRoot());
      // If the false case is the current basic block, then this is a self
      // loop.  We do not want to emit "Loop: ... brcond Out; br Loop", as it
      // adds an extra instruction in the loop.  Instead, invert the
      // condition and emit "Loop: ... br!cond Loop; br Out".
      if (CurMBB == Succ1MBB) {
        std::swap(Succ0MBB, Succ1MBB);
        SDOperand True = DAG.getConstant(1, Cond.getValueType());
        Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
      }
      SDOperand True = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond,
                                   DAG.getBasicBlock(Succ0MBB));
      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, True,
                              DAG.getBasicBlock(Succ1MBB)));
    }
  }
}
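
// As a concrete illustration of the lowering above: a conditional branch
// (in the IR syntax of this era, roughly "br bool %cond, label %then,
// label %else") where %else is the fall-through block becomes a single
// BRCOND to %then, while the fully general case becomes a BRCOND to one
// successor followed by an unconditional BR to the other.  The XOR-with-1
// trick is how the boolean condition is inverted whenever the polarity of
// the branch has to be flipped.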

/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGLowering::visitSwitchCase(SelectionDAGISel::CaseBlock &CB) {
  SDOperand SwitchOp = getValue(CB.SwitchV);
  SDOperand CaseOp = getValue(CB.CaseC);
  SDOperand Cond = DAG.getSetCC(MVT::i1, SwitchOp, CaseOp, CB.CC);

  // Set NextBlock to be the MBB immediately after the current one, if any.
  // This is used to avoid emitting unnecessary branches to the next block.
  MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = CurMBB;
  if (++BBI != CurMBB->getParent()->end())
    NextBlock = BBI;

  // If the lhs block is the next block, invert the condition so that we can
  // fall through to the lhs instead of the rhs block.
  if (CB.LHSBB == NextBlock) {
    std::swap(CB.LHSBB, CB.RHSBB);
    SDOperand True = DAG.getConstant(1, Cond.getValueType());
    Cond = DAG.getNode(ISD::XOR, Cond.getValueType(), Cond, True);
  }
  SDOperand BrCond = DAG.getNode(ISD::BRCOND, MVT::Other, getRoot(), Cond,
                                 DAG.getBasicBlock(CB.LHSBB));
  if (CB.RHSBB == NextBlock)
    DAG.setRoot(BrCond);
  else
    DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, BrCond,
                            DAG.getBasicBlock(CB.RHSBB)));
  // Update successor info
  CurMBB->addSuccessor(CB.LHSBB);
  CurMBB->addSuccessor(CB.RHSBB);
}

void SelectionDAGLowering::visitSwitch(SwitchInst &I) {
  // Figure out which block is immediately after the current one.
  MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = CurMBB;
  if (++BBI != CurMBB->getParent()->end())
    NextBlock = BBI;

  // If there is only the default destination, branch to it if it is not the
  // next basic block.  Otherwise, just fall through.
  if (I.getNumOperands() == 2) {
    // Update machine-CFG edges.
    MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[I.getDefaultDest()];
    // If this is not a fall-through branch, emit the branch.
    if (DefaultMBB != NextBlock)
      DAG.setRoot(DAG.getNode(ISD::BR, MVT::Other, getRoot(),
                              DAG.getBasicBlock(DefaultMBB)));
    return;
  }

  // If there are any non-default case statements, create a vector of Cases
  // representing each one, and sort the vector so that we can efficiently
  // create a binary search tree from them.
  std::vector<Case> Cases;
  for (unsigned i = 1; i < I.getNumSuccessors(); ++i) {
    MachineBasicBlock *SMBB = FuncInfo.MBBMap[I.getSuccessor(i)];
    Cases.push_back(Case(I.getSuccessorValue(i), SMBB));
  }
  std::sort(Cases.begin(), Cases.end(), CaseCmp());

  // Get the Value to be switched on and default basic blocks, which will be
  // inserted into CaseBlock records, representing basic blocks in the binary
  // search tree.
  Value *SV = I.getOperand(0);
  MachineBasicBlock *Default = FuncInfo.MBBMap[I.getDefaultDest()];

  // Get the current MachineFunction and LLVM basic block, for use in creating
  // and inserting new MBBs during the creation of the binary search tree.
  MachineFunction *CurMF = CurMBB->getParent();
  const BasicBlock *LLVMBB = CurMBB->getBasicBlock();

  // Push the initial CaseRec onto the worklist
  std::vector<CaseRec> CaseVec;
  CaseVec.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));

  while (!CaseVec.empty()) {
    // Grab a record representing a case range to process off the worklist
    CaseRec CR = CaseVec.back();
    CaseVec.pop_back();

    // Size is the number of Cases represented by this range.  If Size is 1,
    // then we are processing a leaf of the binary search tree.  Otherwise,
    // we need to pick a pivot, and push left and right ranges onto the
    // worklist.
    unsigned Size = CR.Range.second - CR.Range.first;

    if (Size == 1) {
      // Create a CaseBlock record representing a conditional branch to
      // the Case's target mbb if the value being switched on SV is equal
      // to C.  Otherwise, branch to default.
      Constant *C = CR.Range.first->first;
      MachineBasicBlock *Target = CR.Range.first->second;
      SelectionDAGISel::CaseBlock CB(ISD::SETEQ, SV, C, Target, Default,
                                     CR.CaseBB);
      // If the MBB representing the leaf node is the current MBB, then just
      // call visitSwitchCase to emit the code into the current block.
      // Otherwise, push the CaseBlock onto the vector to be later processed
      // by SDISel, and insert the node's MBB before the next MBB.
      if (CR.CaseBB == CurMBB)
        visitSwitchCase(CB);
      else {
        SwitchCases.push_back(CB);
        CurMF->getBasicBlockList().insert(BBI, CR.CaseBB);
      }
    } else {
      // split case range at pivot
      CaseItr Pivot = CR.Range.first + (Size / 2);
      CaseRange LHSR(CR.Range.first, Pivot);
      CaseRange RHSR(Pivot, CR.Range.second);
      Constant *C = Pivot->first;
      MachineBasicBlock *RHSBB = 0, *LHSBB = 0;
      // We know that we branch to the LHS if the Value being switched on is
      // less than the Pivot value, C.  We use this to optimize our binary
      // tree a bit, by recognizing that if SV is greater than or equal to the
      // LHS's Case Value, and that Case Value is exactly one less than the
      // Pivot's Value, then we can branch directly to the LHS's Target,
      // rather than creating a leaf node for it.
      if ((LHSR.second - LHSR.first) == 1 &&
          LHSR.first->first == CR.GE &&
          cast<ConstantIntegral>(C)->getRawValue() ==
          (cast<ConstantIntegral>(CR.GE)->getRawValue() + 1ULL)) {
        LHSBB = LHSR.first->second;
      } else {
        LHSBB = new MachineBasicBlock(LLVMBB);
        CaseVec.push_back(CaseRec(LHSBB,C,CR.GE,LHSR));
      }
      // Similar to the optimization above, if the Value being switched on is
      // known to be less than the Constant CR.LT, and the current Case Value
      // is CR.LT - 1, then we can branch directly to the target block for
      // the current Case Value, rather than emitting a RHS leaf node for it.
      if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
          cast<ConstantIntegral>(RHSR.first->first)->getRawValue() ==
          (cast<ConstantIntegral>(CR.LT)->getRawValue() - 1ULL)) {
        RHSBB = RHSR.first->second;
      } else {
        RHSBB = new MachineBasicBlock(LLVMBB);
        CaseVec.push_back(CaseRec(RHSBB,CR.LT,C,RHSR));
      }
      // Create a CaseBlock record representing a conditional branch to
      // the LHS node if the value being switched on SV is less than C.
      // Otherwise, branch to RHS.
      ISD::CondCode CC = C->getType()->isSigned() ? ISD::SETLT : ISD::SETULT;
      SelectionDAGISel::CaseBlock CB(CC, SV, C, LHSBB, RHSBB, CR.CaseBB);
      if (CR.CaseBB == CurMBB)
        visitSwitchCase(CB);
      else {
        SwitchCases.push_back(CB);
        CurMF->getBasicBlockList().insert(BBI, CR.CaseBB);
      }
    }
  }
}
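
// To sketch the net effect: a switch with cases 1, 2, 3, 4 is first sorted,
// then split at the pivot (3).  The root block compares SV < 3 and branches
// either to a block handling {1, 2} or to one handling {3, 4}, each of which
// splits again or emits a SETEQ leaf against a single case value, falling
// back to the default destination.  The CR.LT/CR.GE bounds carried in each
// CaseRec are what allow the two "adjacent value" optimizations above to
// branch straight to a case target without emitting a leaf compare.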

void SelectionDAGLowering::visitSub(User &I) {
  // -0.0 - X --> fneg
  if (I.getType()->isFloatingPoint()) {
    if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
      if (CFP->isExactlyValue(-0.0)) {
        SDOperand Op2 = getValue(I.getOperand(1));
        setValue(&I, DAG.getNode(ISD::FNEG, Op2.getValueType(), Op2));
        return;
      }
  }
  visitBinary(I, ISD::SUB, ISD::FSUB, ISD::VSUB);
}

void SelectionDAGLowering::visitBinary(User &I, unsigned IntOp, unsigned FPOp,
                                       unsigned VecOp) {
  const Type *Ty = I.getType();
  SDOperand Op1 = getValue(I.getOperand(0));
  SDOperand Op2 = getValue(I.getOperand(1));

  if (Ty->isIntegral()) {
    setValue(&I, DAG.getNode(IntOp, Op1.getValueType(), Op1, Op2));
  } else if (Ty->isFloatingPoint()) {
    setValue(&I, DAG.getNode(FPOp, Op1.getValueType(), Op1, Op2));
  } else {
    const PackedType *PTy = cast<PackedType>(Ty);
    SDOperand Num = DAG.getConstant(PTy->getNumElements(), MVT::i32);
    SDOperand Typ = DAG.getValueType(TLI.getValueType(PTy->getElementType()));
    setValue(&I, DAG.getNode(VecOp, MVT::Vector, Op1, Op2, Num, Typ));
  }
}

void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
  SDOperand Op1 = getValue(I.getOperand(0));
  SDOperand Op2 = getValue(I.getOperand(1));

  Op2 = DAG.getNode(ISD::ANY_EXTEND, TLI.getShiftAmountTy(), Op2);

  setValue(&I, DAG.getNode(Opcode, Op1.getValueType(), Op1, Op2));
}

void SelectionDAGLowering::visitSetCC(User &I, ISD::CondCode SignedOpcode,
                                      ISD::CondCode UnsignedOpcode) {
  SDOperand Op1 = getValue(I.getOperand(0));
  SDOperand Op2 = getValue(I.getOperand(1));
  ISD::CondCode Opcode = SignedOpcode;
  if (I.getOperand(0)->getType()->isUnsigned())
    Opcode = UnsignedOpcode;
  setValue(&I, DAG.getSetCC(MVT::i1, Op1, Op2, Opcode));
}

void SelectionDAGLowering::visitSelect(User &I) {
  SDOperand Cond     = getValue(I.getOperand(0));
  SDOperand TrueVal  = getValue(I.getOperand(1));
  SDOperand FalseVal = getValue(I.getOperand(2));
  setValue(&I, DAG.getNode(ISD::SELECT, TrueVal.getValueType(), Cond,
                           TrueVal, FalseVal));
}

void SelectionDAGLowering::visitCast(User &I) {
  SDOperand N = getValue(I.getOperand(0));
  MVT::ValueType SrcVT = N.getValueType();
  MVT::ValueType DestVT = TLI.getValueType(I.getType());

  if (DestVT == MVT::Vector) {
    // This is a cast to a vector from something else.  This is always a bit
    // convert.  Get information about the input vector.
    const PackedType *DestTy = cast<PackedType>(I.getType());
    MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType());
    setValue(&I, DAG.getNode(ISD::VBIT_CONVERT, DestVT, N,
                             DAG.getConstant(DestTy->getNumElements(), MVT::i32),
                             DAG.getValueType(EltVT)));
  } else if (SrcVT == DestVT) {
    setValue(&I, N);  // noop cast.
  } else if (DestVT == MVT::i1) {
    // Cast to bool is a comparison against zero, not truncation to zero.
    SDOperand Zero = isInteger(SrcVT) ? DAG.getConstant(0, N.getValueType()) :
                                        DAG.getConstantFP(0.0, N.getValueType());
    setValue(&I, DAG.getSetCC(MVT::i1, N, Zero, ISD::SETNE));
  } else if (isInteger(SrcVT)) {
    if (isInteger(DestVT)) {        // Int -> Int cast
      if (DestVT < SrcVT)           // Truncating cast?
        setValue(&I, DAG.getNode(ISD::TRUNCATE, DestVT, N));
      else if (I.getOperand(0)->getType()->isSigned())
        setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, DestVT, N));
      else
        setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, DestVT, N));
    } else if (isFloatingPoint(DestVT)) { // Int -> FP cast
      if (I.getOperand(0)->getType()->isSigned())
        setValue(&I, DAG.getNode(ISD::SINT_TO_FP, DestVT, N));
      else
        setValue(&I, DAG.getNode(ISD::UINT_TO_FP, DestVT, N));
    } else {
      assert(0 && "Unknown cast!");
    }
  } else if (isFloatingPoint(SrcVT)) {
    if (isFloatingPoint(DestVT)) {  // FP -> FP cast
      if (DestVT < SrcVT)           // Rounding cast?
        setValue(&I, DAG.getNode(ISD::FP_ROUND, DestVT, N));
      else
        setValue(&I, DAG.getNode(ISD::FP_EXTEND, DestVT, N));
    } else if (isInteger(DestVT)) { // FP -> Int cast.
      if (I.getType()->isSigned())
        setValue(&I, DAG.getNode(ISD::FP_TO_SINT, DestVT, N));
      else
        setValue(&I, DAG.getNode(ISD::FP_TO_UINT, DestVT, N));
    } else {
      assert(0 && "Unknown cast!");
    }
  } else {
    assert(SrcVT == MVT::Vector && "Unknown cast!");
    assert(DestVT != MVT::Vector && "Casts to vector already handled!");
    // This is a cast from a vector to something else.  This is always a bit
    // convert.  Get information about the input vector.
    setValue(&I, DAG.getNode(ISD::VBIT_CONVERT, DestVT, N));
  }
}

void SelectionDAGLowering::visitInsertElement(User &I) {
  SDOperand InVec = getValue(I.getOperand(0));
  SDOperand InVal = getValue(I.getOperand(1));
  SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
                                getValue(I.getOperand(2)));

  SDOperand Num = *(InVec.Val->op_end()-2);
  SDOperand Typ = *(InVec.Val->op_end()-1);
  setValue(&I, DAG.getNode(ISD::VINSERT_VECTOR_ELT, MVT::Vector,
                           InVec, InVal, InIdx, Num, Typ));
}

void SelectionDAGLowering::visitExtractElement(User &I) {
  SDOperand InVec = getValue(I.getOperand(0));
  SDOperand InIdx = DAG.getNode(ISD::ZERO_EXTEND, TLI.getPointerTy(),
                                getValue(I.getOperand(1)));
  SDOperand Typ = *(InVec.Val->op_end()-1);
  setValue(&I, DAG.getNode(ISD::VEXTRACT_VECTOR_ELT,
                           TLI.getValueType(I.getType()), InVec, InIdx));
}

void SelectionDAGLowering::visitGetElementPtr(User &I) {
  SDOperand N = getValue(I.getOperand(0));
  const Type *Ty = I.getOperand(0)->getType();
  const Type *UIntPtrTy = TD.getIntPtrType();

  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
       OI != E; ++OI) {
    Value *Idx = *OI;
    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
      unsigned Field = cast<ConstantUInt>(Idx)->getValue();
      if (Field) {
        // N = N + Offset
        uint64_t Offset = TD.getStructLayout(StTy)->MemberOffsets[Field];
        N = DAG.getNode(ISD::ADD, N.getValueType(), N,
                        getIntPtrConstant(Offset));
      }
      Ty = StTy->getElementType(Field);
    } else {
      Ty = cast<SequentialType>(Ty)->getElementType();

      // If this is a constant subscript, handle it quickly.
      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->getRawValue() == 0) continue;

        uint64_t Offs;
        if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
          Offs = (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
        else
          Offs = TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
        N = DAG.getNode(ISD::ADD, N.getValueType(), N, getIntPtrConstant(Offs));
        continue;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = TD.getTypeSize(Ty);
      SDOperand IdxN = getValue(Idx);

      // If the index is smaller or larger than intptr_t, truncate or extend
      // it.
      if (IdxN.getValueType() < N.getValueType()) {
        if (Idx->getType()->isSigned())
          IdxN = DAG.getNode(ISD::SIGN_EXTEND, N.getValueType(), IdxN);
        else
          IdxN = DAG.getNode(ISD::ZERO_EXTEND, N.getValueType(), IdxN);
      } else if (IdxN.getValueType() > N.getValueType())
        IdxN = DAG.getNode(ISD::TRUNCATE, N.getValueType(), IdxN);

      // If this is a multiply by a power of two, turn it into a shl
      // immediately.  This is a very common case.
      if (isPowerOf2_64(ElementSize)) {
        unsigned Amt = Log2_64(ElementSize);
        IdxN = DAG.getNode(ISD::SHL, N.getValueType(), IdxN,
                           DAG.getConstant(Amt, TLI.getShiftAmountTy()));
        N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
        continue;
      }

      SDOperand Scale = getIntPtrConstant(ElementSize);
      IdxN = DAG.getNode(ISD::MUL, N.getValueType(), IdxN, Scale);
      N = DAG.getNode(ISD::ADD, N.getValueType(), N, IdxN);
    }
  }
  setValue(&I, N);
}
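
// For instance (a sketch using the IR of this era), the address computed for
//
//   getelementptr [10 x int]* %A, long 0, long %i
//
// starts from the node for %A, skips the leading zero index, and then adds
// %i scaled by the 4-byte element size; because 4 is a power of two, the
// scaling is emitted as "shl %i, 2" by the fast path above rather than a MUL.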

void SelectionDAGLowering::visitAlloca(AllocaInst &I) {
  // If this is a fixed sized alloca in the entry block of the function,
  // allocate it statically on the stack.
  if (FuncInfo.StaticAllocaMap.count(&I))
    return;   // getValue will auto-populate this.

  const Type *Ty = I.getAllocatedType();
  uint64_t TySize = TLI.getTargetData().getTypeSize(Ty);
  unsigned Align = std::max((unsigned)TLI.getTargetData().getTypeAlignment(Ty),
                            I.getAlignment());

  SDOperand AllocSize = getValue(I.getArraySize());
  MVT::ValueType IntPtr = TLI.getPointerTy();
  if (IntPtr < AllocSize.getValueType())
    AllocSize = DAG.getNode(ISD::TRUNCATE, IntPtr, AllocSize);
  else if (IntPtr > AllocSize.getValueType())
    AllocSize = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, AllocSize);

  AllocSize = DAG.getNode(ISD::MUL, IntPtr, AllocSize,
                          getIntPtrConstant(TySize));

  // Handle alignment.  If the requested alignment is less than or equal to
  // the stack alignment, ignore it and round the size of the allocation up to
  // the stack alignment size.  If the requested alignment is greater than the
  // stack alignment, we note this in the DYNAMIC_STACKALLOC node.
  unsigned StackAlign =
    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
  if (Align <= StackAlign) {
    Align = 0;
    // Add SA-1 to the size.
    AllocSize = DAG.getNode(ISD::ADD, AllocSize.getValueType(), AllocSize,
                            getIntPtrConstant(StackAlign-1));
    // Mask out the low bits for alignment purposes.
    AllocSize = DAG.getNode(ISD::AND, AllocSize.getValueType(), AllocSize,
                            getIntPtrConstant(~(uint64_t)(StackAlign-1)));
  }

  std::vector<MVT::ValueType> VTs;
  VTs.push_back(AllocSize.getValueType());
  VTs.push_back(MVT::Other);
  std::vector<SDOperand> Ops;
  Ops.push_back(getRoot());
  Ops.push_back(AllocSize);
  Ops.push_back(getIntPtrConstant(Align));
  SDOperand DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, VTs, Ops);
  DAG.setRoot(setValue(&I, DSA).getValue(1));

  // Inform the Frame Information that we have just allocated a variable-sized
  // object.
  CurMBB->getParent()->getFrameInfo()->CreateVariableSizedObject();
}
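
// The round-up arithmetic above is the usual mask trick.  For a sketch with
// StackAlign == 16 and a requested size of 40 bytes:
//
//   (40 + 15) & ~15  ==  55 & ~15  ==  48
//
// so the dynamically allocated block is padded up to the 16-byte stack
// alignment, and the DYNAMIC_STACKALLOC node sees Align == 0, meaning "no
// extra alignment beyond the stack pointer's natural alignment".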

void SelectionDAGLowering::visitLoad(LoadInst &I) {
  SDOperand Ptr = getValue(I.getOperand(0));

  SDOperand Root;
  if (I.isVolatile())
    Root = getRoot();
  else {
    // Do not serialize non-volatile loads against each other.
    Root = DAG.getRoot();
  }

  setValue(&I, getLoadFrom(I.getType(), Ptr, DAG.getSrcValue(I.getOperand(0)),
                           Root, I.isVolatile()));
}

SDOperand SelectionDAGLowering::getLoadFrom(const Type *Ty, SDOperand Ptr,
                                            SDOperand SrcValue, SDOperand Root,
                                            bool isVolatile) {
  SDOperand L;
  if (const PackedType *PTy = dyn_cast<PackedType>(Ty)) {
    MVT::ValueType PVT = TLI.getValueType(PTy->getElementType());
    L = DAG.getVecLoad(PTy->getNumElements(), PVT, Root, Ptr, SrcValue);
  } else {
    L = DAG.getLoad(TLI.getValueType(Ty), Root, Ptr, SrcValue);
  }

  if (isVolatile)
    DAG.setRoot(L.getValue(1));
  else
    PendingLoads.push_back(L.getValue(1));

  return L;
}


void SelectionDAGLowering::visitStore(StoreInst &I) {
  Value *SrcV = I.getOperand(0);
  SDOperand Src = getValue(SrcV);
  SDOperand Ptr = getValue(I.getOperand(1));
  DAG.setRoot(DAG.getNode(ISD::STORE, MVT::Other, getRoot(), Src, Ptr,
                          DAG.getSrcValue(I.getOperand(1))));
}

/// IntrinsicCannotAccessMemory - Return true if the specified intrinsic cannot
/// access memory and has no other side effects at all.
static bool IntrinsicCannotAccessMemory(unsigned IntrinsicID) {
#define GET_NO_MEMORY_INTRINSICS
#include "llvm/Intrinsics.gen"
#undef GET_NO_MEMORY_INTRINSICS
  return false;
}

/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I,
                                                unsigned Intrinsic) {
  bool HasChain = !IntrinsicCannotAccessMemory(Intrinsic);

  // Build the operand list.
  std::vector<SDOperand> Ops;
  if (HasChain)   // If this intrinsic has side-effects, chainify it.
    Ops.push_back(getRoot());

  // Add the intrinsic ID as an integer operand.
  Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));

  // Add all operands of the call to the operand list.
  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
    SDOperand Op = getValue(I.getOperand(i));

    // If this is a vector type, force it to the right packed type.
    if (Op.getValueType() == MVT::Vector) {
      const PackedType *OpTy = cast<PackedType>(I.getOperand(i)->getType());
      MVT::ValueType EltVT = TLI.getValueType(OpTy->getElementType());

      MVT::ValueType VVT = MVT::getVectorType(EltVT, OpTy->getNumElements());
      assert(VVT != MVT::Other && "Intrinsic uses a non-legal type?");
      Op = DAG.getNode(ISD::VBIT_CONVERT, VVT, Op);
    }

    assert(TLI.isTypeLegal(Op.getValueType()) &&
           "Intrinsic uses a non-legal type?");
    Ops.push_back(Op);
  }

  std::vector<MVT::ValueType> VTs;
  if (I.getType() != Type::VoidTy) {
    MVT::ValueType VT = TLI.getValueType(I.getType());
    if (VT == MVT::Vector) {
      const PackedType *DestTy = cast<PackedType>(I.getType());
      MVT::ValueType EltVT = TLI.getValueType(DestTy->getElementType());

      VT = MVT::getVectorType(EltVT, DestTy->getNumElements());
      assert(VT != MVT::Other && "Intrinsic uses a non-legal type?");
    }

    assert(TLI.isTypeLegal(VT) && "Intrinsic uses a non-legal type?");
    VTs.push_back(VT);
  }
  if (HasChain)
    VTs.push_back(MVT::Other);

  // Create the node.
  SDOperand Result;
  if (!HasChain)
    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VTs, Ops);
  else if (I.getType() != Type::VoidTy)
    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, VTs, Ops);
  else
    Result = DAG.getNode(ISD::INTRINSIC_VOID, VTs, Ops);

  if (HasChain)
    DAG.setRoot(Result.getValue(Result.Val->getNumValues()-1));
  if (I.getType() != Type::VoidTy) {
    if (const PackedType *PTy = dyn_cast<PackedType>(I.getType())) {
      MVT::ValueType EVT = TLI.getValueType(PTy->getElementType());
      Result = DAG.getNode(ISD::VBIT_CONVERT, MVT::Vector, Result,
                           DAG.getConstant(PTy->getNumElements(), MVT::i32),
                           DAG.getValueType(EVT));
    }
    setValue(&I, Result);
  }
}

/// visitIntrinsicCall - Lower the call to the specified intrinsic function.
/// If we want to emit this as a call to a named external function, return the
/// name of the function; otherwise, lower it and return null.
const char *
SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
  switch (Intrinsic) {
  default:
    // By default, turn this into a target intrinsic node.
    visitTargetIntrinsic(I, Intrinsic);
    return 0;
  case Intrinsic::vastart:  visitVAStart(I); return 0;
  case Intrinsic::vaend:    visitVAEnd(I); return 0;
  case Intrinsic::vacopy:   visitVACopy(I); return 0;
  case Intrinsic::returnaddress: visitFrameReturnAddress(I, false); return 0;
  case Intrinsic::frameaddress:  visitFrameReturnAddress(I, true); return 0;
  case Intrinsic::setjmp:
    // Pointer arithmetic on the literal: skip the leading '_' when the
    // target does not use the underscore-prefixed variants.
    return "_setjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
    break;
  case Intrinsic::longjmp:
    return "_longjmp"+!TLI.usesUnderscoreSetJmpLongJmp();
    break;
  case Intrinsic::memcpy_i32:
  case Intrinsic::memcpy_i64:
    visitMemIntrinsic(I, ISD::MEMCPY);
    return 0;
  case Intrinsic::memset_i32:
  case Intrinsic::memset_i64:
    visitMemIntrinsic(I, ISD::MEMSET);
    return 0;
  case Intrinsic::memmove_i32:
  case Intrinsic::memmove_i64:
    visitMemIntrinsic(I, ISD::MEMMOVE);
    return 0;

  case Intrinsic::dbg_stoppoint: {
    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
    DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
    if (DebugInfo && SPI.getContext() && DebugInfo->Verify(SPI.getContext())) {
      std::vector<SDOperand> Ops;

      Ops.push_back(getRoot());
      Ops.push_back(getValue(SPI.getLineValue()));
      Ops.push_back(getValue(SPI.getColumnValue()));

      DebugInfoDesc *DD = DebugInfo->getDescFor(SPI.getContext());
      assert(DD && "Not a debug information descriptor");
      CompileUnitDesc *CompileUnit = cast<CompileUnitDesc>(DD);

      Ops.push_back(DAG.getString(CompileUnit->getFileName()));
      Ops.push_back(DAG.getString(CompileUnit->getDirectory()));

      DAG.setRoot(DAG.getNode(ISD::LOCATION, MVT::Other, Ops));
    }

    return 0;
  }
  case Intrinsic::dbg_region_start: {
    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
    DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
    if (DebugInfo && RSI.getContext() && DebugInfo->Verify(RSI.getContext())) {
      std::vector<SDOperand> Ops;

      unsigned LabelID = DebugInfo->RecordRegionStart(RSI.getContext());

      Ops.push_back(getRoot());
      Ops.push_back(DAG.getConstant(LabelID, MVT::i32));

      DAG.setRoot(DAG.getNode(ISD::DEBUG_LABEL, MVT::Other, Ops));
    }

    return 0;
  }
  case Intrinsic::dbg_region_end: {
    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
    DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
    if (DebugInfo && REI.getContext() && DebugInfo->Verify(REI.getContext())) {
      std::vector<SDOperand> Ops;

      unsigned LabelID = DebugInfo->RecordRegionEnd(REI.getContext());

      Ops.push_back(getRoot());
      Ops.push_back(DAG.getConstant(LabelID, MVT::i32));

      DAG.setRoot(DAG.getNode(ISD::DEBUG_LABEL, MVT::Other, Ops));
    }

    return 0;
  }
  case Intrinsic::dbg_func_start: {
    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
    DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
    if (DebugInfo && FSI.getSubprogram() &&
        DebugInfo->Verify(FSI.getSubprogram())) {
      std::vector<SDOperand> Ops;

      unsigned LabelID = DebugInfo->RecordRegionStart(FSI.getSubprogram());

      Ops.push_back(getRoot());
      Ops.push_back(DAG.getConstant(LabelID, MVT::i32));

      DAG.setRoot(DAG.getNode(ISD::DEBUG_LABEL, MVT::Other, Ops));
    }

    return 0;
  }
  case Intrinsic::dbg_declare: {
    MachineDebugInfo *DebugInfo = DAG.getMachineDebugInfo();
    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
    if (DebugInfo && DI.getVariable() && DebugInfo->Verify(DI.getVariable())) {
      std::vector<SDOperand> Ops;

      SDOperand AddressOp = getValue(DI.getAddress());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddressOp)) {
        DebugInfo->RecordVariable(DI.getVariable(), FI->getIndex());
      }
    }

    return 0;
  }

  case Intrinsic::isunordered_f32:
  case Intrinsic::isunordered_f64:
    setValue(&I, DAG.getSetCC(MVT::i1, getValue(I.getOperand(1)),
                              getValue(I.getOperand(2)), ISD::SETUO));
    return 0;

  case Intrinsic::sqrt_f32:
  case Intrinsic::sqrt_f64:
    setValue(&I, DAG.getNode(ISD::FSQRT,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::pcmarker: {
    SDOperand Tmp = getValue(I.getOperand(1));
    DAG.setRoot(DAG.getNode(ISD::PCMARKER, MVT::Other, getRoot(), Tmp));
    return 0;
  }
  case Intrinsic::readcyclecounter: {
    std::vector<MVT::ValueType> VTs;
    VTs.push_back(MVT::i64);
    VTs.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(getRoot());
    SDOperand Tmp = DAG.getNode(ISD::READCYCLECOUNTER, VTs, Ops);
    setValue(&I, Tmp);
    DAG.setRoot(Tmp.getValue(1));
    return 0;
  }
  case Intrinsic::bswap_i16:
  case Intrinsic::bswap_i32:
  case Intrinsic::bswap_i64:
    setValue(&I, DAG.getNode(ISD::BSWAP,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::cttz_i8:
  case Intrinsic::cttz_i16:
  case Intrinsic::cttz_i32:
  case Intrinsic::cttz_i64:
    setValue(&I, DAG.getNode(ISD::CTTZ,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::ctlz_i8:
  case Intrinsic::ctlz_i16:
  case Intrinsic::ctlz_i32:
  case Intrinsic::ctlz_i64:
    setValue(&I, DAG.getNode(ISD::CTLZ,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::ctpop_i8:
  case Intrinsic::ctpop_i16:
  case Intrinsic::ctpop_i32:
  case Intrinsic::ctpop_i64:
    setValue(&I, DAG.getNode(ISD::CTPOP,
                             getValue(I.getOperand(1)).getValueType(),
                             getValue(I.getOperand(1))));
    return 0;
  case Intrinsic::stacksave: {
    std::vector<MVT::ValueType> VTs;
    VTs.push_back(TLI.getPointerTy());
    VTs.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(getRoot());
    SDOperand Tmp = DAG.getNode(ISD::STACKSAVE, VTs, Ops);
    setValue(&I, Tmp);
    DAG.setRoot(Tmp.getValue(1));
    return 0;
  }
  case Intrinsic::stackrestore: {
    SDOperand Tmp = getValue(I.getOperand(1));
    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, MVT::Other, getRoot(), Tmp));
    return 0;
  }
  case Intrinsic::prefetch:
    // FIXME: Currently discarding prefetches.
    return 0;
  }
}


void SelectionDAGLowering::visitCall(CallInst &I) {
  const char *RenameFn = 0;
  if (Function *F = I.getCalledFunction()) {
    if (F->isExternal())
      if (unsigned IID = F->getIntrinsicID()) {
        RenameFn = visitIntrinsicCall(I, IID);
        if (!RenameFn)
          return;
      } else {    // Not an LLVM intrinsic.
        const std::string &Name = F->getName();
        if (Name[0] == 'c' && (Name == "copysign" || Name == "copysignf")) {
          if (I.getNumOperands() == 3 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType() &&
              I.getType() == I.getOperand(2)->getType()) {
            SDOperand LHS = getValue(I.getOperand(1));
            SDOperand RHS = getValue(I.getOperand(2));
            setValue(&I, DAG.getNode(ISD::FCOPYSIGN, LHS.getValueType(),
                                     LHS, RHS));
            return;
          }
        } else if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType()) {
            SDOperand Tmp = getValue(I.getOperand(1));
            setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
            return;
          }
        } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType()) {
            SDOperand Tmp = getValue(I.getOperand(1));
            setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
            return;
          }
        } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
          if (I.getNumOperands() == 2 &&   // Basic sanity checks.
              I.getOperand(1)->getType()->isFloatingPoint() &&
              I.getType() == I.getOperand(1)->getType()) {
            SDOperand Tmp = getValue(I.getOperand(1));
            setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
            return;
          }
        }
      }
  } else if (isa<InlineAsm>(I.getOperand(0))) {
    visitInlineAsm(I);
    return;
  }

  SDOperand Callee;
  if (!RenameFn)
    Callee = getValue(I.getOperand(0));
  else
    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
  std::vector<std::pair<SDOperand, const Type*> > Args;
  Args.reserve(I.getNumOperands());
  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
    Value *Arg = I.getOperand(i);
    SDOperand ArgNode = getValue(Arg);
    Args.push_back(std::make_pair(ArgNode, Arg->getType()));
  }

  const PointerType *PT = cast<PointerType>(I.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());

  std::pair<SDOperand,SDOperand> Result =
    TLI.LowerCallTo(getRoot(), I.getType(), FTy->isVarArg(), I.getCallingConv(),
                    I.isTailCall(), Callee, Args, DAG);
  if (I.getType() != Type::VoidTy)
    setValue(&I, Result.first);
  DAG.setRoot(Result.second);
}
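
// For example, an external call to sin() whose operand and result are both
// the same floating-point type is recognized by name above and lowered
// directly to an ISD::FSIN node on the operand, never reaching LowerCallTo;
// an unrecognized external call of the same shape instead falls through to
// the generic LowerCallTo path at the bottom of the function.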
1489 I.getOperand(1)->getType()->isFloatingPoint() &&
1490 I.getType() == I.getOperand(1)->getType() &&
1491 I.getType() == I.getOperand(2)->getType()) {
1492 SDOperand LHS = getValue(I.getOperand(1));
1493 SDOperand RHS = getValue(I.getOperand(2));
1494 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, LHS.getValueType(),
1495 LHS, RHS));
1496 return;
1497 }
1498 } else if (Name[0] == 'f' && (Name == "fabs" || Name == "fabsf")) {
1499 if (I.getNumOperands() == 2 && // Basic sanity checks.
1500 I.getOperand(1)->getType()->isFloatingPoint() &&
1501 I.getType() == I.getOperand(1)->getType()) {
1502 SDOperand Tmp = getValue(I.getOperand(1));
1503 setValue(&I, DAG.getNode(ISD::FABS, Tmp.getValueType(), Tmp));
1504 return;
1505 }
1506 } else if (Name[0] == 's' && (Name == "sin" || Name == "sinf")) {
1507 if (I.getNumOperands() == 2 && // Basic sanity checks.
1508 I.getOperand(1)->getType()->isFloatingPoint() &&
1509 I.getType() == I.getOperand(1)->getType()) {
1510 SDOperand Tmp = getValue(I.getOperand(1));
1511 setValue(&I, DAG.getNode(ISD::FSIN, Tmp.getValueType(), Tmp));
1512 return;
1513 }
1514 } else if (Name[0] == 'c' && (Name == "cos" || Name == "cosf")) {
1515 if (I.getNumOperands() == 2 && // Basic sanity checks.
1516 I.getOperand(1)->getType()->isFloatingPoint() &&
1517 I.getType() == I.getOperand(1)->getType()) {
1518 SDOperand Tmp = getValue(I.getOperand(1));
1519 setValue(&I, DAG.getNode(ISD::FCOS, Tmp.getValueType(), Tmp));
1520 return;
1521 }
1522 }
1523 }
1524 } else if (isa<InlineAsm>(I.getOperand(0))) {
1525 visitInlineAsm(I);
1526 return;
1527 }
1528
1529 SDOperand Callee;
1530 if (!RenameFn)
1531 Callee = getValue(I.getOperand(0));
1532 else
1533 Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
1534 std::vector<std::pair<SDOperand, const Type*> > Args;
1535 Args.reserve(I.getNumOperands());
1536 for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
1537 Value *Arg = I.getOperand(i);
1538 SDOperand ArgNode = getValue(Arg);
1539 Args.push_back(std::make_pair(ArgNode, Arg->getType()));
1540 }
1541
1542 const PointerType *PT = cast<PointerType>(I.getCalledValue()->getType());
1543 const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1544
1545 std::pair<SDOperand,SDOperand> Result =
1546 TLI.LowerCallTo(getRoot(), I.getType(), FTy->isVarArg(), I.getCallingConv(),
1547 I.isTailCall(), Callee, Args, DAG);
1548 if (I.getType() != Type::VoidTy)
1549 setValue(&I, Result.first);
1550 DAG.setRoot(Result.second);
1551 }
1552
1553 SDOperand RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
1554 SDOperand &Chain, SDOperand &Flag) const {
1555 SDOperand Val = DAG.getCopyFromReg(Chain, Regs[0], RegVT, Flag);
1556 Chain = Val.getValue(1);
1557 Flag = Val.getValue(2);
1558
1559 // If the result was expanded, copy from the top part.
1560 if (Regs.size() > 1) {
1561 assert(Regs.size() == 2 &&
1562 "Cannot expand to more than 2 elts yet!");
1563 SDOperand Hi = DAG.getCopyFromReg(Chain, Regs[1], RegVT, Flag);
1564 Chain = Hi.getValue(1);
1565 Flag = Hi.getValue(2);
1566 if (DAG.getTargetLoweringInfo().isLittleEndian())
1567 return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Val, Hi);
1568 else
1569 return DAG.getNode(ISD::BUILD_PAIR, ValueVT, Hi, Val);
1570 }
1571
1572 // Otherwise, if the return value was promoted, truncate it to the
1573 // appropriate type.
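// For example, an i16 value that was promoted into an i32 register comes
// back as (truncate i32 to i16), while a promoted f32 comes back through
// FP_ROUND; which widths get promoted is target-dependent.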
1574 if (RegVT == ValueVT)
1575 return Val;
1576
1577 if (MVT::isInteger(RegVT))
1578 return DAG.getNode(ISD::TRUNCATE, ValueVT, Val);
1579 else
1580 return DAG.getNode(ISD::FP_ROUND, ValueVT, Val);
1581 }
1582
1583 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
1584 /// specified value into the registers specified by this object. This uses
1585 /// Chain/Flag as the input and updates them for the output Chain/Flag.
1586 void RegsForValue::getCopyToRegs(SDOperand Val, SelectionDAG &DAG,
1587 SDOperand &Chain, SDOperand &Flag) const {
1588 if (Regs.size() == 1) {
1589 // If there is a single register and the types differ, this must be
1590 // a promotion.
1591 if (RegVT != ValueVT) {
1592 if (MVT::isInteger(RegVT))
1593 Val = DAG.getNode(ISD::ANY_EXTEND, RegVT, Val);
1594 else
1595 Val = DAG.getNode(ISD::FP_EXTEND, RegVT, Val);
1596 }
1597 Chain = DAG.getCopyToReg(Chain, Regs[0], Val, Flag);
1598 Flag = Chain.getValue(1);
1599 } else {
1600 std::vector<unsigned> R(Regs);
1601 if (!DAG.getTargetLoweringInfo().isLittleEndian())
1602 std::reverse(R.begin(), R.end());
1603
1604 for (unsigned i = 0, e = R.size(); i != e; ++i) {
1605 SDOperand Part = DAG.getNode(ISD::EXTRACT_ELEMENT, RegVT, Val,
1606 DAG.getConstant(i, MVT::i32));
1607 Chain = DAG.getCopyToReg(Chain, R[i], Part, Flag);
1608 Flag = Chain.getValue(1);
1609 }
1610 }
1611 }
1612
1613 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
1614 /// operand list. This adds the code marker and includes the number of
1615 /// values added into it.
1616 void RegsForValue::AddInlineAsmOperands(unsigned Code, SelectionDAG &DAG,
1617 std::vector<SDOperand> &Ops) const {
1618 Ops.push_back(DAG.getConstant(Code | (Regs.size() << 3), MVT::i32));
1619 for (unsigned i = 0, e = Regs.size(); i != e; ++i)
1620 Ops.push_back(DAG.getRegister(Regs[i], RegVT));
1621 }
1622
1623 /// isAllocatableRegister - If the specified register is safe to allocate,
1624 /// i.e. it isn't a stack pointer or some other special register, return the
1625 /// register class for the register. Otherwise, return null.
1626 static const TargetRegisterClass *
1627 isAllocatableRegister(unsigned Reg, MachineFunction &MF,
1628 const TargetLowering &TLI, const MRegisterInfo *MRI) {
1629 for (MRegisterInfo::regclass_iterator RCI = MRI->regclass_begin(),
1630 E = MRI->regclass_end(); RCI != E; ++RCI) {
1631 const TargetRegisterClass *RC = *RCI;
1632 // If none of the value types for this register class are valid, we
1633 // can't use it. For example, 64-bit reg classes on 32-bit targets.
1634 bool isLegal = false;
1635 for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
1636 I != E; ++I) {
1637 if (TLI.isTypeLegal(*I)) {
1638 isLegal = true;
1639 break;
1640 }
1641 }
1642
1643 if (!isLegal) continue;
1644
1645 // NOTE: This isn't ideal. In particular, this might allocate the
1646 // frame pointer in functions that need it (due to them not being taken
1647 // out of allocation, because a variable sized allocation hasn't been seen
1648 // yet). This is a slight code pessimization, but should still work.
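// A register is considered allocatable only if it appears in the class's
// allocation order for this function; reserved registers (a target's stack
// pointer, for example) are typically left out of every allocation order,
// so they are rejected here.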
1649 for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
1650 E = RC->allocation_order_end(MF); I != E; ++I)
1651 if (*I == Reg)
1652 return RC;
1653 }
1654 return 0;
1655 }
1656
1657 RegsForValue SelectionDAGLowering::
1658 GetRegistersForValue(const std::string &ConstrCode,
1659 MVT::ValueType VT, bool isOutReg, bool isInReg,
1660 std::set<unsigned> &OutputRegs,
1661 std::set<unsigned> &InputRegs) {
1662 std::pair<unsigned, const TargetRegisterClass*> PhysReg =
1663 TLI.getRegForInlineAsmConstraint(ConstrCode, VT);
1664 std::vector<unsigned> Regs;
1665
1666 unsigned NumRegs = VT != MVT::Other ? TLI.getNumElements(VT) : 1;
1667 MVT::ValueType RegVT;
1668 MVT::ValueType ValueVT = VT;
1669
1670 if (PhysReg.first) {
1671 if (VT == MVT::Other)
1672 ValueVT = *PhysReg.second->vt_begin();
1673 RegVT = VT;
1674
1675 // This is an explicit reference to a physical register.
1676 Regs.push_back(PhysReg.first);
1677
1678 // If this is an expanded reference, add the rest of the regs to Regs.
1679 if (NumRegs != 1) {
1680 RegVT = *PhysReg.second->vt_begin();
1681 TargetRegisterClass::iterator I = PhysReg.second->begin();
1682 TargetRegisterClass::iterator E = PhysReg.second->end();
1683 for (; *I != PhysReg.first; ++I)
1684 assert(I != E && "Didn't find reg!");
1685
1686 // Already added the first reg.
1687 --NumRegs; ++I;
1688 for (; NumRegs; --NumRegs, ++I) {
1689 assert(I != E && "Ran out of registers to allocate!");
1690 Regs.push_back(*I);
1691 }
1692 }
1693 return RegsForValue(Regs, RegVT, ValueVT);
1694 }
1695
1696 // This is a reference to a register class. Allocate NumRegs consecutive,
1697 // available registers from the class.
1698 std::vector<unsigned> RegClassRegs =
1699 TLI.getRegClassForInlineAsmConstraint(ConstrCode, VT);
1700
1701 const MRegisterInfo *MRI = DAG.getTarget().getRegisterInfo();
1702 MachineFunction &MF = *CurMBB->getParent();
1703 unsigned NumAllocated = 0;
1704 for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
1705 unsigned Reg = RegClassRegs[i];
1706 // See if this register is available.
1707 if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
1708 (isInReg && InputRegs.count(Reg))) { // Already used.
1709 // Make sure we find consecutive registers.
1710 NumAllocated = 0;
1711 continue;
1712 }
1713
1714 // Check to see if this register is allocatable (i.e. don't give out the
1715 // stack pointer).
1716 const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, MRI);
1717 if (!RC) {
1718 // Make sure we find consecutive registers.
1719 NumAllocated = 0;
1720 continue;
1721 }
1722
1723 // Okay, this register is good, we can use it.
1724 ++NumAllocated;
1725
1726 // If we have allocated enough consecutive registers, we're done.
1727 if (NumAllocated == NumRegs) {
1728 unsigned RegStart = (i-NumAllocated)+1;
1729 unsigned RegEnd = i+1;
1730 // Mark all of the allocated registers used.
1731 for (unsigned i = RegStart; i != RegEnd; ++i) {
1732 unsigned Reg = RegClassRegs[i];
1733 Regs.push_back(Reg);
1734 if (isOutReg) OutputRegs.insert(Reg); // Mark reg used.
1735 if (isInReg) InputRegs.insert(Reg); // Mark reg used.
1736 }
1737
1738 return RegsForValue(Regs, *RC->vt_begin(), VT);
1739 }
1740 }
1741
1742 // Otherwise, we couldn't allocate enough registers for this.
1743 return RegsForValue();
1744 }
1745
1746
1747 /// visitInlineAsm - Handle a call to an InlineAsm object.
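/// Lowering runs two passes over the parsed constraints: a prepass that
/// reserves any explicitly named physical registers, then a pass that builds
/// the operand list for a single ISD::INLINEASM node (input chain and asm
/// string first, then a descriptor constant plus value operands for each
/// constraint).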
1748 ///
1749 void SelectionDAGLowering::visitInlineAsm(CallInst &I) {
1750 InlineAsm *IA = cast<InlineAsm>(I.getOperand(0));
1751
1752 SDOperand AsmStr = DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
1753 MVT::Other);
1754
1755 // Note, we treat inline asms both with and without side-effects as the same.
1756 // If an inline asm doesn't have side effects and doesn't access memory, we
1757 // could choose to not chain it.
1758 bool hasSideEffects = IA->hasSideEffects();
1759
1760 std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();
1761 std::vector<MVT::ValueType> ConstraintVTs;
1762
1763 /// AsmNodeOperands - The operand list for the INLINEASM node being built:
1764 /// the input chain and asm string first, followed by a descriptor constant
1765 /// and the value operands for each constraint.
1766 std::vector<SDOperand> AsmNodeOperands;
1767 AsmNodeOperands.push_back(SDOperand()); // reserve space for input chain
1768 AsmNodeOperands.push_back(AsmStr);
1769
1770 SDOperand Chain = getRoot();
1771 SDOperand Flag;
1772
1773 // We fully assign registers here at isel time. This is not optimal, but
1774 // should work. For register classes that correspond to LLVM classes, we
1775 // could let the LLVM RA do its thing, but we currently don't. Do a prepass
1776 // over the constraints, collecting fixed registers that we know we can't use.
1777 std::set<unsigned> OutputRegs, InputRegs;
1778 unsigned OpNum = 1;
1779 for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
1780 assert(Constraints[i].Codes.size() == 1 && "Only handles one code so far!");
1781 std::string &ConstraintCode = Constraints[i].Codes[0];
1782
1783 MVT::ValueType OpVT;
1784
1785 // Compute the value type for each operand and add it to ConstraintVTs.
1786 switch (Constraints[i].Type) {
1787 case InlineAsm::isOutput:
1788 if (!Constraints[i].isIndirectOutput) {
1789 assert(I.getType() != Type::VoidTy && "Bad inline asm!");
1790 OpVT = TLI.getValueType(I.getType());
1791 } else {
1792 const Type *OpTy = I.getOperand(OpNum)->getType();
1793 OpVT = TLI.getValueType(cast<PointerType>(OpTy)->getElementType());
1794 OpNum++; // Consumes a call operand.
1795 }
1796 break;
1797 case InlineAsm::isInput:
1798 OpVT = TLI.getValueType(I.getOperand(OpNum)->getType());
1799 OpNum++; // Consumes a call operand.
1800 break;
1801 case InlineAsm::isClobber:
1802 OpVT = MVT::Other;
1803 break;
1804 }
1805
1806 ConstraintVTs.push_back(OpVT);
1807
1808 if (TLI.getRegForInlineAsmConstraint(ConstraintCode, OpVT).first == 0)
1809 continue; // Not assigned a fixed reg.
1810
1811 // Build a list of regs that this operand uses. This always has a single
1812 // element for promoted/expanded operands.
1813 RegsForValue Regs = GetRegistersForValue(ConstraintCode, OpVT,
1814 false, false,
1815 OutputRegs, InputRegs);
1816
1817 switch (Constraints[i].Type) {
1818 case InlineAsm::isOutput:
1819 // We can't assign any other output to this register.
1820 OutputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1821 // If this is an early-clobber output, it cannot be assigned to the same
1822 // value as the input reg.
1823 if (Constraints[i].isEarlyClobber || Constraints[i].hasMatchingInput)
1824 InputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1825 break;
1826 case InlineAsm::isInput:
1827 // We can't assign any other input to this register.
1828 InputRegs.insert(Regs.Regs.begin(), Regs.Regs.end());
1829 break;
1830 case InlineAsm::isClobber:
1831 // Clobbered regs cannot be used as inputs or outputs.
1832 InputRegs.insert(Regs.Regs.begin(), Regs.Regs.end()); 1833 OutputRegs.insert(Regs.Regs.begin(), Regs.Regs.end()); 1834 break; 1835 } 1836 } 1837 1838 // Loop over all of the inputs, copying the operand values into the 1839 // appropriate registers and processing the output regs. 1840 RegsForValue RetValRegs; 1841 std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; 1842 OpNum = 1; 1843 1844 for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { 1845 assert(Constraints[i].Codes.size() == 1 && "Only handles one code so far!"); 1846 std::string &ConstraintCode = Constraints[i].Codes[0]; 1847 1848 switch (Constraints[i].Type) { 1849 case InlineAsm::isOutput: { 1850 TargetLowering::ConstraintType CTy = TargetLowering::C_RegisterClass; 1851 if (ConstraintCode.size() == 1) // not a physreg name. 1852 CTy = TLI.getConstraintType(ConstraintCode[0]); 1853 1854 if (CTy == TargetLowering::C_Memory) { 1855 // Memory output. 1856 SDOperand InOperandVal = getValue(I.getOperand(OpNum)); 1857 1858 // Check that the operand (the address to store to) isn't a float. 1859 if (!MVT::isInteger(InOperandVal.getValueType())) 1860 assert(0 && "MATCH FAIL!"); 1861 1862 if (!Constraints[i].isIndirectOutput) 1863 assert(0 && "MATCH FAIL!"); 1864 1865 OpNum++; // Consumes a call operand. 1866 1867 // Extend/truncate to the right pointer type if needed. 1868 MVT::ValueType PtrType = TLI.getPointerTy(); 1869 if (InOperandVal.getValueType() < PtrType) 1870 InOperandVal = DAG.getNode(ISD::ZERO_EXTEND, PtrType, InOperandVal); 1871 else if (InOperandVal.getValueType() > PtrType) 1872 InOperandVal = DAG.getNode(ISD::TRUNCATE, PtrType, InOperandVal); 1873 1874 // Add information to the INLINEASM node to know about this output. 1875 unsigned ResOpType = 4/*MEM*/ | (1 << 3); 1876 AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32)); 1877 AsmNodeOperands.push_back(InOperandVal); 1878 break; 1879 } 1880 1881 // Otherwise, this is a register output. 1882 assert(CTy == TargetLowering::C_RegisterClass && "Unknown op type!"); 1883 1884 // If this is an early-clobber output, or if there is an input 1885 // constraint that matches this, we need to reserve the input register 1886 // so no other inputs allocate to it. 1887 bool UsesInputRegister = false; 1888 if (Constraints[i].isEarlyClobber || Constraints[i].hasMatchingInput) 1889 UsesInputRegister = true; 1890 1891 // Copy the output from the appropriate register. Find a register that 1892 // we can use. 1893 RegsForValue Regs = 1894 GetRegistersForValue(ConstraintCode, ConstraintVTs[i], 1895 true, UsesInputRegister, 1896 OutputRegs, InputRegs); 1897 assert(!Regs.Regs.empty() && "Couldn't allocate output reg!"); 1898 1899 if (!Constraints[i].isIndirectOutput) { 1900 assert(RetValRegs.Regs.empty() && 1901 "Cannot have multiple output constraints yet!"); 1902 assert(I.getType() != Type::VoidTy && "Bad inline asm!"); 1903 RetValRegs = Regs; 1904 } else { 1905 IndirectStoresToEmit.push_back(std::make_pair(Regs, 1906 I.getOperand(OpNum))); 1907 OpNum++; // Consumes a call operand. 1908 } 1909 1910 // Add information to the INLINEASM node to know that this register is 1911 // set. 1912 Regs.AddInlineAsmOperands(2 /*REGDEF*/, DAG, AsmNodeOperands); 1913 break; 1914 } 1915 case InlineAsm::isInput: { 1916 SDOperand InOperandVal = getValue(I.getOperand(OpNum)); 1917 OpNum++; // Consumes a call operand. 1918 1919 if (isdigit(ConstraintCode[0])) { // Matching constraint? 
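// A constraint code that is a plain number ties this input to an output
// operand; e.g. in GCC syntax, asm("..." : "=r"(Out) : "0"(In)) forces In
// into whatever register was chosen for output operand 0.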
1920 // If this is required to match an output register we have already set,
1921 // just use its register.
1922 unsigned OperandNo = atoi(ConstraintCode.c_str());
1923
1924 // Scan until we find the definition we already emitted of this operand.
1925 // When we find it, create a RegsForValue operand.
1926 unsigned CurOp = 2; // The first operand.
1927 for (; OperandNo; --OperandNo) {
1928 // Advance to the next operand.
1929 unsigned NumOps =
1930 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
1931 assert((NumOps & 7) == 2 /*REGDEF*/ &&
1932 "Skipped past definitions?");
1933 CurOp += (NumOps>>3)+1;
1934 }
1935
1936 unsigned NumOps =
1937 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getValue();
1938 assert((NumOps & 7) == 2 /*REGDEF*/ &&
1939 "Skipped past definitions?");
1940
1941 // Add NumOps>>3 registers to MatchedRegs.
1942 RegsForValue MatchedRegs;
1943 MatchedRegs.ValueVT = InOperandVal.getValueType();
1944 MatchedRegs.RegVT = AsmNodeOperands[CurOp+1].getValueType();
1945 for (unsigned i = 0, e = NumOps>>3; i != e; ++i) {
1946 unsigned Reg = cast<RegisterSDNode>(AsmNodeOperands[++CurOp])->getReg();
1947 MatchedRegs.Regs.push_back(Reg);
1948 }
1949
1950 // Use the produced MatchedRegs object to copy the input value into them.
1951 MatchedRegs.getCopyToRegs(InOperandVal, DAG, Chain, Flag);
1952 MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/, DAG, AsmNodeOperands);
1953 break;
1954 }
1955
1956 TargetLowering::ConstraintType CTy = TargetLowering::C_RegisterClass;
1957 if (ConstraintCode.size() == 1) // not a physreg name.
1958 CTy = TLI.getConstraintType(ConstraintCode[0]);
1959
1960 if (CTy == TargetLowering::C_Other) {
1961 if (!TLI.isOperandValidForConstraint(InOperandVal, ConstraintCode[0]))
1962 assert(0 && "MATCH FAIL!");
1963
1964 // Add information to the INLINEASM node to know about this input.
1965 unsigned ResOpType = 3 /*IMM*/ | (1 << 3);
1966 AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32));
1967 AsmNodeOperands.push_back(InOperandVal);
1968 break;
1969 } else if (CTy == TargetLowering::C_Memory) {
1970 // Memory input.
1971
1972 // Check that the operand isn't a float.
1973 if (!MVT::isInteger(InOperandVal.getValueType()))
1974 assert(0 && "MATCH FAIL!");
1975
1976 // Extend/truncate to the right pointer type if needed.
1977 MVT::ValueType PtrType = TLI.getPointerTy();
1978 if (InOperandVal.getValueType() < PtrType)
1979 InOperandVal = DAG.getNode(ISD::ZERO_EXTEND, PtrType, InOperandVal);
1980 else if (InOperandVal.getValueType() > PtrType)
1981 InOperandVal = DAG.getNode(ISD::TRUNCATE, PtrType, InOperandVal);
1982
1983 // Add information to the INLINEASM node to know about this input.
1984 unsigned ResOpType = 4/*MEM*/ | (1 << 3);
1985 AsmNodeOperands.push_back(DAG.getConstant(ResOpType, MVT::i32));
1986 AsmNodeOperands.push_back(InOperandVal);
1987 break;
1988 }
1989
1990 assert(CTy == TargetLowering::C_RegisterClass && "Unknown op type!");
1991
1992 // Copy the input into the appropriate registers.
1993 RegsForValue InRegs =
1994 GetRegistersForValue(ConstraintCode, ConstraintVTs[i],
1995 false, true, OutputRegs, InputRegs);
1996 // FIXME: should be match fail.
1997 assert(!InRegs.Regs.empty() && "Couldn't allocate input reg!"); 1998 1999 InRegs.getCopyToRegs(InOperandVal, DAG, Chain, Flag); 2000 2001 InRegs.AddInlineAsmOperands(1/*REGUSE*/, DAG, AsmNodeOperands); 2002 break; 2003 } 2004 case InlineAsm::isClobber: { 2005 RegsForValue ClobberedRegs = 2006 GetRegistersForValue(ConstraintCode, MVT::Other, false, false, 2007 OutputRegs, InputRegs); 2008 // Add the clobbered value to the operand list, so that the register 2009 // allocator is aware that the physreg got clobbered. 2010 if (!ClobberedRegs.Regs.empty()) 2011 ClobberedRegs.AddInlineAsmOperands(2/*REGDEF*/, DAG, AsmNodeOperands); 2012 break; 2013 } 2014 } 2015 } 2016 2017 // Finish up input operands. 2018 AsmNodeOperands[0] = Chain; 2019 if (Flag.Val) AsmNodeOperands.push_back(Flag); 2020 2021 std::vector<MVT::ValueType> VTs; 2022 VTs.push_back(MVT::Other); 2023 VTs.push_back(MVT::Flag); 2024 Chain = DAG.getNode(ISD::INLINEASM, VTs, AsmNodeOperands); 2025 Flag = Chain.getValue(1); 2026 2027 // If this asm returns a register value, copy the result from that register 2028 // and set it as the value of the call. 2029 if (!RetValRegs.Regs.empty()) 2030 setValue(&I, RetValRegs.getCopyFromRegs(DAG, Chain, Flag)); 2031 2032 std::vector<std::pair<SDOperand, Value*> > StoresToEmit; 2033 2034 // Process indirect outputs, first output all of the flagged copies out of 2035 // physregs. 2036 for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { 2037 RegsForValue &OutRegs = IndirectStoresToEmit[i].first; 2038 Value *Ptr = IndirectStoresToEmit[i].second; 2039 SDOperand OutVal = OutRegs.getCopyFromRegs(DAG, Chain, Flag); 2040 StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); 2041 } 2042 2043 // Emit the non-flagged stores from the physregs. 2044 std::vector<SDOperand> OutChains; 2045 for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) 2046 OutChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 2047 StoresToEmit[i].first, 2048 getValue(StoresToEmit[i].second), 2049 DAG.getSrcValue(StoresToEmit[i].second))); 2050 if (!OutChains.empty()) 2051 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains); 2052 DAG.setRoot(Chain); 2053} 2054 2055 2056void SelectionDAGLowering::visitMalloc(MallocInst &I) { 2057 SDOperand Src = getValue(I.getOperand(0)); 2058 2059 MVT::ValueType IntPtr = TLI.getPointerTy(); 2060 2061 if (IntPtr < Src.getValueType()) 2062 Src = DAG.getNode(ISD::TRUNCATE, IntPtr, Src); 2063 else if (IntPtr > Src.getValueType()) 2064 Src = DAG.getNode(ISD::ZERO_EXTEND, IntPtr, Src); 2065 2066 // Scale the source by the type size. 
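// For example, assuming a 4-byte int, "malloc int, uint %N" becomes a call
// to malloc with a byte count of %N * 4.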
2067 uint64_t ElementSize = TD.getTypeSize(I.getType()->getElementType()); 2068 Src = DAG.getNode(ISD::MUL, Src.getValueType(), 2069 Src, getIntPtrConstant(ElementSize)); 2070 2071 std::vector<std::pair<SDOperand, const Type*> > Args; 2072 Args.push_back(std::make_pair(Src, TLI.getTargetData().getIntPtrType())); 2073 2074 std::pair<SDOperand,SDOperand> Result = 2075 TLI.LowerCallTo(getRoot(), I.getType(), false, CallingConv::C, true, 2076 DAG.getExternalSymbol("malloc", IntPtr), 2077 Args, DAG); 2078 setValue(&I, Result.first); // Pointers always fit in registers 2079 DAG.setRoot(Result.second); 2080} 2081 2082void SelectionDAGLowering::visitFree(FreeInst &I) { 2083 std::vector<std::pair<SDOperand, const Type*> > Args; 2084 Args.push_back(std::make_pair(getValue(I.getOperand(0)), 2085 TLI.getTargetData().getIntPtrType())); 2086 MVT::ValueType IntPtr = TLI.getPointerTy(); 2087 std::pair<SDOperand,SDOperand> Result = 2088 TLI.LowerCallTo(getRoot(), Type::VoidTy, false, CallingConv::C, true, 2089 DAG.getExternalSymbol("free", IntPtr), Args, DAG); 2090 DAG.setRoot(Result.second); 2091} 2092 2093// InsertAtEndOfBasicBlock - This method should be implemented by targets that 2094// mark instructions with the 'usesCustomDAGSchedInserter' flag. These 2095// instructions are special in various ways, which require special support to 2096// insert. The specified MachineInstr is created but not inserted into any 2097// basic blocks, and the scheduler passes ownership of it to this method. 2098MachineBasicBlock *TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 2099 MachineBasicBlock *MBB) { 2100 std::cerr << "If a target marks an instruction with " 2101 "'usesCustomDAGSchedInserter', it must implement " 2102 "TargetLowering::InsertAtEndOfBasicBlock!\n"; 2103 abort(); 2104 return 0; 2105} 2106 2107void SelectionDAGLowering::visitVAStart(CallInst &I) { 2108 DAG.setRoot(DAG.getNode(ISD::VASTART, MVT::Other, getRoot(), 2109 getValue(I.getOperand(1)), 2110 DAG.getSrcValue(I.getOperand(1)))); 2111} 2112 2113void SelectionDAGLowering::visitVAArg(VAArgInst &I) { 2114 SDOperand V = DAG.getVAArg(TLI.getValueType(I.getType()), getRoot(), 2115 getValue(I.getOperand(0)), 2116 DAG.getSrcValue(I.getOperand(0))); 2117 setValue(&I, V); 2118 DAG.setRoot(V.getValue(1)); 2119} 2120 2121void SelectionDAGLowering::visitVAEnd(CallInst &I) { 2122 DAG.setRoot(DAG.getNode(ISD::VAEND, MVT::Other, getRoot(), 2123 getValue(I.getOperand(1)), 2124 DAG.getSrcValue(I.getOperand(1)))); 2125} 2126 2127void SelectionDAGLowering::visitVACopy(CallInst &I) { 2128 DAG.setRoot(DAG.getNode(ISD::VACOPY, MVT::Other, getRoot(), 2129 getValue(I.getOperand(1)), 2130 getValue(I.getOperand(2)), 2131 DAG.getSrcValue(I.getOperand(1)), 2132 DAG.getSrcValue(I.getOperand(2)))); 2133} 2134 2135// It is always conservatively correct for llvm.returnaddress and 2136// llvm.frameaddress to return 0. 
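// Targets that can walk the frame chain may override this; the default
// implementation below simply returns a null pointer-sized constant for
// any depth.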
2137 std::pair<SDOperand, SDOperand>
2138 TargetLowering::LowerFrameReturnAddress(bool isFrameAddr, SDOperand Chain,
2139 unsigned Depth, SelectionDAG &DAG) {
2140 return std::make_pair(DAG.getConstant(0, getPointerTy()), Chain);
2141 }
2142
2143 SDOperand TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
2144 assert(0 && "LowerOperation not implemented for this target!");
2145 abort();
2146 return SDOperand();
2147 }
2148
2149 SDOperand TargetLowering::CustomPromoteOperation(SDOperand Op,
2150 SelectionDAG &DAG) {
2151 assert(0 && "CustomPromoteOperation not implemented for this target!");
2152 abort();
2153 return SDOperand();
2154 }
2155
2156 void SelectionDAGLowering::visitFrameReturnAddress(CallInst &I, bool isFrame) {
2157 unsigned Depth = (unsigned)cast<ConstantUInt>(I.getOperand(1))->getValue();
2158 std::pair<SDOperand,SDOperand> Result =
2159 TLI.LowerFrameReturnAddress(isFrame, getRoot(), Depth, DAG);
2160 setValue(&I, Result.first);
2161 DAG.setRoot(Result.second);
2162 }
2163
2164 /// getMemsetValue - Build the value to store for a memset: the fill byte
2165 /// replicated to the width of VT.
2166 static SDOperand getMemsetValue(SDOperand Value, MVT::ValueType VT,
2167 SelectionDAG &DAG) {
2168 MVT::ValueType CurVT = VT;
2169 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
2170 uint64_t Val = C->getValue() & 255;
2171 unsigned Shift = 8;
2172 while (CurVT != MVT::i8) {
2173 Val = (Val << Shift) | Val;
2174 Shift <<= 1;
2175 CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
2176 }
2177 return DAG.getConstant(Val, VT);
2178 } else {
2179 Value = DAG.getNode(ISD::ZERO_EXTEND, VT, Value);
2180 unsigned Shift = 8;
2181 while (CurVT != MVT::i8) {
2182 Value =
2183 DAG.getNode(ISD::OR, VT,
2184 DAG.getNode(ISD::SHL, VT, Value,
2185 DAG.getConstant(Shift, MVT::i8)), Value);
2186 Shift <<= 1;
2187 CurVT = (MVT::ValueType)((unsigned)CurVT - 1);
2188 }
2189
2190 return Value;
2191 }
2192 }
2193
2194 /// getMemsetStringVal - Similar to getMemsetValue, except this is only
2195 /// used when a memcpy is turned into a memset because the source is a
2196 /// constant string ptr.
2197 static SDOperand getMemsetStringVal(MVT::ValueType VT,
2198 SelectionDAG &DAG, TargetLowering &TLI,
2199 std::string &Str, unsigned Offset) {
2200 MVT::ValueType CurVT = VT;
2201 uint64_t Val = 0;
2202 unsigned MSB = getSizeInBits(VT) / 8;
2203 if (TLI.isLittleEndian())
2204 Offset = Offset + MSB - 1;
2205 for (unsigned i = 0; i != MSB; ++i) {
2206 Val = (Val << 8) | Str[Offset];
2207 Offset += TLI.isLittleEndian() ? -1 : 1;
2208 }
2209 return DAG.getConstant(Val, VT);
2210 }
2211
2212 /// getMemBasePlusOffset - Returns Base plus Offset, as a node of Base's type.
2213 static SDOperand getMemBasePlusOffset(SDOperand Base, unsigned Offset,
2214 SelectionDAG &DAG, TargetLowering &TLI) {
2215 MVT::ValueType VT = Base.getValueType();
2216 return DAG.getNode(ISD::ADD, VT, Base, DAG.getConstant(Offset, VT));
2217 }
2218
2219 /// MeetsMaxMemopRequirement - Determines if the number of memory ops required
2220 /// to replace the memset / memcpy is below the threshold. It also returns the
2221 /// types of the sequence of memory ops to perform memset / memcpy.
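/// For example, a 14-byte memset with 4-byte alignment on a target whose
/// widest legal integer type is i32 yields the sequence { i32, i32, i32,
/// i16 }: four stores, so the expansion happens only if Limit >= 4.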
2222static bool MeetsMaxMemopRequirement(std::vector<MVT::ValueType> &MemOps, 2223 unsigned Limit, uint64_t Size, 2224 unsigned Align, TargetLowering &TLI) { 2225 MVT::ValueType VT; 2226 2227 if (TLI.allowsUnalignedMemoryAccesses()) { 2228 VT = MVT::i64; 2229 } else { 2230 switch (Align & 7) { 2231 case 0: 2232 VT = MVT::i64; 2233 break; 2234 case 4: 2235 VT = MVT::i32; 2236 break; 2237 case 2: 2238 VT = MVT::i16; 2239 break; 2240 default: 2241 VT = MVT::i8; 2242 break; 2243 } 2244 } 2245 2246 MVT::ValueType LVT = MVT::i64; 2247 while (!TLI.isTypeLegal(LVT)) 2248 LVT = (MVT::ValueType)((unsigned)LVT - 1); 2249 assert(MVT::isInteger(LVT)); 2250 2251 if (VT > LVT) 2252 VT = LVT; 2253 2254 unsigned NumMemOps = 0; 2255 while (Size != 0) { 2256 unsigned VTSize = getSizeInBits(VT) / 8; 2257 while (VTSize > Size) { 2258 VT = (MVT::ValueType)((unsigned)VT - 1); 2259 VTSize >>= 1; 2260 } 2261 assert(MVT::isInteger(VT)); 2262 2263 if (++NumMemOps > Limit) 2264 return false; 2265 MemOps.push_back(VT); 2266 Size -= VTSize; 2267 } 2268 2269 return true; 2270} 2271 2272void SelectionDAGLowering::visitMemIntrinsic(CallInst &I, unsigned Op) { 2273 SDOperand Op1 = getValue(I.getOperand(1)); 2274 SDOperand Op2 = getValue(I.getOperand(2)); 2275 SDOperand Op3 = getValue(I.getOperand(3)); 2276 SDOperand Op4 = getValue(I.getOperand(4)); 2277 unsigned Align = (unsigned)cast<ConstantSDNode>(Op4)->getValue(); 2278 if (Align == 0) Align = 1; 2279 2280 if (ConstantSDNode *Size = dyn_cast<ConstantSDNode>(Op3)) { 2281 std::vector<MVT::ValueType> MemOps; 2282 2283 // Expand memset / memcpy to a series of load / store ops 2284 // if the size operand falls below a certain threshold. 2285 std::vector<SDOperand> OutChains; 2286 switch (Op) { 2287 default: break; // Do nothing for now. 
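// Each expanded op stores one value of the chosen type at a running offset:
// MEMSET synthesizes the value, while MEMCPY either loads it from the source
// or, when copying from a constant string, materializes it as an immediate.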
2288 case ISD::MEMSET: { 2289 if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemset(), 2290 Size->getValue(), Align, TLI)) { 2291 unsigned NumMemOps = MemOps.size(); 2292 unsigned Offset = 0; 2293 for (unsigned i = 0; i < NumMemOps; i++) { 2294 MVT::ValueType VT = MemOps[i]; 2295 unsigned VTSize = getSizeInBits(VT) / 8; 2296 SDOperand Value = getMemsetValue(Op2, VT, DAG); 2297 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, getRoot(), 2298 Value, 2299 getMemBasePlusOffset(Op1, Offset, DAG, TLI), 2300 DAG.getSrcValue(I.getOperand(1), Offset)); 2301 OutChains.push_back(Store); 2302 Offset += VTSize; 2303 } 2304 } 2305 break; 2306 } 2307 case ISD::MEMCPY: { 2308 if (MeetsMaxMemopRequirement(MemOps, TLI.getMaxStoresPerMemcpy(), 2309 Size->getValue(), Align, TLI)) { 2310 unsigned NumMemOps = MemOps.size(); 2311 unsigned SrcOff = 0, DstOff = 0, SrcDelta = 0; 2312 GlobalAddressSDNode *G = NULL; 2313 std::string Str; 2314 bool CopyFromStr = false; 2315 2316 if (Op2.getOpcode() == ISD::GlobalAddress) 2317 G = cast<GlobalAddressSDNode>(Op2); 2318 else if (Op2.getOpcode() == ISD::ADD && 2319 Op2.getOperand(0).getOpcode() == ISD::GlobalAddress && 2320 Op2.getOperand(1).getOpcode() == ISD::Constant) { 2321 G = cast<GlobalAddressSDNode>(Op2.getOperand(0)); 2322 SrcDelta = cast<ConstantSDNode>(Op2.getOperand(1))->getValue(); 2323 } 2324 if (G) { 2325 GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal()); 2326 if (GV) { 2327 Str = GV->getStringValue(false); 2328 if (!Str.empty()) { 2329 CopyFromStr = true; 2330 SrcOff += SrcDelta; 2331 } 2332 } 2333 } 2334 2335 for (unsigned i = 0; i < NumMemOps; i++) { 2336 MVT::ValueType VT = MemOps[i]; 2337 unsigned VTSize = getSizeInBits(VT) / 8; 2338 SDOperand Value, Chain, Store; 2339 2340 if (CopyFromStr) { 2341 Value = getMemsetStringVal(VT, DAG, TLI, Str, SrcOff); 2342 Chain = getRoot(); 2343 Store = 2344 DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2345 getMemBasePlusOffset(Op1, DstOff, DAG, TLI), 2346 DAG.getSrcValue(I.getOperand(1), DstOff)); 2347 } else { 2348 Value = DAG.getLoad(VT, getRoot(), 2349 getMemBasePlusOffset(Op2, SrcOff, DAG, TLI), 2350 DAG.getSrcValue(I.getOperand(2), SrcOff)); 2351 Chain = Value.getValue(1); 2352 Store = 2353 DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, 2354 getMemBasePlusOffset(Op1, DstOff, DAG, TLI), 2355 DAG.getSrcValue(I.getOperand(1), DstOff)); 2356 } 2357 OutChains.push_back(Store); 2358 SrcOff += VTSize; 2359 DstOff += VTSize; 2360 } 2361 } 2362 break; 2363 } 2364 } 2365 2366 if (!OutChains.empty()) { 2367 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, OutChains)); 2368 return; 2369 } 2370 } 2371 2372 std::vector<SDOperand> Ops; 2373 Ops.push_back(getRoot()); 2374 Ops.push_back(Op1); 2375 Ops.push_back(Op2); 2376 Ops.push_back(Op3); 2377 Ops.push_back(Op4); 2378 DAG.setRoot(DAG.getNode(Op, MVT::Other, Ops)); 2379} 2380 2381//===----------------------------------------------------------------------===// 2382// SelectionDAGISel code 2383//===----------------------------------------------------------------------===// 2384 2385unsigned SelectionDAGISel::MakeReg(MVT::ValueType VT) { 2386 return RegMap->createVirtualRegister(TLI.getRegClassFor(VT)); 2387} 2388 2389void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { 2390 // FIXME: we only modify the CFG to split critical edges. This 2391 // updates dom and loop info. 2392} 2393 2394 2395/// InsertGEPComputeCode - Insert code into BB to compute Ptr+PtrOffset, 2396/// casting to the type of GEPI. 
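/// The result is cached in V, so each block materializes the address at most
/// once no matter how many uses of the GEP it contains.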
2397 static Value *InsertGEPComputeCode(Value *&V, BasicBlock *BB, Instruction *GEPI,
2398 Value *Ptr, Value *PtrOffset) {
2399 if (V) return V; // Already computed.
2400
2401 BasicBlock::iterator InsertPt;
2402 if (BB == GEPI->getParent()) {
2403 // If inserting into the GEP's block, insert right after the GEP.
2404 InsertPt = GEPI;
2405 ++InsertPt;
2406 } else {
2407 // Otherwise, insert at the top of BB, after any PHI nodes.
2408 InsertPt = BB->begin();
2409 while (isa<PHINode>(InsertPt)) ++InsertPt;
2410 }
2411
2412 // If Ptr is itself a cast, but in some other BB, emit a copy of the cast into
2413 // BB so that there is only one value live across basic blocks (the cast
2414 // operand).
2415 if (CastInst *CI = dyn_cast<CastInst>(Ptr))
2416 if (CI->getParent() != BB && isa<PointerType>(CI->getOperand(0)->getType()))
2417 Ptr = new CastInst(CI->getOperand(0), CI->getType(), "", InsertPt);
2418
2419 // Add the offset, then cast the result to the type of the GEP.
2420 Ptr = BinaryOperator::createAdd(Ptr, PtrOffset, "", InsertPt);
2421 Ptr = new CastInst(Ptr, GEPI->getType(), "", InsertPt);
2422 return V = Ptr;
2423 }
2424
2425
2426 /// OptimizeGEPExpression - Since we are doing basic-block-at-a-time instruction
2427 /// selection, we want to be a bit careful about some things. In particular, if
2428 /// we have a GEP instruction that is used in a different block than it is
2429 /// defined, the addressing expression of the GEP cannot be folded into loads or
2430 /// stores that use it. In this case, decompose the GEP and move constant
2431 /// indices into blocks that use it.
2432 static void OptimizeGEPExpression(GetElementPtrInst *GEPI,
2433 const TargetData &TD) {
2434 // If this GEP is only used inside the block it is defined in, there is no
2435 // need to rewrite it.
2436 bool isUsedOutsideDefBB = false;
2437 BasicBlock *DefBB = GEPI->getParent();
2438 for (Value::use_iterator UI = GEPI->use_begin(), E = GEPI->use_end();
2439 UI != E; ++UI) {
2440 if (cast<Instruction>(*UI)->getParent() != DefBB) {
2441 isUsedOutsideDefBB = true;
2442 break;
2443 }
2444 }
2445 if (!isUsedOutsideDefBB) return;
2446
2447 // If this GEP has no non-zero constant indices, there is nothing we can do,
2448 // ignore it.
2449 bool hasConstantIndex = false;
2450 for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
2451 E = GEPI->op_end(); OI != E; ++OI) {
2452 if (ConstantInt *CI = dyn_cast<ConstantInt>(*OI))
2453 if (CI->getRawValue()) {
2454 hasConstantIndex = true;
2455 break;
2456 }
2457 }
2458 // If this is a GEP &Alloca, 0, 0, forward subst the frame index into uses.
2459 if (!hasConstantIndex && !isa<AllocaInst>(GEPI->getOperand(0))) return;
2460
2461 // Otherwise, decompose the GEP instruction into multiplies and adds. Sum the
2462 // constant offset (which we now know is non-zero) and deal with it later.
2463 uint64_t ConstantOffset = 0;
2464 const Type *UIntPtrTy = TD.getIntPtrType();
2465 Value *Ptr = new CastInst(GEPI->getOperand(0), UIntPtrTy, "", GEPI);
2466 const Type *Ty = GEPI->getOperand(0)->getType();
2467
2468 for (GetElementPtrInst::op_iterator OI = GEPI->op_begin()+1,
2469 E = GEPI->op_end(); OI != E; ++OI) {
2470 Value *Idx = *OI;
2471 if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
2472 unsigned Field = cast<ConstantUInt>(Idx)->getValue();
2473 if (Field)
2474 ConstantOffset += TD.getStructLayout(StTy)->MemberOffsets[Field];
2475 Ty = StTy->getElementType(Field);
2476 } else {
2477 Ty = cast<SequentialType>(Ty)->getElementType();
2478
2479 // Handle constant subscripts.
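// For example, a gep indexing a [10 x int] array with constant index 3
// contributes 3 * 4 = 12 bytes (assuming a 4-byte int) to ConstantOffset
// instead of producing a multiply and add.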
2480 if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
2481 if (CI->getRawValue() == 0) continue;
2482
2483 if (ConstantSInt *CSI = dyn_cast<ConstantSInt>(CI))
2484 ConstantOffset += (int64_t)TD.getTypeSize(Ty)*CSI->getValue();
2485 else
2486 ConstantOffset += TD.getTypeSize(Ty)*cast<ConstantUInt>(CI)->getValue();
2487 continue;
2488 }
2489
2490 // Ptr = Ptr + Idx * ElementSize;
2491
2492 // Cast Idx to UIntPtrTy if needed.
2493 Idx = new CastInst(Idx, UIntPtrTy, "", GEPI);
2494
2495 uint64_t ElementSize = TD.getTypeSize(Ty);
2496 // Mask off bits that should not be set.
2497 ElementSize &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
2498 Constant *SizeCst = ConstantUInt::get(UIntPtrTy, ElementSize);
2499
2500 // Multiply by the element size and add to the base.
2501 Idx = BinaryOperator::createMul(Idx, SizeCst, "", GEPI);
2502 Ptr = BinaryOperator::createAdd(Ptr, Idx, "", GEPI);
2503 }
2504 }
2505
2506 // Make sure that the offset fits in uintptr_t.
2507 ConstantOffset &= ~0ULL >> (64-UIntPtrTy->getPrimitiveSizeInBits());
2508 Constant *PtrOffset = ConstantUInt::get(UIntPtrTy, ConstantOffset);
2509
2510 // Okay, we have now emitted all of the variable index parts to the BB that
2511 // the GEP is defined in. Loop over all of the using instructions, inserting
2512 // an "add Ptr, ConstantOffset" into each block that uses it and updating the
2513 // instruction to use the newly computed value, making GEPI dead. When the
2514 // use is the address operand of a load or store, we emit the add into the
2515 // user block; otherwise we use a canonical version right next to the gep
2516 // (these won't be foldable as addresses, so we might as well share the computation).
2517
2518 std::map<BasicBlock*,Value*> InsertedExprs;
2519 while (!GEPI->use_empty()) {
2520 Instruction *User = cast<Instruction>(GEPI->use_back());
2521
2522 // If this use is not foldable into the addressing mode, use a version
2523 // emitted in the GEP block.
2524 Value *NewVal;
2525 if (!isa<LoadInst>(User) &&
2526 (!isa<StoreInst>(User) || User->getOperand(0) == GEPI)) {
2527 NewVal = InsertGEPComputeCode(InsertedExprs[DefBB], DefBB, GEPI,
2528 Ptr, PtrOffset);
2529 } else {
2530 // Otherwise, insert the code in the User's block so it can be folded into
2531 // any users in that block.
2532 NewVal = InsertGEPComputeCode(InsertedExprs[User->getParent()],
2533 User->getParent(), GEPI,
2534 Ptr, PtrOffset);
2535 }
2536 User->replaceUsesOfWith(GEPI, NewVal);
2537 }
2538
2539 // Finally, the GEP is dead; remove it.
2540 GEPI->eraseFromParent();
2541 }
2542
2543 bool SelectionDAGISel::runOnFunction(Function &Fn) {
2544 MachineFunction &MF = MachineFunction::construct(&Fn, TLI.getTargetMachine());
2545 RegMap = MF.getSSARegMap();
2546 DEBUG(std::cerr << "\n\n\n=== " << Fn.getName() << "\n");
2547
2548 // First, split all critical edges for PHI nodes with incoming values that are
2549 // constants, so that the load of the constant into a vreg will not be placed
2550 // into MBBs that are also used by other code paths.
2551 //
2552 // In this pass we also look for GEP instructions that are used across basic
2553 // blocks and rewrite them to improve basic-block-at-a-time selection.
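// For instance, a gep with a constant offset whose value feeds loads in two
// other blocks gets an "add" emitted next to each load, so the target's
// addressing modes can fold the constant back in.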
2554 // 2555 for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { 2556 PHINode *PN; 2557 BasicBlock::iterator BBI; 2558 for (BBI = BB->begin(); (PN = dyn_cast<PHINode>(BBI)); ++BBI) 2559 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 2560 if (isa<Constant>(PN->getIncomingValue(i))) 2561 SplitCriticalEdge(PN->getIncomingBlock(i), BB); 2562 2563 for (BasicBlock::iterator E = BB->end(); BBI != E; ) 2564 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(BBI++)) 2565 OptimizeGEPExpression(GEPI, TLI.getTargetData()); 2566 } 2567 2568 FunctionLoweringInfo FuncInfo(TLI, Fn, MF); 2569 2570 for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) 2571 SelectBasicBlock(I, MF, FuncInfo); 2572 2573 return true; 2574} 2575 2576 2577SDOperand SelectionDAGISel:: 2578CopyValueToVirtualRegister(SelectionDAGLowering &SDL, Value *V, unsigned Reg) { 2579 SDOperand Op = SDL.getValue(V); 2580 assert((Op.getOpcode() != ISD::CopyFromReg || 2581 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && 2582 "Copy from a reg to the same reg!"); 2583 2584 // If this type is not legal, we must make sure to not create an invalid 2585 // register use. 2586 MVT::ValueType SrcVT = Op.getValueType(); 2587 MVT::ValueType DestVT = TLI.getTypeToTransformTo(SrcVT); 2588 SelectionDAG &DAG = SDL.DAG; 2589 if (SrcVT == DestVT) { 2590 return DAG.getCopyToReg(SDL.getRoot(), Reg, Op); 2591 } else if (SrcVT == MVT::Vector) { 2592 // FIXME: THIS DOES NOT SUPPORT PROMOTED/EXPANDED ELEMENTS! 2593 2594 // Figure out the right, legal destination reg to copy into. 2595 const PackedType *PTy = cast<PackedType>(V->getType()); 2596 unsigned NumElts = PTy->getNumElements(); 2597 MVT::ValueType EltTy = TLI.getValueType(PTy->getElementType()); 2598 2599 unsigned NumVectorRegs = 1; 2600 2601 // Divide the input until we get to a supported size. This will always 2602 // end with a scalar if the target doesn't support vectors. 2603 while (NumElts > 1 && !TLI.isTypeLegal(getVectorType(EltTy, NumElts))) { 2604 NumElts >>= 1; 2605 NumVectorRegs <<= 1; 2606 } 2607 2608 MVT::ValueType VT; 2609 if (NumElts == 1) 2610 VT = EltTy; 2611 else 2612 VT = getVectorType(EltTy, NumElts); 2613 2614 // FIXME: THIS ASSUMES THAT THE INPUT VECTOR WILL BE LEGAL! 2615 Op = DAG.getNode(ISD::BIT_CONVERT, VT, Op); 2616 return DAG.getCopyToReg(SDL.getRoot(), Reg, Op); 2617 } else if (SrcVT < DestVT) { 2618 // The src value is promoted to the register. 2619 if (MVT::isFloatingPoint(SrcVT)) 2620 Op = DAG.getNode(ISD::FP_EXTEND, DestVT, Op); 2621 else 2622 Op = DAG.getNode(ISD::ANY_EXTEND, DestVT, Op); 2623 return DAG.getCopyToReg(SDL.getRoot(), Reg, Op); 2624 } else { 2625 // The src value is expanded into multiple registers. 2626 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT, 2627 Op, DAG.getConstant(0, MVT::i32)); 2628 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DestVT, 2629 Op, DAG.getConstant(1, MVT::i32)); 2630 Op = DAG.getCopyToReg(SDL.getRoot(), Reg, Lo); 2631 return DAG.getCopyToReg(Op, Reg+1, Hi); 2632 } 2633} 2634 2635void SelectionDAGISel:: 2636LowerArguments(BasicBlock *BB, SelectionDAGLowering &SDL, 2637 std::vector<SDOperand> &UnorderedChains) { 2638 // If this is the entry block, emit arguments. 
2639 Function &F = *BB->getParent();
2640 FunctionLoweringInfo &FuncInfo = SDL.FuncInfo;
2641 SDOperand OldRoot = SDL.DAG.getRoot();
2642 std::vector<SDOperand> Args = TLI.LowerArguments(F, SDL.DAG);
2643
2644 unsigned a = 0;
2645 for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
2646 AI != E; ++AI, ++a)
2647 if (!AI->use_empty()) {
2648 SDL.setValue(AI, Args[a]);
2649
2650 // If this argument is live outside of the entry block, insert a copy from
2651 // wherever we got it to the vreg that other BBs will reference it as.
2652 if (FuncInfo.ValueMap.count(AI)) {
2653 SDOperand Copy =
2654 CopyValueToVirtualRegister(SDL, AI, FuncInfo.ValueMap[AI]);
2655 UnorderedChains.push_back(Copy);
2656 }
2657 }
2658
2659 // Next, if the function has live ins that need to be copied into vregs,
2660 // emit the copies now, into the top of the block.
2661 MachineFunction &MF = SDL.DAG.getMachineFunction();
2662 if (MF.livein_begin() != MF.livein_end()) {
2663 SSARegMap *RegMap = MF.getSSARegMap();
2664 const MRegisterInfo &MRI = *MF.getTarget().getRegisterInfo();
2665 for (MachineFunction::livein_iterator LI = MF.livein_begin(),
2666 E = MF.livein_end(); LI != E; ++LI)
2667 if (LI->second)
2668 MRI.copyRegToReg(*MF.begin(), MF.begin()->end(), LI->second,
2669 LI->first, RegMap->getRegClass(LI->second));
2670 }
2671
2672 // Finally, if the target has anything special to do, allow it to do so.
2673 EmitFunctionEntryCode(F, SDL.DAG.getMachineFunction());
2674 }
2675
2676
2677 void SelectionDAGISel::BuildSelectionDAG(SelectionDAG &DAG, BasicBlock *LLVMBB,
2678 std::vector<std::pair<MachineInstr*, unsigned> > &PHINodesToUpdate,
2679 FunctionLoweringInfo &FuncInfo) {
2680 SelectionDAGLowering SDL(DAG, TLI, FuncInfo);
2681
2682 std::vector<SDOperand> UnorderedChains;
2683
2684 // Lower any arguments needed in this block if this is the entry block.
2685 if (LLVMBB == &LLVMBB->getParent()->front())
2686 LowerArguments(LLVMBB, SDL, UnorderedChains);
2687
2688 BB = FuncInfo.MBBMap[LLVMBB];
2689 SDL.setCurrentBasicBlock(BB);
2690
2691 // Lower all of the non-terminator instructions.
2692 for (BasicBlock::iterator I = LLVMBB->begin(), E = --LLVMBB->end();
2693 I != E; ++I)
2694 SDL.visit(*I);
2695
2696 // Ensure that all instructions which are used outside of their defining
2697 // blocks are available as virtual registers.
2698 for (BasicBlock::iterator I = LLVMBB->begin(), E = LLVMBB->end(); I != E; ++I)
2699 if (!I->use_empty() && !isa<PHINode>(I)) {
2700 std::map<const Value*, unsigned>::iterator VMI = FuncInfo.ValueMap.find(I);
2701 if (VMI != FuncInfo.ValueMap.end())
2702 UnorderedChains.push_back(
2703 CopyValueToVirtualRegister(SDL, I, VMI->second));
2704 }
2705
2706 // Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
2707 // ensure constants are generated when needed. Remember the virtual registers
2708 // that need to be added to the Machine PHI nodes as input. We cannot just
2709 // directly add them, because expansion might result in multiple MBBs for one
2710 // BB. As such, the start of the BB might correspond to a different MBB than
2711 // the end.
2712 //
2713
2714 // Emit constants only once even if used by multiple PHI nodes.
2715 std::map<Constant*, unsigned> ConstantsOut;
2716
2717 // Check successor blocks' PHI nodes that expect a constant to be available
2718 // from this block.
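// A constant feeding PHIs in several successors is materialized into a
// single vreg (tracked in ConstantsOut below) rather than once per PHI.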
2719 TerminatorInst *TI = LLVMBB->getTerminator();
2720 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
2721 BasicBlock *SuccBB = TI->getSuccessor(succ);
2722 MachineBasicBlock::iterator MBBI = FuncInfo.MBBMap[SuccBB]->begin();
2723 PHINode *PN;
2724
2725 // At this point we know that there is a 1-1 correspondence between LLVM PHI
2726 // nodes and Machine PHI nodes, but the incoming operands have not been
2727 // emitted yet.
2728 for (BasicBlock::iterator I = SuccBB->begin();
2729 (PN = dyn_cast<PHINode>(I)); ++I)
2730 if (!PN->use_empty()) {
2731 unsigned Reg;
2732 Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
2733 if (Constant *C = dyn_cast<Constant>(PHIOp)) {
2734 unsigned &RegOut = ConstantsOut[C];
2735 if (RegOut == 0) {
2736 RegOut = FuncInfo.CreateRegForValue(C);
2737 UnorderedChains.push_back(
2738 CopyValueToVirtualRegister(SDL, C, RegOut));
2739 }
2740 Reg = RegOut;
2741 } else {
2742 Reg = FuncInfo.ValueMap[PHIOp];
2743 if (Reg == 0) {
2744 assert(isa<AllocaInst>(PHIOp) &&
2745 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
2746 "Didn't codegen value into a register!??");
2747 Reg = FuncInfo.CreateRegForValue(PHIOp);
2748 UnorderedChains.push_back(
2749 CopyValueToVirtualRegister(SDL, PHIOp, Reg));
2750 }
2751 }
2752
2753 // Remember that this register needs to be added to the machine PHI node as
2754 // the input for this MBB.
2755 unsigned NumElements =
2756 TLI.getNumElements(TLI.getValueType(PN->getType()));
2757 for (unsigned i = 0, e = NumElements; i != e; ++i)
2758 PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
2759 }
2760 }
2761 ConstantsOut.clear();
2762
2763 // Turn all of the unordered chains into one factored node.
2764 if (!UnorderedChains.empty()) {
2765 SDOperand Root = SDL.getRoot();
2766 if (Root.getOpcode() != ISD::EntryToken) {
2767 unsigned i = 0, e = UnorderedChains.size();
2768 for (; i != e; ++i) {
2769 assert(UnorderedChains[i].Val->getNumOperands() > 1);
2770 if (UnorderedChains[i].Val->getOperand(0) == Root)
2771 break; // Don't add the root if we already indirectly depend on it.
2772 }
2773
2774 if (i == e)
2775 UnorderedChains.push_back(Root);
2776 }
2777 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, UnorderedChains));
2778 }
2779
2780 // Lower the terminator after the copies are emitted.
2781 SDL.visit(*LLVMBB->getTerminator());
2782
2783 // Copy over any CaseBlock records that may now exist due to SwitchInst
2784 // lowering.
2785 SwitchCases.clear();
2786 SwitchCases = SDL.SwitchCases;
2787
2788 // Make sure the root of the DAG is up-to-date.
2789 DAG.setRoot(SDL.getRoot());
2790 }
2791
2792 void SelectionDAGISel::CodeGenAndEmitDAG(SelectionDAG &DAG) {
2793 // Run the DAG combiner in pre-legalize mode.
2794 DAG.Combine(false);
2795
2796 DEBUG(std::cerr << "Lowered selection DAG:\n");
2797 DEBUG(DAG.dump());
2798
2799 // Second step, hack on the DAG until it only uses operations and types that
2800 // the target supports.
2801 DAG.Legalize();
2802
2803 DEBUG(std::cerr << "Legalized selection DAG:\n");
2804 DEBUG(DAG.dump());
2805
2806 // Run the DAG combiner in post-legalize mode.
2807 DAG.Combine(true);
2808
2809 if (ViewISelDAGs) DAG.viewGraph();
2810
2811 // Third, instruction select all of the operations to machine code, adding the
2812 // code to the MachineBasicBlock.
2813 InstructionSelectBasicBlock(DAG); 2814 2815 DEBUG(std::cerr << "Selected machine code:\n"); 2816 DEBUG(BB->dump()); 2817} 2818 2819void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, MachineFunction &MF, 2820 FunctionLoweringInfo &FuncInfo) { 2821 std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; 2822 { 2823 SelectionDAG DAG(TLI, MF, getAnalysisToUpdate<MachineDebugInfo>()); 2824 CurDAG = &DAG; 2825 2826 // First step, lower LLVM code to some DAG. This DAG may use operations and 2827 // types that are not supported by the target. 2828 BuildSelectionDAG(DAG, LLVMBB, PHINodesToUpdate, FuncInfo); 2829 2830 // Second step, emit the lowered DAG as machine code. 2831 CodeGenAndEmitDAG(DAG); 2832 } 2833 2834 // Next, now that we know what the last MBB the LLVM BB expanded is, update 2835 // PHI nodes in successors. 2836 if (SwitchCases.empty()) { 2837 for (unsigned i = 0, e = PHINodesToUpdate.size(); i != e; ++i) { 2838 MachineInstr *PHI = PHINodesToUpdate[i].first; 2839 assert(PHI->getOpcode() == TargetInstrInfo::PHI && 2840 "This is not a machine PHI node that we are updating!"); 2841 PHI->addRegOperand(PHINodesToUpdate[i].second); 2842 PHI->addMachineBasicBlockOperand(BB); 2843 } 2844 return; 2845 } 2846 2847 // If we generated any switch lowering information, build and codegen any 2848 // additional DAGs necessary. 2849 for(unsigned i = 0, e = SwitchCases.size(); i != e; ++i) { 2850 SelectionDAG SDAG(TLI, MF, getAnalysisToUpdate<MachineDebugInfo>()); 2851 CurDAG = &SDAG; 2852 SelectionDAGLowering SDL(SDAG, TLI, FuncInfo); 2853 // Set the current basic block to the mbb we wish to insert the code into 2854 BB = SwitchCases[i].ThisBB; 2855 SDL.setCurrentBasicBlock(BB); 2856 // Emit the code 2857 SDL.visitSwitchCase(SwitchCases[i]); 2858 SDAG.setRoot(SDL.getRoot()); 2859 CodeGenAndEmitDAG(SDAG); 2860 // Iterate over the phi nodes, if there is a phi node in a successor of this 2861 // block (for instance, the default block), then add a pair of operands to 2862 // the phi node for this block, as if we were coming from the original 2863 // BB before switch expansion. 2864 for (unsigned pi = 0, pe = PHINodesToUpdate.size(); pi != pe; ++pi) { 2865 MachineInstr *PHI = PHINodesToUpdate[pi].first; 2866 MachineBasicBlock *PHIBB = PHI->getParent(); 2867 assert(PHI->getOpcode() == TargetInstrInfo::PHI && 2868 "This is not a machine PHI node that we are updating!"); 2869 if (PHIBB == SwitchCases[i].LHSBB || PHIBB == SwitchCases[i].RHSBB) { 2870 PHI->addRegOperand(PHINodesToUpdate[pi].second); 2871 PHI->addMachineBasicBlockOperand(BB); 2872 } 2873 } 2874 } 2875} 2876 2877//===----------------------------------------------------------------------===// 2878/// ScheduleAndEmitDAG - Pick a safe ordering and emit instructions for each 2879/// target node in the graph. 
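/// The heuristic is selected by the -sched command line option; "default"
/// defers to the target, using bottom-up register-reduction list scheduling
/// when the target optimizes for register pressure and the simple scheduler
/// when it schedules for latency.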
2880void SelectionDAGISel::ScheduleAndEmitDAG(SelectionDAG &DAG) { 2881 if (ViewSchedDAGs) DAG.viewGraph(); 2882 ScheduleDAG *SL = NULL; 2883 2884 switch (ISHeuristic) { 2885 default: assert(0 && "Unrecognized scheduling heuristic"); 2886 case defaultScheduling: 2887 if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency) 2888 SL = createSimpleDAGScheduler(noScheduling, DAG, BB); 2889 else /* TargetLowering::SchedulingForRegPressure */ 2890 SL = createBURRListDAGScheduler(DAG, BB); 2891 break; 2892 case noScheduling: 2893 SL = createBFS_DAGScheduler(DAG, BB); 2894 break; 2895 case simpleScheduling: 2896 SL = createSimpleDAGScheduler(false, DAG, BB); 2897 break; 2898 case simpleNoItinScheduling: 2899 SL = createSimpleDAGScheduler(true, DAG, BB); 2900 break; 2901 case listSchedulingBURR: 2902 SL = createBURRListDAGScheduler(DAG, BB); 2903 break; 2904 case listSchedulingTD: 2905 SL = createTDListDAGScheduler(DAG, BB, CreateTargetHazardRecognizer()); 2906 break; 2907 } 2908 BB = SL->Run(); 2909 delete SL; 2910} 2911 2912HazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { 2913 return new HazardRecognizer(); 2914} 2915 2916/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated 2917/// by tblgen. Others should not call it. 2918void SelectionDAGISel:: 2919SelectInlineAsmMemoryOperands(std::vector<SDOperand> &Ops, SelectionDAG &DAG) { 2920 std::vector<SDOperand> InOps; 2921 std::swap(InOps, Ops); 2922 2923 Ops.push_back(InOps[0]); // input chain. 2924 Ops.push_back(InOps[1]); // input asm string. 2925 2926 const char *AsmStr = cast<ExternalSymbolSDNode>(InOps[1])->getSymbol(); 2927 unsigned i = 2, e = InOps.size(); 2928 if (InOps[e-1].getValueType() == MVT::Flag) 2929 --e; // Don't process a flag operand if it is here. 2930 2931 while (i != e) { 2932 unsigned Flags = cast<ConstantSDNode>(InOps[i])->getValue(); 2933 if ((Flags & 7) != 4 /*MEM*/) { 2934 // Just skip over this operand, copying the operands verbatim. 2935 Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+(Flags >> 3) + 1); 2936 i += (Flags >> 3) + 1; 2937 } else { 2938 assert((Flags >> 3) == 1 && "Memory operand with multiple values?"); 2939 // Otherwise, this is a memory operand. Ask the target to select it. 2940 std::vector<SDOperand> SelOps; 2941 if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps, DAG)) { 2942 std::cerr << "Could not match memory address. Inline asm failure!\n"; 2943 exit(1); 2944 } 2945 2946 // Add this to the output node. 2947 Ops.push_back(DAG.getConstant(4/*MEM*/ | (SelOps.size() << 3), MVT::i32)); 2948 Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); 2949 i += 2; 2950 } 2951 } 2952 2953 // Add the flag input back if present. 2954 if (e != InOps.size()) 2955 Ops.push_back(InOps.back()); 2956} 2957