X86ISelDAGToDAG.cpp revision f06c835f769aa1cf67801ed1f6bd366a447c18b1
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a DAG pattern matching instruction selector for X86, 11// converting from a legalized dag to a X86 dag. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "x86-isel" 16#include "X86.h" 17#include "X86InstrBuilder.h" 18#include "X86ISelLowering.h" 19#include "X86MachineFunctionInfo.h" 20#include "X86RegisterInfo.h" 21#include "X86Subtarget.h" 22#include "X86TargetMachine.h" 23#include "llvm/GlobalValue.h" 24#include "llvm/Instructions.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/Support/CFG.h" 27#include "llvm/Type.h" 28#include "llvm/CodeGen/MachineConstantPool.h" 29#include "llvm/CodeGen/MachineFunction.h" 30#include "llvm/CodeGen/MachineFrameInfo.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/CodeGen/SelectionDAGISel.h" 34#include "llvm/Target/TargetMachine.h" 35#include "llvm/Target/TargetOptions.h" 36#include "llvm/Support/Compiler.h" 37#include "llvm/Support/Debug.h" 38#include "llvm/Support/MathExtras.h" 39#include "llvm/Support/Streams.h" 40#include "llvm/ADT/SmallPtrSet.h" 41#include "llvm/ADT/Statistic.h" 42#include <queue> 43#include <set> 44using namespace llvm; 45 46STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added"); 47STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 48 49//===----------------------------------------------------------------------===// 50// Pattern Matcher Implementation 51//===----------------------------------------------------------------------===// 52 53namespace { 54 /// X86ISelAddressMode - This corresponds to 
  /// X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;      // Valid when BaseType == RegBase.
      int FrameIndex;   // Valid when BaseType == FrameIndexBase.
    } Base;

    bool isRIPRel;      // RIP as base?
    unsigned Scale;     // Index scale factor: 1, 2, 4 or 8.
    SDValue IndexReg;   // Scaled index register, null if unused.
    unsigned Disp;      // Constant displacement.
    GlobalValue *GV;    // Symbolic displacement alternatives: at most one of
    Constant *CP;       // GV / CP / ES / JT is set at a time.
    const char *ES;
    int JT;             // -1 means "no jump table".
    unsigned Align;     // CP alignment.

    X86ISelAddressMode()
      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
        GV(0), CP(0), ES(0), JT(-1), Align(0) {
    }
    // dump - Print the address mode to cerr for debugging.
    void dump() {
      cerr << "X86ISelAddressMode " << this << "\n";
      cerr << "Base.Reg ";
      if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump();
      else cerr << "nul";
      cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
      cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
      cerr << "IndexReg ";
      if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
      else cerr << "nul";
      cerr << " Disp " << Disp << "\n";
      cerr << "GV "; if (GV) GV->dump();
      else cerr << "nul";
      cerr << " CP "; if (CP) CP->dump();
      else cerr << "nul";
      cerr << "\n";
      cerr << "ES "; if (ES) cerr << ES; else cerr << "nul";
      cerr << " JT" << JT << " Align" << Align << "\n";
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// ContainsFPCode - Every instruction we select that uses or defines a FP
    /// register should set this to true.
    bool ContainsFPCode;

    /// TM - Keep a reference to X86TargetMachine.
    ///
    X86TargetMachine &TM;

    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// CurBB - Current BB being isel'd.
    ///
    MachineBasicBlock *CurBB;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    // NOTE(review): the base class is handed a reference to the X86Lowering
    // member before that member is constructed; binding the reference is what
    // matters here — the base must not use it during construction. Confirm.
    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
      : SelectionDAGISel(X86Lowering, fast),
        ContainsFPCode(false), TM(tm),
        X86Lowering(*TM.getTargetLowering()),
        Subtarget(&TM.getSubtarget<X86Subtarget>()),
        OptForSize(OptimizeForSize) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelect - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelect();

    /// InstructionSelectPostProcessing - Post processing of selected and
    /// scheduled basic blocks.
    virtual void InstructionSelectPostProcessing();

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDValue N);

    bool MatchAddress(SDValue N, X86ISelAddressMode &AM,
                      bool isRoot = true, unsigned Depth = 0);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM,
                          bool isRoot, unsigned Depth);
    bool SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
                             SDValue N, SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &InChain, SDValue &OutChain);
    bool TryFoldLoad(SDValue P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp);
    void PreprocessForRMW();
    void PreprocessForFPConvert();

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Convert a matched X86ISelAddressMode into the four
    /// SDValue operands (Base, Scale, Index, Disp) an X86 memory reference
    /// instruction expects.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      else
        Disp = getI32Imm(AM.Disp);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTruncateTo8Bit - return an SDNode that implements a subreg based
    /// truncate of the specified operand to i8. This can be done with tablegen,
    /// except that this code uses MVT::Flag in a tricky way that happens to
    /// improve scheduling in some cases.
    SDNode *getTruncateTo8Bit(SDValue N0);

#ifndef NDEBUG
    unsigned Indent;   // Debug-only: nesting depth for isel trace output.
#endif
  };
}

/// findFlagUse - Return use of MVT::Flag value produced by the specified
/// SDNode.
///
static SDNode *findFlagUse(SDNode *N) {
  // The flag result, if any, is always the last value produced by N.
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    SDNode *User = *I;
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDValue Op = User->getOperand(i);
      // Return the first user that consumes N's flag result.
      if (Op.getNode() == N && Op.getResNo() == FlagResNo)
        return User;
    }
  }
  return NULL;
}

/// findNonImmUse - Set "found" to true if "Use" is a non-immediate use of
/// "Def". This function recursively traverses up the operand chain, ignoring
/// certain nodes.
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, bool &found,
                          SmallPtrSet<SDNode*, 16> &Visited) {
  // Stop if already found, if Use was created before Def (node ids are
  // topologically ordered, so it cannot reach Def), or if already visited.
  if (found ||
      Use->getNodeId() < Def->getNodeId() ||
      !Visited.insert(Use))
    return;

  for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
    SDNode *N = Use->getOperand(i).getNode();
    if (N == Def) {
      if (Use == ImmedUse || Use == Root)
        continue;  // We are not looking for immediate use.
      assert(N != Root);
      found = true;
      break;
    }

    // Traverse up the operand chain.
    findNonImmUse(N, Def, ImmedUse, Root, found, Visited);
  }
}

/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
  SmallPtrSet<SDNode*, 16> Visited;
  bool found = false;
  findNonImmUse(Root, Def, ImmedUse, Root, found, Visited);
  return found;
}


/// CanBeFoldedBy - Return true if folding N (a load) into Root, with U as the
/// intermediate use, cannot create a cycle in the scheduling graph.
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
  // No folding at -fast: keep isel simple and quick.
  if (Fast) return false;

  // If Root use can somehow reach N through a path that that doesn't contain
  // U then folding N would create a cycle. e.g. In the following
  // diagram, Root can reach N through X. If N is folded into into Root, then
  // X is both a predecessor and a successor of U.
  //
  //          [N*]           //
  //         ^   ^           //
  //        /     \          //
  //      [U*]    [X]?       //
  //        ^     ^          //
  //         \   /           //
  //          \ /            //
  //         [Root*]         //
  //
  // * indicates nodes to be folded together.
  //
  // If Root produces a flag, then it gets (even more) interesting. Since it
  // will be "glued" together with its flag use in the scheduler, we need to
  // check if it might reach N.
  //
  //          [N*]           //
  //         ^   ^           //
  //        /     \          //
  //      [U*]    [X]?       //
  //        ^       ^        //
  //         \       \       //
  //          \      |       //
  //         [Root*] |       //
  //          ^      |       //
  //          f      |       //
  //          |      /       //
  //         [Y]    /        //
  //           ^   /         //
  //           f  /          //
  //           | /           //
  //          [FU]           //
  //
  // If FU (flag use) indirectly reaches N (the load), and Root folds N
  // (call it Fold), then X is a predecessor of FU and a successor of
  // Fold. But since Fold and FU are flagged together, this will create
  // a cycle in the scheduling graph.

  // Walk down the chain of flag uses: the effective "root" for cycle
  // detection is the last node glued to Root through MVT::Flag.
  MVT VT = Root->getValueType(Root->getNumValues()-1);
  while (VT == MVT::Flag) {
    SDNode *FU = findFlagUse(Root);
    if (FU == NULL)
      break;
    Root = FU;
    VT = Root->getValueType(Root->getNumValues()-1);
  }

  return !isNonImmUse(Root, N, U);
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor.
Replace store's chain operand with 367/// load's chain result. 368static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, 369 SDValue Store, SDValue TF) { 370 SmallVector<SDValue, 4> Ops; 371 for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) 372 if (Load.getNode() == TF.getOperand(i).getNode()) 373 Ops.push_back(Load.getOperand(0)); 374 else 375 Ops.push_back(TF.getOperand(i)); 376 CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); 377 CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); 378 CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), 379 Store.getOperand(2), Store.getOperand(3)); 380} 381 382/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. 383/// 384static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, 385 SDValue &Load) { 386 if (N.getOpcode() == ISD::BIT_CONVERT) 387 N = N.getOperand(0); 388 389 LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 390 if (!LD || LD->isVolatile()) 391 return false; 392 if (LD->getAddressingMode() != ISD::UNINDEXED) 393 return false; 394 395 ISD::LoadExtType ExtType = LD->getExtensionType(); 396 if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) 397 return false; 398 399 if (N.hasOneUse() && 400 N.getOperand(1) == Address && 401 N.getNode()->isOperandOf(Chain.getNode())) { 402 Load = N; 403 return true; 404 } 405 return false; 406} 407 408/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain 409/// operand and move load below the call's chain operand. 
410static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, 411 SDValue Call, SDValue Chain) { 412 SmallVector<SDValue, 8> Ops; 413 for (unsigned i = 0, e = Chain.getNode()->getNumOperands(); i != e; ++i) 414 if (Load.getNode() == Chain.getOperand(i).getNode()) 415 Ops.push_back(Load.getOperand(0)); 416 else 417 Ops.push_back(Chain.getOperand(i)); 418 CurDAG->UpdateNodeOperands(Chain, &Ops[0], Ops.size()); 419 CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), 420 Load.getOperand(1), Load.getOperand(2)); 421 Ops.clear(); 422 Ops.push_back(SDValue(Load.getNode(), 1)); 423 for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) 424 Ops.push_back(Call.getOperand(i)); 425 CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); 426} 427 428/// isCalleeLoad - Return true if call address is a load and it can be 429/// moved below CALLSEQ_START and the chains leading up to the call. 430/// Return the CALLSEQ_START by reference as a second output. 431static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { 432 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 433 return false; 434 LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 435 if (!LD || 436 LD->isVolatile() || 437 LD->getAddressingMode() != ISD::UNINDEXED || 438 LD->getExtensionType() != ISD::NON_EXTLOAD) 439 return false; 440 441 // Now let's find the callseq_start. 442 while (Chain.getOpcode() != ISD::CALLSEQ_START) { 443 if (!Chain.hasOneUse()) 444 return false; 445 Chain = Chain.getOperand(0); 446 } 447 return Chain.getOperand(0).getNode() == Callee.getNode(); 448} 449 450 451/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. 452/// This is only run if not in -fast mode (aka -O0). 453/// This allows the instruction selector to pick more read-modify-write 454/// instructions. 
/// This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {
    if (I->getOpcode() == X86ISD::CALL) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      SDValue Chain = I->getOperand(0);
      SDValue Load  = I->getOperand(1);
      if (!isCalleeLoad(Load, Chain))
        continue;
      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Otherwise look for the (store (op (load))) RMW pattern.
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDValue Chain = I->getOperand(0);

    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
      continue;

    SDValue N1 = I->getOperand(1);   // Stored value (the Op).
    SDValue N2 = I->getOperand(2);   // Store address.
    // Scalar FP ops go to the FP stack, not RMW; and the op must feed only
    // this store.
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDValue Load;
    unsigned Opcode = N1.getNode()->getOpcode();
    switch (Opcode) {
    // Commutative operators: the load may be either operand.
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::VECTOR_SHUFFLE: {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      if (!RModW)
        RModW = isRMWLoad(N11, Chain, N2, Load);
      break;
    }
    // Non-commutative operators: only the first operand may be the load.
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDValue N10 = N1.getOperand(0);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      break;
    }
    }

    if (RModW) {
      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack. This is a
/// gross hack. We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass. We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization. As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Advance iterator past N now to avoid invalidation.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT SrcVT = N->getOperand(0).getValueType();
    MVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(),
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
                                        NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// InstructionSelect - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
647void X86DAGToDAGISel::InstructionSelect() { 648 CurBB = BB; // BB can change as result of isel. 649 if (!OptForSize) { 650 const Function *F = CurDAG->getMachineFunction().getFunction(); 651 OptForSize = !F->isDeclaration() && 652 F->hasFnAttr(Attribute::OptimizeForSize); 653 } 654 655 DEBUG(BB->dump()); 656 if (!Fast) 657 PreprocessForRMW(); 658 659 // FIXME: This should only happen when not -fast. 660 PreprocessForFPConvert(); 661 662 // Codegen the basic block. 663#ifndef NDEBUG 664 DOUT << "===== Instruction selection begins:\n"; 665 Indent = 0; 666#endif 667 SelectRoot(); 668#ifndef NDEBUG 669 DOUT << "===== Instruction selection ends:\n"; 670#endif 671 672 CurDAG->RemoveDeadNodes(); 673} 674 675void X86DAGToDAGISel::InstructionSelectPostProcessing() { 676 // If we are emitting FP stack code, scan the basic block to determine if this 677 // block defines any FP values. If so, put an FP_REG_KILL instruction before 678 // the terminator of the block. 679 680 // Note that FP stack instructions are used in all modes for long double, 681 // so we always need to do this check. 682 // Also note that it's possible for an FP stack register to be live across 683 // an instruction that produces multiple basic blocks (SSE CMOV) so we 684 // must check all the generated basic blocks. 685 686 // Scan all of the machine instructions in these MBBs, checking for FP 687 // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.) 688 MachineFunction::iterator MBBI = CurBB; 689 MachineFunction::iterator EndMBB = BB; ++EndMBB; 690 for (; MBBI != EndMBB; ++MBBI) { 691 MachineBasicBlock *MBB = MBBI; 692 693 // If this block returns, ignore it. We don't want to insert an FP_REG_KILL 694 // before the return. 
695 if (!MBB->empty()) { 696 MachineBasicBlock::iterator EndI = MBB->end(); 697 --EndI; 698 if (EndI->getDesc().isReturn()) 699 continue; 700 } 701 702 bool ContainsFPCode = false; 703 for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); 704 !ContainsFPCode && I != E; ++I) { 705 if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) { 706 const TargetRegisterClass *clas; 707 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { 708 if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() && 709 TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && 710 ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) == 711 X86::RFP32RegisterClass || 712 clas == X86::RFP64RegisterClass || 713 clas == X86::RFP80RegisterClass)) { 714 ContainsFPCode = true; 715 break; 716 } 717 } 718 } 719 } 720 // Check PHI nodes in successor blocks. These PHI's will be lowered to have 721 // a copy of the input value in this block. In SSE mode, we only care about 722 // 80-bit values. 723 if (!ContainsFPCode) { 724 // Final check, check LLVM BB's that are successors to the LLVM BB 725 // corresponding to BB for FP PHI nodes. 726 const BasicBlock *LLVMBB = BB->getBasicBlock(); 727 const PHINode *PN; 728 for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); 729 !ContainsFPCode && SI != E; ++SI) { 730 for (BasicBlock::const_iterator II = SI->begin(); 731 (PN = dyn_cast<PHINode>(II)); ++II) { 732 if (PN->getType()==Type::X86_FP80Ty || 733 (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) || 734 (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) { 735 ContainsFPCode = true; 736 break; 737 } 738 } 739 } 740 } 741 // Finally, if we found any FP code, emit the FP_REG_KILL instruction. 
742 if (ContainsFPCode) { 743 BuildMI(*MBB, MBBI->getFirstTerminator(), 744 TM.getInstrInfo()->get(X86::FP_REG_KILL)); 745 ++NumFPKill; 746 } 747 } 748} 749 750/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 751/// the main function. 752void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 753 MachineFrameInfo *MFI) { 754 const TargetInstrInfo *TII = TM.getInstrInfo(); 755 if (Subtarget->isTargetCygMing()) 756 BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 757} 758 759void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 760 // If this is main, emit special code for main. 761 MachineBasicBlock *BB = MF.begin(); 762 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 763 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 764} 765 766/// MatchAddress - Add the specified node to the specified addressing mode, 767/// returning true if it cannot be done. This just pattern matches for the 768/// addressing mode. 769bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, 770 bool isRoot, unsigned Depth) { 771 DOUT << "MatchAddress: "; DEBUG(AM.dump()); 772 // Limit recursion. 773 if (Depth > 5) 774 return MatchAddressBase(N, AM, isRoot, Depth); 775 776 // RIP relative addressing: %rip + 32-bit displacement! 777 if (AM.isRIPRel) { 778 if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) { 779 int64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 780 if (isInt32(AM.Disp + Val)) { 781 AM.Disp += Val; 782 return false; 783 } 784 } 785 return true; 786 } 787 788 int id = N.getNode()->getNodeId(); 789 bool AlreadySelected = isSelected(id); // Already selected, not yet replaced. 

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    // Fold a constant into the displacement if the sum still fits in 32 bits.
    int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (isInt32(AM.Disp + Val)) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::Wrapper: {
DOUT << "Wrapper: 64bit " << Subtarget->is64Bit();
DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
DOUT << "AlreadySelected " << AlreadySelected << "\n";
    bool is64Bit = Subtarget->is64Bit();
    // Under X86-64 non-small code model, GV (and friends) are 64-bits.
    // Also, base and index reg must be 0 in order to use rip as base.
    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
                    AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
      break;
    // Only one symbolic displacement can be folded into an address.
    if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
      break;
    // If value is available in a register both base and index components have
    // been picked, we can't fit the result available in the register in the
    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
    if (!AlreadySelected || (AM.Base.Reg.getNode() && AM.IndexReg.getNode())) {
      SDValue N0 = N.getOperand(0);
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
        GlobalValue *GV = G->getGlobal();
        AM.GV = GV;
        AM.Disp += G->getOffset();
        AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
        return false;
      } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
        AM.CP = CP->getConstVal();
        AM.Align = CP->getAlignment();
        AM.Disp += CP->getOffset();
        AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
        return false;
      } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
        AM.ES = S->getSymbol();
        AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
        return false;
      } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
        AM.JT = J->getIndex();
        AM.isRIPRel = TM.symbolicAddressesAreRIPRel();
        return false;
      }
    }
    break;
  }

  case ISD::FrameIndex:
    // A frame index can serve as the base if no base has been chosen yet.
    if (AM.BaseType == X86ISelAddressMode::RegBase
        && AM.Base.Reg.getNode() == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    // (X << {1,2,3}) becomes the scaled-index component.
    if (AlreadySelected || AM.IndexReg.getNode() != 0
        || AM.Scale != 1 || AM.isRIPRel)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getZExtValue() << Val);
          if (isInt32(Disp))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
      break;
    }
    // NOTE(review): when the shift amount is not a constant, control falls
    // through to the SMUL_LOHI/MUL handling below. This looks intentional
    // (the dyn_cast there fails the same way and reaches the final break),
    // but confirm before restructuring.

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (!AlreadySelected &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.getNode() == 0 &&
        AM.IndexReg.getNode() == 0 &&
        !AM.isRIPRel) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getZExtValue() *
                                      CN->getZExtValue();
            if (isInt32(Disp))
              AM.Disp = Disp;
            else
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          // Base = Index = Reg encodes Reg + Reg*(Scale-1) == Reg*Scale.
          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::ADD:
    // Try matching (lhs then rhs) into AM; on failure restore AM and try the
    // operands in the opposite order.
    if (!AlreadySelected) {
      X86ISelAddressMode Backup = AM;
      if (!MatchAddress(N.getNode()->getOperand(0), AM, false, Depth+1) &&
          !MatchAddress(N.getNode()->getOperand(1), AM, false, Depth+1))
        return false;
      AM = Backup;
      if (!MatchAddress(N.getNode()->getOperand(1), AM, false, Depth+1) &&
          !MatchAddress(N.getNode()->getOperand(0), AM, false, Depth+1))
        return false;
      AM = Backup;
    }
    break;

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (AlreadySelected) break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      // Start with the LHS as an addr mode.
      if (!MatchAddress(N.getOperand(0), AM, false) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          isInt32(AM.Disp + CN->getSExtValue()) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp += CN->getZExtValue();
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (AlreadySelected) break;
    SDValue Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SHL) break;

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    // Not when RIP is used as the base.
    if (AM.isRIPRel) break;

    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getZExtValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
                                         SDValue(C2, 0), SDValue(C1, 0));
    SDValue NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
                                     Shift.getOperand(0), NewANDMask);
    // Reuse the replaced nodes' ids so isel bookkeeping stays consistent.
    NewANDMask.getNode()->setNodeId(Shift.getNode()->getNodeId());
    NewAND.getNode()->setNodeId(N.getNode()->getNodeId());

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  return MatchAddressBase(N, AM, isRoot, Depth);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM,
                                       bool isRoot, unsigned Depth) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp) {
  X86ISelAddressMode AM;
  // Note inverted conventions: MatchAddress returns true on FAILURE, while
  // SelectAddr returns true on success.
  if (MatchAddress(N, AM))
    return false;

  MVT VT = N.getValueType();
  // Fill unused base/index slots with register 0 (no register).
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.getNode())
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
1060bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, 1061 SDValue N, SDValue &Base, 1062 SDValue &Scale, SDValue &Index, 1063 SDValue &Disp, SDValue &InChain, 1064 SDValue &OutChain) { 1065 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1066 InChain = N.getOperand(0).getValue(1); 1067 if (ISD::isNON_EXTLoad(InChain.getNode()) && 1068 InChain.getValue(0).hasOneUse() && 1069 N.hasOneUse() && 1070 CanBeFoldedBy(N.getNode(), Pred.getNode(), Op.getNode())) { 1071 LoadSDNode *LD = cast<LoadSDNode>(InChain); 1072 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp)) 1073 return false; 1074 OutChain = LD->getChain(); 1075 return true; 1076 } 1077 } 1078 1079 // Also handle the case where we explicitly require zeros in the top 1080 // elements. This is a vector shuffle from the zero vector. 1081 if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1082 // Check to see if the top elements are all zeros (or bitcast of zeros). 1083 N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1084 N.getOperand(0).getNode()->hasOneUse() && 1085 ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1086 N.getOperand(0).getOperand(0).hasOneUse()) { 1087 // Okay, this is a zero extending load. Fold it. 1088 LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1089 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp)) 1090 return false; 1091 OutChain = LD->getChain(); 1092 InChain = SDValue(LD, 1); 1093 return true; 1094 } 1095 return false; 1096} 1097 1098 1099/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1100/// mode it matches can be cost effectively emitted as an LEA instruction. 
1101bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, 1102 SDValue &Base, SDValue &Scale, 1103 SDValue &Index, SDValue &Disp) { 1104 X86ISelAddressMode AM; 1105 if (MatchAddress(N, AM)) 1106 return false; 1107 1108 MVT VT = N.getValueType(); 1109 unsigned Complexity = 0; 1110 if (AM.BaseType == X86ISelAddressMode::RegBase) 1111 if (AM.Base.Reg.getNode()) 1112 Complexity = 1; 1113 else 1114 AM.Base.Reg = CurDAG->getRegister(0, VT); 1115 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1116 Complexity = 4; 1117 1118 if (AM.IndexReg.getNode()) 1119 Complexity++; 1120 else 1121 AM.IndexReg = CurDAG->getRegister(0, VT); 1122 1123 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1124 // a simple shift. 1125 if (AM.Scale > 1) 1126 Complexity++; 1127 1128 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1129 // to a LEA. This is determined with some expermentation but is by no means 1130 // optimal (especially for code size consideration). LEA is nice because of 1131 // its three-address nature. Tweak the cost function again when we can run 1132 // convertToThreeAddress() at register allocation time. 1133 if (AM.GV || AM.CP || AM.ES || AM.JT != -1) { 1134 // For X86-64, we should always use lea to materialize RIP relative 1135 // addresses. 
1136 if (Subtarget->is64Bit()) 1137 Complexity = 4; 1138 else 1139 Complexity += 2; 1140 } 1141 1142 if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) 1143 Complexity++; 1144 1145 if (Complexity > 2) { 1146 getAddressOperands(AM, Base, Scale, Index, Disp); 1147 return true; 1148 } 1149 return false; 1150} 1151 1152bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, 1153 SDValue &Base, SDValue &Scale, 1154 SDValue &Index, SDValue &Disp) { 1155 if (ISD::isNON_EXTLoad(N.getNode()) && 1156 N.hasOneUse() && 1157 CanBeFoldedBy(N.getNode(), P.getNode(), P.getNode())) 1158 return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp); 1159 return false; 1160} 1161 1162/// getGlobalBaseReg - Return an SDNode that returns the value of 1163/// the global base register. Output instructions required to 1164/// initialize the global base register, if necessary. 1165/// 1166SDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1167 MachineFunction *MF = CurBB->getParent(); 1168 unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); 1169 return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); 1170} 1171 1172static SDNode *FindCallStartFromCall(SDNode *Node) { 1173 if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; 1174 assert(Node->getOperand(0).getValueType() == MVT::Other && 1175 "Node doesn't have a token chain argument!"); 1176 return FindCallStartFromCall(Node->getOperand(0).getNode()); 1177} 1178 1179/// getTruncateTo8Bit - return an SDNode that implements a subreg based 1180/// truncate of the specified operand to i8. This can be done with tablegen, 1181/// except that this code uses MVT::Flag in a tricky way that happens to 1182/// improve scheduling in some cases. 
1183SDNode *X86DAGToDAGISel::getTruncateTo8Bit(SDValue N0) { 1184 assert(!Subtarget->is64Bit() && 1185 "getTruncateTo8Bit is only needed on x86-32!"); 1186 SDValue SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1 1187 1188 // Ensure that the source register has an 8-bit subreg on 32-bit targets 1189 unsigned Opc; 1190 MVT N0VT = N0.getValueType(); 1191 switch (N0VT.getSimpleVT()) { 1192 default: assert(0 && "Unknown truncate!"); 1193 case MVT::i16: 1194 Opc = X86::MOV16to16_; 1195 break; 1196 case MVT::i32: 1197 Opc = X86::MOV32to32_; 1198 break; 1199 } 1200 1201 // The use of MVT::Flag here is not strictly accurate, but it helps 1202 // scheduling in some cases. 1203 N0 = SDValue(CurDAG->getTargetNode(Opc, N0VT, MVT::Flag, N0), 0); 1204 return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, 1205 MVT::i8, N0, SRIdx, N0.getValue(1)); 1206} 1207 1208 1209SDNode *X86DAGToDAGISel::Select(SDValue N) { 1210 SDNode *Node = N.getNode(); 1211 MVT NVT = Node->getValueType(0); 1212 unsigned Opc, MOpc; 1213 unsigned Opcode = Node->getOpcode(); 1214 1215#ifndef NDEBUG 1216 DOUT << std::string(Indent, ' ') << "Selecting: "; 1217 DEBUG(Node->dump(CurDAG)); 1218 DOUT << "\n"; 1219 Indent += 2; 1220#endif 1221 1222 if (Node->isMachineOpcode()) { 1223#ifndef NDEBUG 1224 DOUT << std::string(Indent-2, ' ') << "== "; 1225 DEBUG(Node->dump(CurDAG)); 1226 DOUT << "\n"; 1227 Indent -= 2; 1228#endif 1229 return NULL; // Already selected. 1230 } 1231 1232 switch (Opcode) { 1233 default: break; 1234 case X86ISD::GlobalBaseReg: 1235 return getGlobalBaseReg(); 1236 1237 case ISD::ADD: { 1238 // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd 1239 // code and is matched first so to prevent it from being turned into 1240 // LEA32r X+c. 1241 // In 64-bit small code size mode, use LEA to take advantage of 1242 // RIP-relative addressing. 
    // (Continuation of the ISD::ADD case.)
    if (TM.getCodeModel() != CodeModel::Small)
      break;
    MVT PtrVT = TLI.getPointerTy();
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    // Match (Wrapper GA/CP) + constant and fold the constant into the
    // target address's offset field.
    if (N.getNode()->getValueType(0) == PtrVT &&
        N0.getOpcode() == X86ISD::Wrapper &&
        N1.getOpcode() == ISD::Constant) {
      unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getZExtValue();
      SDValue C(0, 0);
      // TODO: handle ExternalSymbolSDNode.
      if (GlobalAddressSDNode *G =
          dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                           G->getOffset() + Offset);
      } else if (ConstantPoolSDNode *CP =
                 dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                          CP->getAlignment(),
                                          CP->getOffset()+Offset);
      }

      if (C.getNode()) {
        if (Subtarget->is64Bit()) {
          // RIP-relative LEA: base 0, scale 1, index 0, disp = C.
          SDValue Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                            CurDAG->getRegister(0, PtrVT), C };
          return CurDAG->SelectNodeTo(N.getNode(), X86::LEA64r,
                                      MVT::i64, Ops, 4);
        } else
          return CurDAG->SelectNodeTo(N.getNode(), X86::MOV32ri, PtrVT, C);
      }
    }

    // Other cases are handled by auto-generated code.
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    // Widening multiply: result lands in the fixed A/D register pair
    // (AL/AH, AX/DX, EAX/EDX, RAX/RDX), glued together with MVT::Flag.
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned)
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    else
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }

    unsigned LoReg, HiReg;
    switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
    // Multiplication is commutative: if the RHS can't be folded as a memory
    // operand, try the LHS and swap the operands.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    // The implicit first operand goes into the low register.
    AddToISelQueue(N0);
    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
                                          N0, SDValue()).getValue(1);

    if (foldedLoad) {
      AddToISelQueue(N1.getOperand(0));
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      AddToISelQueue(N1);
      InFlag =
        SDValue(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the low half of the result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the high half of the result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        // (Continuation: finish the AH-avoiding truncate for x86-64.)
        SDValue SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                               MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    // Division: quotient lands in the low register, remainder in the high
    // register of the fixed A/D pair; dividend must be (sign/zero) extended
    // into the pair first.
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned)
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    else
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }

    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  HiReg = X86::AH;
      ClrOpcode  = 0;               // i8 is special-cased below.
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrOpcode  = X86::MOV16r0;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; HiReg = X86::EDX;
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode  = X86::MOV64r0;
      SExtOpcode = X86::CQO;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);

    SDValue InFlag;
    if (NVT == MVT::i8 && !isSigned) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
        // Dividend is a foldable load: zero-extend straight from memory.
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
        AddToISelQueue(N0.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        Move =
          SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                        Ops, 5), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        AddToISelQueue(N0);
        Move =
          SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
        Chain = CurDAG->getEntryNode();
      }
      Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      AddToISelQueue(N0);
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(),
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        // (Continuation: zero the high register before an unsigned divide.)
        SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      AddToISelQueue(N1.getOperand(0));
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      AddToISelQueue(N1);
      InFlag =
        SDValue(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the division (low) result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the remainder (high) result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDValue SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                               MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::SIGN_EXTEND_INREG: {
    // On x86-32, sign_extend_inreg from i8 needs the subreg-based truncate
    // helper because not every register has an 8-bit subregister.
    MVT SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
    if (SVT == MVT::i8 && !Subtarget->is64Bit()) {
      SDValue N0 = Node->getOperand(0);
      AddToISelQueue(N0);

      SDValue TruncOp = SDValue(getTruncateTo8Bit(N0), 0);
      unsigned Opc = 0;
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unknown sign_extend_inreg!");
      case MVT::i16:
        Opc = X86::MOVSX16rr8;
        break;
      case MVT::i32:
        Opc = X86::MOVSX32rr8;
        break;
      }

      SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(TruncOp.getNode()->dump(CurDAG));
      DOUT << "\n";
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
    }
    break;
  }

  case ISD::TRUNCATE: {
    // Truncate to i8 on x86-32 also goes through the subreg-based helper.
    if (NVT == MVT::i8 && !Subtarget->is64Bit()) {
      SDValue Input = Node->getOperand(0);
      AddToISelQueue(Node->getOperand(0));
      SDNode *ResNode = getTruncateTo8Bit(Input);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
    }
    break;
  }

  case ISD::DECLARE: {
    // Handle DECLARE nodes here because the
second operand may have been 1602 // wrapped in X86ISD::Wrapper. 1603 SDValue Chain = Node->getOperand(0); 1604 SDValue N1 = Node->getOperand(1); 1605 SDValue N2 = Node->getOperand(2); 1606 if (!isa<FrameIndexSDNode>(N1)) 1607 break; 1608 int FI = cast<FrameIndexSDNode>(N1)->getIndex(); 1609 if (N2.getOpcode() == ISD::ADD && 1610 N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg) 1611 N2 = N2.getOperand(1); 1612 if (N2.getOpcode() == X86ISD::Wrapper && 1613 isa<GlobalAddressSDNode>(N2.getOperand(0))) { 1614 GlobalValue *GV = 1615 cast<GlobalAddressSDNode>(N2.getOperand(0))->getGlobal(); 1616 SDValue Tmp1 = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); 1617 SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy()); 1618 AddToISelQueue(Chain); 1619 SDValue Ops[] = { Tmp1, Tmp2, Chain }; 1620 return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, 1621 MVT::Other, Ops, 3); 1622 } 1623 break; 1624 } 1625 } 1626 1627 SDNode *ResNode = SelectCode(N); 1628 1629#ifndef NDEBUG 1630 DOUT << std::string(Indent-2, ' ') << "=> "; 1631 if (ResNode == NULL || ResNode == N.getNode()) 1632 DEBUG(N.getNode()->dump(CurDAG)); 1633 else 1634 DEBUG(ResNode->dump(CurDAG)); 1635 DOUT << "\n"; 1636 Indent -= 2; 1637#endif 1638 1639 return ResNode; 1640} 1641 1642bool X86DAGToDAGISel:: 1643SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, 1644 std::vector<SDValue> &OutOps) { 1645 SDValue Op0, Op1, Op2, Op3; 1646 switch (ConstraintCode) { 1647 case 'o': // offsetable ?? 1648 case 'v': // not offsetable ?? 
1649 default: return true; 1650 case 'm': // memory 1651 if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3)) 1652 return true; 1653 break; 1654 } 1655 1656 OutOps.push_back(Op0); 1657 OutOps.push_back(Op1); 1658 OutOps.push_back(Op2); 1659 OutOps.push_back(Op3); 1660 AddToISelQueue(Op0); 1661 AddToISelQueue(Op1); 1662 AddToISelQueue(Op2); 1663 AddToISelQueue(Op3); 1664 return false; 1665} 1666 1667/// createX86ISelDag - This pass converts a legalized DAG into a 1668/// X86-specific DAG, ready for instruction scheduling. 1669/// 1670FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) { 1671 return new X86DAGToDAGISel(TM, Fast); 1672} 1673