X86ISelDAGToDAG.cpp revision 79964fdbaf11877c463d081ae3ffdc7a540f54c2
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include <queue>
#include <set>
using namespace llvm;

// Pass-level statistics, printed with -stats.
STATISTIC(NumFPKill   , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

namespace {
  // Off-by-default experimental flag: allow folding an 'and' operation into a
  // 'test' instruction during selection.
  static cl::opt<bool>
  FoldAndInTest("x86-fold-and-in-test", cl::desc("Fold and operation in test"),
                cl::init(false), cl::Hidden);
}

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDOperand's instead of register numbers for the leaves of the matched
  /// tree.  It accumulates the components of an x86 address
  /// (base + scale*index + disp) as MatchAddress walks the DAG.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDOperand Reg;
      int FrameIndex;
    } Base;

    bool isRIPRel;      // RIP as base?
    unsigned Scale;     // Index scale factor: 1, 2, 4 or 8.
    SDOperand IndexReg;
    unsigned Disp;      // Constant displacement.
    // At most one of the following symbolic displacements may be set.
    GlobalValue *GV;
    Constant *CP;
    const char *ES;
    int JT;             // Jump-table index, or -1 if none.
    unsigned Align;     // CP alignment.

    X86ISelAddressMode()
      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
        GV(0), CP(0), ES(0), JT(-1), Align(0) {
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// ContainsFPCode - Every instruction we select that uses or defines a FP
    /// register should set this to true.
    bool ContainsFPCode;

    /// FastISel - Enable fast(er) instruction selection.
    ///
    bool FastISel;

    /// TM - Keep a reference to X86TargetMachine.
    ///
    X86TargetMachine &TM;

    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
    /// base register.
    unsigned GlobalBaseReg;

  public:
    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
      : SelectionDAGISel(X86Lowering),
        ContainsFPCode(false), FastISel(fast), TM(tm),
        X86Lowering(*TM.getTargetLowering()),
        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}

    virtual bool runOnFunction(Function &Fn) {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      return SelectionDAGISel::runOnFunction(Fn);
    }

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelectBasicBlock - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    /// CanBeFoldedBy - Returns true if it is safe to fold node N into U
    /// (rooted at Root) without creating a cycle in the DAG.
    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDOperand N);

    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                      bool isRoot = true, unsigned Depth = 0);
    bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                          bool isRoot, unsigned Depth);
    bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                    SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                       SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                             SDOperand N, SDOperand &Base, SDOperand &Scale,
                             SDOperand &Index, SDOperand &Disp,
                             SDOperand &InChain, SDOperand &OutChain);
    bool TryFoldLoad(SDOperand P, SDOperand N,
                     SDOperand &Base, SDOperand &Scale,
                     SDOperand &Index, SDOperand &Disp);
    void PreprocessForRMW(SelectionDAG &DAG);
    void PreprocessForFPConvert(SelectionDAG &DAG);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                              char ConstraintCode,
                                              std::vector<SDOperand> &OutOps,
                                              SelectionDAG &DAG);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Convert a matched X86ISelAddressMode into the four
    /// operand form (Base, Scale, Index, Disp) used by x86 memory instruction
    /// patterns.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                   SDOperand &Scale, SDOperand &Index,
                                   SDOperand &Disp) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      else
        Disp = getI32Imm(AM.Disp);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDOperand getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDOperand getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDOperand getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    /// getTruncate - return an SDNode that implements a subreg based truncate
    /// of the specified operand to the the specified value type.
    SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);

#ifndef NDEBUG
    unsigned Indent;   // Current debug-output indentation level.
#endif
  };
}

/// findFlagUse - Return the user of N's flag result (the last result value),
/// or NULL if the flag result is unused.
static SDNode *findFlagUse(SDNode *N) {
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    SDNode *User = *I;
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDOperand Op = User->getOperand(i);
      if (Op.Val == N && Op.ResNo == FlagResNo)
        return User;
    }
  }
  return NULL;
}

/// findNonImmUse - Recursive DFS helper for isNonImmUse.  Sets 'found' if Def
/// is reachable from Use through an operand path that is neither the immediate
/// use (ImmedUse) nor a direct use by Root, skipping the Skip node.
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, SDNode *Skip, bool &found,
                          std::set<SDNode *> &Visited) {
  // Prune: node IDs are topologically ordered, so a node with an id greater
  // than Def's cannot reach Def; also stop on revisits.
  if (found ||
      Use->getNodeId() > Def->getNodeId() ||
      !Visited.insert(Use).second)
    return;

  for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    if (N == Skip)
      continue;
    if (N == Def) {
      if (Use == ImmedUse)
        continue;   // Immediate use is ok.
      if (Use == Root) {
        // Only the store/cmp roots are allowed to reach Def directly.
        assert(Use->getOpcode() == ISD::STORE ||
               Use->getOpcode() == X86ISD::CMP);
        continue;
      }
      found = true;
      break;
    }
    findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
  }
}

/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
                               SDNode *Skip = NULL) {
  std::set<SDNode *> Visited;
  bool found = false;
  findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
  return found;
}


/// CanBeFoldedBy - Return true if folding node N into its user U (whose
/// pattern root is Root) cannot create a cycle in the DAG.
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
  if (FastISel) return false;   // No folding in fast (-O0) selection.

  // If U use can somehow reach N through another path then U can't fold N or
  // it will create a cycle. e.g. In the following diagram, U can reach N
  // through X. If N is folded into into U, then X is both a predecessor and
  // a successor of U.
  //
  //         [ N ]
  //         ^  ^
  //         |  |
  //        /   \---
  //      /        [X]
  //      |         ^
  //     [U]--------|

  if (isNonImmUse(Root, N, U))
    return false;

  // If U produces a flag, then it gets (even more) interesting. Since it
  // would have been "glued" together with its flag use, we need to check if
  // it might reach N:
  //
  //       [ N ]
  //        ^ ^
  //        | |
  //       [U] \--
  //        ^   [TF]
  //        |    ^
  //        |    |
  //         \  /
  //          [FU]
  //
  // If FU (flag use) indirectly reach N (the load), and U fold N (call it
  // NU), then TF is a predecessor of FU and a successor of NU. But since
  // NU and FU are flagged together, this effectively creates a cycle.
  bool HasFlagUse = false;
  MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
  // Walk the chain of glued flag users to the end of the glued sequence.
  while ((VT == MVT::Flag && !Root->use_empty())) {
    SDNode *FU = findFlagUse(Root);
    if (FU == NULL)
      break;
    else {
      Root = FU;
      HasFlagUse = true;
    }
    VT = Root->getValueType(Root->getNumValues()-1);
  }

  if (HasFlagUse)
    return !isNonImmUse(Root, N, Root, U);
  return true;
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                                 SDOperand Store, SDOperand TF) {
  // Rebuild the TokenFactor's operand list, substituting the load's incoming
  // chain for the load itself.
  std::vector<SDOperand> Ops;
  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
    if (Load.Val == TF.Val->getOperand(i).Val)
      Ops.push_back(Load.Val->getOperand(0));
    else
      Ops.push_back(TF.Val->getOperand(i));
  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // The load now chains off the TokenFactor, and the store off the load.
  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
                         Store.getOperand(2), Store.getOperand(3));
}

/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -fast mode (aka -O0).
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///        \     /
///         \   /
///        [Store]
void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I) {
    // Look for a plain (non-truncating) store whose chain is a TokenFactor.
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDOperand Chain = I->getOperand(0);
    if (Chain.Val->getOpcode() != ISD::TokenFactor)
      continue;

    SDOperand N1 = I->getOperand(1);    // Stored value.
    SDOperand N2 = I->getOperand(2);    // Store address.
    // RMW folding only applies to single-use integer scalar operations.
    if (MVT::isFloatingPoint(N1.getValueType()) ||
        MVT::isVector(N1.getValueType()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDOperand Load;
    unsigned Opcode = N1.Val->getOpcode();
    switch (Opcode) {
      // Commutative operations: the load may be either operand.
      case ISD::ADD:
      case ISD::MUL:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::ADDC:
      case ISD::ADDE: {
        SDOperand N10 = N1.getOperand(0);
        SDOperand N11 = N1.getOperand(1);
        if (ISD::isNON_EXTLoad(N10.Val))
          RModW = true;
        else if (ISD::isNON_EXTLoad(N11.Val)) {
          RModW = true;
          std::swap(N10, N11);
        }
        // The load must chain into the TokenFactor, have a single use, load
        // from the same address the store writes to, and match the op's type.
        RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
          (N10.getOperand(1) == N2) &&
          (N10.Val->getValueType(0) == N1.getValueType());
        if (RModW)
          Load = N10;
        break;
      }
      // Non-commutative operations: the load must be the first operand.
      case ISD::SUB:
      case ISD::SHL:
      case ISD::SRA:
      case ISD::SRL:
      case ISD::ROTL:
      case ISD::ROTR:
      case ISD::SUBC:
      case ISD::SUBE:
      case X86ISD::SHLD:
      case X86ISD::SHRD: {
        SDOperand N10 = N1.getOperand(0);
        if (ISD::isNON_EXTLoad(N10.Val))
          RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() &&
            (N10.getOperand(1) == N2) &&
            (N10.Val->getValueType(0) == N1.getValueType());
        if (RModW)
          Load = N10;
        break;
      }
    }

    if (RModW) {
      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack. This is a
/// gross hack. We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass. We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization. As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
    MVT::ValueType DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    // If this is an FPStack extension (but not a truncation), it is a noop.
    if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
      continue;

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT::ValueType MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ?
SrcVT : DstVT; 507 508 SDOperand MemTmp = DAG.CreateStackTemporary(MemVT); 509 510 // FIXME: optimize the case where the src/dest is a load or store? 511 SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0), 512 MemTmp, NULL, 0, MemVT); 513 SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp, 514 NULL, 0, MemVT); 515 516 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 517 // extload we created. This will cause general havok on the dag because 518 // anything below the conversion could be folded into other existing nodes. 519 // To avoid invalidating 'I', back it up to the convert node. 520 --I; 521 DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result); 522 523 // Now that we did that, the node is dead. Increment the iterator to the 524 // next node to process, then delete N. 525 ++I; 526 DAG.DeleteNode(N); 527 } 528} 529 530/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 531/// when it has created a SelectionDAG for us to codegen. 532void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { 533 DEBUG(BB->dump()); 534 MachineFunction::iterator FirstMBB = BB; 535 536 if (!FastISel) 537 PreprocessForRMW(DAG); 538 539 // FIXME: This should only happen when not -fast. 540 PreprocessForFPConvert(DAG); 541 542 // Codegen the basic block. 543#ifndef NDEBUG 544 DOUT << "===== Instruction selection begins:\n"; 545 Indent = 0; 546#endif 547 DAG.setRoot(SelectRoot(DAG.getRoot())); 548#ifndef NDEBUG 549 DOUT << "===== Instruction selection ends:\n"; 550#endif 551 552 DAG.RemoveDeadNodes(); 553 554 // Emit machine code to BB. 555 ScheduleAndEmitDAG(DAG); 556 557 // If we are emitting FP stack code, scan the basic block to determine if this 558 // block defines any FP values. If so, put an FP_REG_KILL instruction before 559 // the terminator of the block. 560 561 // Note that FP stack instructions are used in all modes for long double, 562 // so we always need to do this check. 
563 // Also note that it's possible for an FP stack register to be live across 564 // an instruction that produces multiple basic blocks (SSE CMOV) so we 565 // must check all the generated basic blocks. 566 567 // Scan all of the machine instructions in these MBBs, checking for FP 568 // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.) 569 MachineFunction::iterator MBBI = FirstMBB; 570 do { 571 bool ContainsFPCode = false; 572 for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end(); 573 !ContainsFPCode && I != E; ++I) { 574 if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) { 575 const TargetRegisterClass *clas; 576 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { 577 if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() && 578 TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && 579 ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) == 580 X86::RFP32RegisterClass || 581 clas == X86::RFP64RegisterClass || 582 clas == X86::RFP80RegisterClass)) { 583 ContainsFPCode = true; 584 break; 585 } 586 } 587 } 588 } 589 // Check PHI nodes in successor blocks. These PHI's will be lowered to have 590 // a copy of the input value in this block. In SSE mode, we only care about 591 // 80-bit values. 592 if (!ContainsFPCode) { 593 // Final check, check LLVM BB's that are successors to the LLVM BB 594 // corresponding to BB for FP PHI nodes. 
595 const BasicBlock *LLVMBB = BB->getBasicBlock(); 596 const PHINode *PN; 597 for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); 598 !ContainsFPCode && SI != E; ++SI) { 599 for (BasicBlock::const_iterator II = SI->begin(); 600 (PN = dyn_cast<PHINode>(II)); ++II) { 601 if (PN->getType()==Type::X86_FP80Ty || 602 (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) || 603 (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) { 604 ContainsFPCode = true; 605 break; 606 } 607 } 608 } 609 } 610 // Finally, if we found any FP code, emit the FP_REG_KILL instruction. 611 if (ContainsFPCode) { 612 BuildMI(*MBBI, MBBI->getFirstTerminator(), 613 TM.getInstrInfo()->get(X86::FP_REG_KILL)); 614 ++NumFPKill; 615 } 616 } while (&*(MBBI++) != BB); 617} 618 619/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 620/// the main function. 621void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 622 MachineFrameInfo *MFI) { 623 const TargetInstrInfo *TII = TM.getInstrInfo(); 624 if (Subtarget->isTargetCygMing()) 625 BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 626} 627 628void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 629 // If this is main, emit special code for main. 630 MachineBasicBlock *BB = MF.begin(); 631 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 632 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 633} 634 635/// MatchAddress - Add the specified node to the specified addressing mode, 636/// returning true if it cannot be done. This just pattern matches for the 637/// addressing mode. 638bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, 639 bool isRoot, unsigned Depth) { 640 // Limit recursion. 641 if (Depth > 5) 642 return MatchAddressBase(N, AM, isRoot, Depth); 643 644 // RIP relative addressing: %rip + 32-bit displacement! 
  if (AM.isRIPRel) {
    // With RIP as the base, only a 32-bit constant displacement can be added.
    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
      int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
      if (isInt32(AM.Disp + Val)) {
        AM.Disp += Val;
        return false;
      }
    }
    return true;
  }

  int id = N.Val->getNodeId();
  bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    // Fold the constant into the displacement if the sum still fits in 32 bits.
    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
    if (isInt32(AM.Disp + Val)) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::Wrapper: {
    bool is64Bit = Subtarget->is64Bit();
    // Under X86-64 non-small code model, GV (and friends) are 64-bits.
    // Also, base and index reg must be 0 in order to use rip as base.
    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
                    AM.Base.Reg.Val || AM.IndexReg.Val))
      break;
    // Only one symbolic displacement is allowed per address.
    if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
      break;
    // If value is available in a register both base and index components have
    // been picked, we can't fit the result available in the register in the
    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
    if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
      SDOperand N0 = N.getOperand(0);
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
        GlobalValue *GV = G->getGlobal();
        AM.GV = GV;
        AM.Disp += G->getOffset();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
        AM.CP = CP->getConstVal();
        AM.Align = CP->getAlignment();
        AM.Disp += CP->getOffset();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
        AM.ES = S->getSymbol();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
        AM.JT = J->getIndex();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      }
    }
    break;
  }

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    // A shift by 1/2/3 becomes a scale of 2/4/8 on the index register.
    if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
      break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
      unsigned Val = CN->getValue();
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDOperand ShVal = N.Val->getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
            isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
          AM.IndexReg = ShVal.Val->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.Val->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
          if (isInt32(Disp))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
      break;
    }
    // NOTE(review): when the shift amount is not a constant, control falls
    // through into the SMUL_LOHI/MUL cases below — TODO confirm this
    // fall-through is intended (it is harmless, since the MUL case's
    // constant check will also fail, but it looks accidental).

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.ResNo != 0) break;
    // FALL THROUGH
  case ISD::MUL:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (!AlreadySelected &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.Val == 0 &&
        AM.IndexReg.Val == 0 &&
        !AM.isRIPRel) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
          AM.Scale = unsigned(CN->getValue())-1;

          SDOperand MulVal = N.Val->getOperand(0);
          SDOperand Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
            Reg = MulVal.Val->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
            if (isInt32(Disp))
              AM.Disp = Disp;
            else
              Reg = N.Val->getOperand(0);
          } else {
            Reg = N.Val->getOperand(0);
          }

          // Same register serves as both base and (scaled) index: X + X*Scale.
          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::ADD:
    if (!AlreadySelected) {
      // Try matching the operands in both orders, restoring AM on failure.
      X86ISelAddressMode Backup = AM;
      if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
          !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
        return false;
      AM = Backup;
      if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
          !MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
        return false;
      AM = Backup;
    }
    break;

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (AlreadySelected) break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      // Start with the LHS as an addr mode.
      if (!MatchAddress(N.getOperand(0), AM, false) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          isInt32(AM.Disp + CN->getSignExtended()) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getValue())) {
        AM.Disp += CN->getValue();
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (AlreadySelected) break;
    SDOperand Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SHL) break;

    // Scale must not be used already.
    if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;

    // Not when RIP is used as the base.
    if (AM.isRIPRel) break;

    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
                                           SDOperand(C2, 0), SDOperand(C1, 0));
    SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
                                       Shift.getOperand(0), NewANDMask);
    // Reuse the node ids of the nodes being replaced so the selector's
    // bookkeeping stays consistent.
    NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
    NewAND.Val->setNodeId(N.Val->getNodeId());

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  return MatchAddressBase(N, AM, isRoot, Depth);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                                       bool isRoot, unsigned Depth) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.Val == 0 && !AM.isRIPRel) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                                 SDOperand &Scale, SDOperand &Index,
                                 SDOperand &Disp) {
  X86ISelAddressMode AM;
  // Note: MatchAddress returns true on FAILURE, so this bails out.
  if (MatchAddress(N, AM))
    return false;

  // Fill any unused base/index slot with register 0 (the "no register"
  // sentinel in x86 memory operands).
  MVT::ValueType VT = N.getValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.Val)
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.Val)
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp);
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}


/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                                          SDOperand N, SDOperand &Base,
                                          SDOperand &Scale, SDOperand &Index,
                                          SDOperand &Disp, SDOperand &InChain,
                                          SDOperand &OutChain) {
  // Pattern 1: (scalar_to_vector (load ...)) where the load feeds only this
  // node and folding it below Pred/Op does not create a cycle.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.Val) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
      N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(1).Val->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
      N.getOperand(1).getOperand(0).hasOneUse()) {
    // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
    // from the LHS.
    unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
    SDOperand ShufMask = N.getOperand(2);
    assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
      if (C->getValue() == VecWidth) {
        for (unsigned i = 1; i != VecWidth; ++i) {
          if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
            // ok.
          } else {
            ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
            if (C->getValue() >= VecWidth) return false;
          }
        }
      }

      // NOTE(review): this fold also runs when the leading mask index is NOT
      // VecWidth -- the guard above only rejects via the early returns inside
      // the loop.  Looks like a misplaced brace; confirm whether the fold
      // should be conditional on C->getValue() == VecWidth.

      // Okay, this is a zero extending load.  Fold it.
      LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
        return false;
      OutChain = LD->getChain();
      InChain = SDOperand(LD, 1);
      return true;
    }
  }
  return false;
}


/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
                                    SDOperand &Base, SDOperand &Scale,
                                    SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  if (MatchAddress(N, AM))
    return false;

  // Score the matched mode: LEA is only worthwhile when it combines enough
  // components (base + index + scale + disp) to beat simple add/shift code.
  MVT::ValueType VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.Val)
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.Val)
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
    Complexity++;

  if (Complexity > 2) {
    getAddressOperands(AM, Base, Scale, Index, Disp);
    return true;
  }
  return false;
}

/// TryFoldLoad - Try to fold load N into user P as a memory operand.  Only
/// plain (non-extending) single-use loads are candidates, and CanBeFoldedBy
/// rules out folds that would create a cycle in the DAG.
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (ISD::isNON_EXTLoad(N.Val) &&
      N.hasOneUse() &&
      CanBeFoldedBy(N.Val, P.Val, P.Val))
    // Operand 1 of a load is its base pointer; match it as an address.
    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
  return false;
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
/// The MOVPC32r (and optional GOT add) is emitted once, at the top of the
/// function's first basic block; the resulting vreg is cached in
/// GlobalBaseReg and reused on subsequent calls.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineFunction *MF = BB->getParent();
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

    const TargetInstrInfo *TII = TM.getInstrInfo();
    // Operand of MovePCtoStack is completely ignored by asm printer. It's
    // only used in JIT code emission as displacement to pc.
    BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);

    // If we're using vanilla 'GOT' PIC style, we should use relative addressing
    // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
    if (TM.getRelocationModel() == Reloc::PIC_ &&
        Subtarget->isPICStyleGOT()) {
      GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
        .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
    } else {
      GlobalBaseReg = PC;
    }

  }
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}

/// FindCallStartFromCall - Walk up the token chain from a call-related node
/// until the CALLSEQ_START that opened the call sequence is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).Val);
}

/// getTruncate - Emit the nodes that truncate N0 down to the (narrower)
/// integer type VT using EXTRACT_SUBREG.  On 32-bit targets an i8 result
/// first requires copying the source into a register class that has an
/// 8-bit subregister (MOV16to16_/MOV32to32_).
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
  SDOperand SRIdx;
  switch (VT) {
  case MVT::i8:
    SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
    // Ensure that the source register has an 8-bit subreg on 32-bit targets
    if (!Subtarget->is64Bit()) {
      unsigned Opc;
      // NOTE: this inner VT intentionally shadows the parameter; it is the
      // type of the intermediate register-class copy, not the result type.
      MVT::ValueType VT;
      switch (N0.getValueType()) {
      default: assert(0 && "Unknown truncate!");
      case MVT::i16:
        Opc = X86::MOV16to16_;
        VT = MVT::i16;
        break;
      case MVT::i32:
        Opc = X86::MOV32to32_;
        VT = MVT::i32;
        break;
      }
      N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
      return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                   VT, N0, SRIdx, N0.getValue(1));
    }
    break;
  case MVT::i16:
    SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
    break;
  case MVT::i32:
    SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
    break;
  default: assert(0 && "Unknown truncate!"); break;
  }
  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
}


/// Select - Main instruction-selection entry point.  Handles the node kinds
/// that need custom code (fixed-register multiplies/divides, extends,
/// truncates, PIC base, etc.) and defers everything else to the tblgen'd
/// SelectCode.  Returns NULL when the node was replaced in place.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  // Target-specific nodes below BUILTIN_OP_END have already been selected.
  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
    default: break;
    case X86ISD::GlobalBaseReg: 
      return getGlobalBaseReg();

    case X86ISD::FP_GET_RESULT2: {
      // Lower to FpGETRESULT80x2, which produces two f80 results plus a
      // chain (value 2) and a flag (value 3).
      SDOperand Chain = N.getOperand(0);
      SDOperand InFlag = N.getOperand(1);
      AddToISelQueue(Chain);
      AddToISelQueue(InFlag);
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f80);
      Tys.push_back(MVT::f80);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      SDOperand Ops[] = { Chain, InFlag };
      SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
                                              Ops, 2);
      Chain = SDOperand(ResNode, 2);
      InFlag = SDOperand(ResNode, 3);
      ReplaceUses(SDOperand(N.Val, 2), Chain);
      ReplaceUses(SDOperand(N.Val, 3), InFlag);
      return ResNode;
    }

    case ISD::ADD: {
      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
      // code and is matched first so to prevent it from being turned into
      // LEA32r X+c.
      // In 64-bit small code size mode, use LEA to take advantage of
      // RIP-relative addressing.
      if (TM.getCodeModel() != CodeModel::Small)
        break;
      MVT::ValueType PtrVT = TLI.getPointerTy();
      SDOperand N0 = N.getOperand(0);
      SDOperand N1 = N.getOperand(1);
      if (N.Val->getValueType(0) == PtrVT &&
          N0.getOpcode() == X86ISD::Wrapper &&
          N1.getOpcode() == ISD::Constant) {
        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
        SDOperand C(0, 0);
        // TODO: handle ExternalSymbolSDNode.
        if (GlobalAddressSDNode *G =
            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
          // Fold the constant offset directly into the global address.
          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                             G->getOffset() + Offset);
        } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
          C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                            CP->getAlignment(),
                                            CP->getOffset()+Offset);
        }

        if (C.Val) {
          if (Subtarget->is64Bit()) {
            SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                                CurDAG->getRegister(0, PtrVT), C };
            return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
          } else
            return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
        }
      }

      // Other cases are handled by auto-generated code.
      break;
    }

    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI: {
      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      // There are several forms of IMUL that just return the low part and
      // don't have fixed-register operands. If we don't need the high part,
      // use these instead. They can be selected with the generated ISel code.
      if (NVT != MVT::i8 &&
          N.getValue(1).use_empty()) {
        N = CurDAG->getNode(ISD::MUL, NVT, N0, N1);
        break;
      }

      bool isSigned = Opcode == ISD::SMUL_LOHI;
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
        }

      // MUL/IMUL implicitly use the A register and write the result into
      // the Lo/Hi register pair below.
      unsigned LoReg, HiReg;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      // multiply is commutative
      if (!foldedLoad) {
        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
        if (foldedLoad)
          std::swap(N0, N1);
      }

      AddToISelQueue(N0);
      // Place the first operand into the fixed low register; the flag result
      // glues the copy to the multiply.
      SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
                                              N0, SDOperand()).getValue(1);

      if (foldedLoad) {
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        InFlag = SDOperand(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the low half of the result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                  LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the high half of the result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDOperand Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                   MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      return NULL;
    }

    case ISD::SDIVREM:
    case ISD::UDIVREM: {
      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      bool isSigned = Opcode == ISD::SDIVREM;
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
        }

      // DIV/IDIV divide the Hi:Lo register pair; the high part must first be
      // sign-extended (SExtOpcode) or zeroed (ClrOpcode) as appropriate.
      unsigned LoReg, HiReg;
      unsigned ClrOpcode, SExtOpcode;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:
        LoReg = X86::AL;  HiReg = X86::AH;
        ClrOpcode  = 0;   // i8 division uses AX directly; see the zero-extend
                          // special case below.
        SExtOpcode = X86::CBW;
        break;
      case MVT::i16:
        LoReg = X86::AX;  HiReg = X86::DX;
        ClrOpcode  = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        break;
      case MVT::i32:
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode  = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        break;
      case MVT::i64:
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode  = X86::MOV64r0;
        SExtOpcode = X86::CQO;
        break;
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);

      SDOperand InFlag;
      if (NVT == MVT::i8 && !isSigned) {
        // Special case for div8, just use a move with zero extension to AX to
        // clear the upper 8 bits (AH).
        // NOTE(review): these Tmp0-3 intentionally shadow the outer ones; the
        // fold here is of the dividend N0, not the divisor N1.
        SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
          SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
          AddToISelQueue(N0.getOperand(0));
          AddToISelQueue(Tmp0);
          AddToISelQueue(Tmp1);
          AddToISelQueue(Tmp2);
          AddToISelQueue(Tmp3);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                            Ops, 5), 0);
          Chain = Move.getValue(1);
          ReplaceUses(N0.getValue(1), Chain);
        } else {
          AddToISelQueue(N0);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
          Chain = CurDAG->getEntryNode();
        }
        Chain  = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
        InFlag = Chain.getValue(1);
      } else {
        AddToISelQueue(N0);
        InFlag =
          CurDAG->getCopyToReg(CurDAG->getEntryNode(),
                               LoReg, N0, SDOperand()).getValue(1);
        if (isSigned) {
          // Sign extend the low part into the high part.
          InFlag =
            SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
        } else {
          // Zero out the high part, effectively zero extending the input.
          SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
                                        ClrNode, InFlag).getValue(1);
        }
      }

      if (foldedLoad) {
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        InFlag = SDOperand(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the division (low) result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                  LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the remainder (high) result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDOperand Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                   MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      return NULL;
    }

    case ISD::ANY_EXTEND: {
      // Any-extend is modeled as inserting the narrow value into an undefined
      // wider register via INSERT_SUBREG.
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);
      if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
        SDOperand SRIdx;
        switch(N0.getValueType()) {
        case MVT::i32:
          SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
          break;
        case MVT::i16:
          SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
          break;
        case MVT::i8:
          // Only 64-bit targets have an 8-bit subreg in every GPR; otherwise
          // SRIdx stays null and the generated ISel handles it.
          if (Subtarget->is64Bit())
            SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          break;
        default: assert(0 && "Unknown any_extend!");
        }
        if (SRIdx.Val) {
          SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
                                                  NVT, N0, SRIdx);

#ifndef NDEBUG
          DOUT << std::string(Indent-2, ' ') << "=> ";
          DEBUG(ResNode->dump(CurDAG));
          DOUT << "\n";
          Indent -= 2;
#endif
          return ResNode;
        } // Otherwise let generated ISel handle it.
      }
      break;
    }

    case ISD::SIGN_EXTEND_INREG: {
      // Lowered as truncate-to-SVT followed by a MOVSX back up to NVT.
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);

      MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
      SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
      unsigned Opc = 0;
      switch (NVT) {
      case MVT::i16:
        if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
        else assert(0 && "Unknown sign_extend_inreg!");
        break;
      case MVT::i32:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
        case MVT::i16: Opc = X86::MOVSX32rr16; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      case MVT::i64:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
        case MVT::i16: Opc = X86::MOVSX64rr16; break;
        case MVT::i32: Opc = X86::MOVSX64rr32; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      default: assert(0 && "Unknown sign_extend_inreg!");
      }

      SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(TruncOp.Val->dump(CurDAG));
      DOUT << "\n";
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
      break;
    }

    case ISD::TRUNCATE: {
      SDOperand Input = Node->getOperand(0);
      AddToISelQueue(Node->getOperand(0));
      SDNode *ResNode = getTruncate(Input, NVT);

#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(ResNode->dump(CurDAG));
        DOUT << "\n";
        Indent -= 2;
#endif
      return ResNode;
      break;
    }
  }

  // Everything not handled above goes to the tblgen-generated matcher.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Expand an inline-asm memory constraint into
/// the four address operands (base, scale, index, disp).  Returns true on
/// failure (unsupported constraint or unmatched address), per the
/// TargetLowering convention.
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
  SDOperand Op0, Op1, Op2, Op3;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
      return true;
    break;
  }
  
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  AddToISelQueue(Op0);
  AddToISelQueue(Op1);
  AddToISelQueue(Op2);
  AddToISelQueue(Op3);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
  return new X86DAGToDAGISel(TM, Fast);
}