X86ISelDAGToDAG.cpp revision 07b7ea1a48a89675c22d66943b8458ebb59d8e4a
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a DAG pattern matching instruction selector for X86, 11// converting from a legalized dag to a X86 dag. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "x86-isel" 16#include "X86.h" 17#include "X86InstrBuilder.h" 18#include "X86ISelLowering.h" 19#include "X86MachineFunctionInfo.h" 20#include "X86RegisterInfo.h" 21#include "X86Subtarget.h" 22#include "X86TargetMachine.h" 23#include "llvm/GlobalValue.h" 24#include "llvm/Instructions.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/Support/CFG.h" 27#include "llvm/Type.h" 28#include "llvm/CodeGen/MachineConstantPool.h" 29#include "llvm/CodeGen/MachineFunction.h" 30#include "llvm/CodeGen/MachineFrameInfo.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/CodeGen/SelectionDAGISel.h" 34#include "llvm/Target/TargetMachine.h" 35#include "llvm/Support/CommandLine.h" 36#include "llvm/Support/Compiler.h" 37#include "llvm/Support/Debug.h" 38#include "llvm/Support/MathExtras.h" 39#include "llvm/ADT/Statistic.h" 40#include <queue> 41#include <set> 42using namespace llvm; 43 44STATISTIC(NumFPKill , "Number of FP_REG_KILL instructions added"); 45STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 46 47//===----------------------------------------------------------------------===// 48// Pattern Matcher Implementation 49//===----------------------------------------------------------------------===// 50 51namespace { 52 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 53 /// SDOperand's instead of register numbers for 
/// the leaves of the matched tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDOperand Reg;
      int FrameIndex;
    } Base;

    bool isRIPRel;      // RIP as base?
    unsigned Scale;     // Scale applied to IndexReg: 1, 2, 4 or 8.
    SDOperand IndexReg;
    // NOTE(review): Disp is declared unsigned but is accumulated with signed
    // values (getSignExtended) in MatchAddress — presumably relies on wrap to
    // represent negative offsets; confirm.
    unsigned Disp;
    GlobalValue *GV;    // At most one of GV/CP/ES/JT is the symbolic base.
    Constant *CP;
    const char *ES;
    int JT;             // -1 means "no jump table".
    unsigned Align;     // CP alignment.

    X86ISelAddressMode()
      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
        GV(0), CP(0), ES(0), JT(-1), Align(0) {
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// ContainsFPCode - Every instruction we select that uses or defines a FP
    /// register should set this to true.
    /// NOTE(review): a local of the same name in InstructionSelectBasicBlock
    /// shadows this member — confirm the member is still needed.
    bool ContainsFPCode;

    /// FastISel - Enable fast(er) instruction selection.
    ///
    bool FastISel;

    /// TM - Keep a reference to X86TargetMachine.
    ///
    X86TargetMachine &TM;

    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
    /// base register.
    unsigned GlobalBaseReg;

  public:
    // NOTE(review): the base class is handed a reference to X86Lowering
    // before that member is constructed (members initialize in declaration
    // order). Safe only if SelectionDAGISel merely stores the reference —
    // confirm it does not use it during construction.
    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
      : SelectionDAGISel(X86Lowering),
        ContainsFPCode(false), FastISel(fast), TM(tm),
        X86Lowering(*TM.getTargetLowering()),
        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}

    virtual bool runOnFunction(Function &Fn) {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      return SelectionDAGISel::runOnFunction(Fn);
    }

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelectBasicBlock - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDOperand N);

    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                      bool isRoot = true, unsigned Depth = 0);
    bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                          bool isRoot, unsigned Depth);
    bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                    SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                       SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                             SDOperand N, SDOperand &Base, SDOperand &Scale,
                             SDOperand &Index, SDOperand &Disp,
                             SDOperand &InChain, SDOperand &OutChain);
    bool TryFoldLoad(SDOperand P, SDOperand N,
                     SDOperand &Base, SDOperand &Scale,
                     SDOperand &Index, SDOperand &Disp);
    void PreprocessForRMW(SelectionDAG &DAG);
    void PreprocessForFPConvert(SelectionDAG &DAG);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                              char ConstraintCode,
                                              std::vector<SDOperand> &OutOps,
                                              SelectionDAG &DAG);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Lower a matched X86ISelAddressMode into the four
    /// SDOperands an x86 memory reference is built from.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                   SDOperand &Scale, SDOperand &Index,
                                   SDOperand &Disp) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      // Note: the external-symbol and jump-table forms carry no offset
      // operand, so any accumulated AM.Disp is not representable with them.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      else
        Disp = getI32Imm(AM.Disp);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDOperand getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDOperand getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDOperand getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    /// getTruncate - return an SDNode that implements a subreg based truncate
    /// of the specified operand to the the specified value type.
    SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);

#ifndef NDEBUG
    unsigned Indent;    // Debug-only nesting depth for selection tracing.
#endif
  };
}

/// findFlagUse - Return a user of N that consumes N's last result value
/// (assumed to be its flag result), or NULL if there is none.
static SDNode *findFlagUse(SDNode *N) {
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    SDNode *User = *I;
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDOperand Op = User->getOperand(i);
      if (Op.Val == N && Op.ResNo == FlagResNo)
        return User;
    }
  }
  return NULL;
}

/// findNonImmUse - Recursive worker for isNonImmUse below. Walks operands of
/// Use looking for Def, skipping the Skip node, ignoring the immediate use by
/// ImmedUse and the (chain) use by Root itself. Sets found on success.
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, SDNode *Skip, bool &found,
                          std::set<SDNode *> &Visited) {
  // Node ids increase topologically, so a node with a larger id than Def
  // cannot reach Def; this prunes the search.
  if (found ||
      Use->getNodeId() > Def->getNodeId() ||
      !Visited.insert(Use).second)
    return;

  for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    if (N == Skip)
      continue;
    if (N == Def) {
      if (Use == ImmedUse)
        continue;  // Immediate use is ok.
      if (Use == Root) {
        assert(Use->getOpcode() == ISD::STORE ||
               Use->getOpcode() == X86ISD::CMP);
        continue;
      }
      found = true;
      break;
    }
    findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
  }
}

/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
274static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, 275 SDNode *Skip = NULL) { 276 std::set<SDNode *> Visited; 277 bool found = false; 278 findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited); 279 return found; 280} 281 282 283bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const { 284 if (FastISel) return false; 285 286 // If U use can somehow reach N through another path then U can't fold N or 287 // it will create a cycle. e.g. In the following diagram, U can reach N 288 // through X. If N is folded into into U, then X is both a predecessor and 289 // a successor of U. 290 // 291 // [ N ] 292 // ^ ^ 293 // | | 294 // / \--- 295 // / [X] 296 // | ^ 297 // [U]--------| 298 299 if (isNonImmUse(Root, N, U)) 300 return false; 301 302 // If U produces a flag, then it gets (even more) interesting. Since it 303 // would have been "glued" together with its flag use, we need to check if 304 // it might reach N: 305 // 306 // [ N ] 307 // ^ ^ 308 // | | 309 // [U] \-- 310 // ^ [TF] 311 // | ^ 312 // | | 313 // \ / 314 // [FU] 315 // 316 // If FU (flag use) indirectly reach N (the load), and U fold N (call it 317 // NU), then TF is a predecessor of FU and a successor of NU. But since 318 // NU and FU are flagged together, this effectively creates a cycle. 319 bool HasFlagUse = false; 320 MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1); 321 while ((VT == MVT::Flag && !Root->use_empty())) { 322 SDNode *FU = findFlagUse(Root); 323 if (FU == NULL) 324 break; 325 else { 326 Root = FU; 327 HasFlagUse = true; 328 } 329 VT = Root->getValueType(Root->getNumValues()-1); 330 } 331 332 if (HasFlagUse) 333 return !isNonImmUse(Root, N, Root, U); 334 return true; 335} 336 337/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 338/// and move load below the TokenFactor. Replace store's chain operand with 339/// load's chain result. 
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                                 SDOperand Store, SDOperand TF) {
  // Rebuild the TokenFactor's operand list, substituting the load's own
  // incoming chain for the load itself.
  std::vector<SDOperand> Ops;
  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
    if (Load.Val == TF.Val->getOperand(i).Val)
      Ops.push_back(Load.Val->getOperand(0));
    else
      Ops.push_back(TF.Val->getOperand(i));
  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // Load now chains on the TokenFactor; store chains on the load's result.
  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
                         Store.getOperand(2), Store.getOperand(3));
}

/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -fast mode (aka -O0).
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I) {
    // Only plain (non-truncating) stores chained on a TokenFactor qualify.
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDOperand Chain = I->getOperand(0);
    if (Chain.Val->getOpcode() != ISD::TokenFactor)
      continue;

    // N1 = stored value, N2 = store address.
    SDOperand N1 = I->getOperand(1);
    SDOperand N2 = I->getOperand(2);
    if (MVT::isFloatingPoint(N1.getValueType()) ||
        MVT::isVector(N1.getValueType()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDOperand Load;
    unsigned Opcode = N1.Val->getOpcode();
    switch (Opcode) {
      // Commutative operations: the load may be either operand.
      case ISD::ADD:
      case ISD::MUL:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::ADDC:
      case ISD::ADDE: {
        SDOperand N10 = N1.getOperand(0);
        SDOperand N11 = N1.getOperand(1);
        if (ISD::isNON_EXTLoad(N10.Val))
          RModW = true;
        else if (ISD::isNON_EXTLoad(N11.Val)) {
          RModW = true;
          std::swap(N10, N11);
        }
        // The load must feed the TokenFactor, be single-use, load from the
        // same address the store writes to, and match the operation's type.
        RModW = RModW && N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
          (N10.getOperand(1) == N2) &&
          (N10.Val->getValueType(0) == N1.getValueType());
        if (RModW)
          Load = N10;
        break;
      }
      // Non-commutative operations: the load must be the first operand.
      case ISD::SUB:
      case ISD::SHL:
      case ISD::SRA:
      case ISD::SRL:
      case ISD::ROTL:
      case ISD::ROTR:
      case ISD::SUBC:
      case ISD::SUBE:
      case X86ISD::SHLD:
      case X86ISD::SHRD: {
        SDOperand N10 = N1.getOperand(0);
        if (ISD::isNON_EXTLoad(N10.Val))
          RModW = N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
            (N10.getOperand(1) == N2) &&
            (N10.Val->getValueType(0) == N1.getValueType());
        if (RModW)
          Load = N10;
        break;
      }
    }

    if (RModW) {
      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack. This is a
/// gross hack. We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass. We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization. As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
    MVT::ValueType DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    // If this is an FPStack extension (but not a truncation), it is a noop.
    if (!SrcIsSSE && !DstIsSSE && N->getOpcode() == ISD::FP_EXTEND)
      continue;

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT::ValueType MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
                                        MemTmp, NULL, 0, MemVT);
    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
                                      NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    DAG.DeleteNode(N);
  }
}

/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
  DEBUG(BB->dump());
  MachineFunction::iterator FirstMBB = BB;

  if (!FastISel)
    PreprocessForRMW(DAG);

  // FIXME: This should only happen when not -fast.
  PreprocessForFPConvert(DAG);

  // Codegen the basic block.
#ifndef NDEBUG
  DOUT << "===== Instruction selection begins:\n";
  Indent = 0;
#endif
  DAG.setRoot(SelectRoot(DAG.getRoot()));
#ifndef NDEBUG
  DOUT << "===== Instruction selection ends:\n";
#endif

  DAG.RemoveDeadNodes();

  // Emit machine code to BB.
  ScheduleAndEmitDAG(DAG);

  // If we are emitting FP stack code, scan the basic block to determine if this
  // block defines any FP values. If so, put an FP_REG_KILL instruction before
  // the terminator of the block.

  // Note that FP stack instructions are used in all modes for long double,
  // so we always need to do this check.
  // Also note that it's possible for an FP stack register to be live across
  // an instruction that produces multiple basic blocks (SSE CMOV) so we
  // must check all the generated basic blocks.

  // Scan all of the machine instructions in these MBBs, checking for FP
  // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
  MachineFunction::iterator MBBI = FirstMBB;
  do {
    // This local shadows the ContainsFPCode member.
    bool ContainsFPCode = false;
    for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end();
         !ContainsFPCode && I != E; ++I) {
      if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
        const TargetRegisterClass *clas;
        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
          // NOTE(review): the def/virtual checks use operand 'op' but the
          // register class is fetched from operand 0 — looks like it should
          // be getOperand(op); confirm (only matters for instructions whose
          // FP def is not operand 0).
          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
              TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
              ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
                 X86::RFP32RegisterClass ||
               clas == X86::RFP64RegisterClass ||
               clas == X86::RFP80RegisterClass)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
    // Check PHI nodes in successor blocks. These PHI's will be lowered to have
    // a copy of the input value in this block. In SSE mode, we only care about
    // 80-bit values.
    if (!ContainsFPCode) {
      // Final check, check LLVM BB's that are successors to the LLVM BB
      // corresponding to BB for FP PHI nodes.
      const BasicBlock *LLVMBB = BB->getBasicBlock();
      const PHINode *PN;
      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
           !ContainsFPCode && SI != E; ++SI) {
        for (BasicBlock::const_iterator II = SI->begin();
             (PN = dyn_cast<PHINode>(II)); ++II) {
          if (PN->getType()==Type::X86_FP80Ty ||
              (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
              (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
    if (ContainsFPCode) {
      BuildMI(*MBBI, MBBI->getFirstTerminator(),
              TM.getInstrInfo()->get(X86::FP_REG_KILL));
      ++NumFPKill;
    }
  } while (&*(MBBI++) != BB);
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  // Cygwin/MinGW require an explicit __main call before main's body runs.
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                                   bool isRoot, unsigned Depth) {
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM, isRoot, Depth);

  // RIP relative addressing: %rip + 32-bit displacement!
639 if (AM.isRIPRel) { 640 if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) { 641 int64_t Val = cast<ConstantSDNode>(N)->getSignExtended(); 642 if (isInt32(AM.Disp + Val)) { 643 AM.Disp += Val; 644 return false; 645 } 646 } 647 return true; 648 } 649 650 int id = N.Val->getNodeId(); 651 bool AlreadySelected = isSelected(id); // Already selected, not yet replaced. 652 653 switch (N.getOpcode()) { 654 default: break; 655 case ISD::Constant: { 656 int64_t Val = cast<ConstantSDNode>(N)->getSignExtended(); 657 if (isInt32(AM.Disp + Val)) { 658 AM.Disp += Val; 659 return false; 660 } 661 break; 662 } 663 664 case X86ISD::Wrapper: { 665 bool is64Bit = Subtarget->is64Bit(); 666 // Under X86-64 non-small code model, GV (and friends) are 64-bits. 667 // Also, base and index reg must be 0 in order to use rip as base. 668 if (is64Bit && (TM.getCodeModel() != CodeModel::Small || 669 AM.Base.Reg.Val || AM.IndexReg.Val)) 670 break; 671 if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1) 672 break; 673 // If value is available in a register both base and index components have 674 // been picked, we can't fit the result available in the register in the 675 // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement. 
676 if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) { 677 SDOperand N0 = N.getOperand(0); 678 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 679 GlobalValue *GV = G->getGlobal(); 680 AM.GV = GV; 681 AM.Disp += G->getOffset(); 682 AM.isRIPRel = TM.getRelocationModel() != Reloc::Static && 683 Subtarget->isPICStyleRIPRel(); 684 return false; 685 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 686 AM.CP = CP->getConstVal(); 687 AM.Align = CP->getAlignment(); 688 AM.Disp += CP->getOffset(); 689 AM.isRIPRel = TM.getRelocationModel() != Reloc::Static && 690 Subtarget->isPICStyleRIPRel(); 691 return false; 692 } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) { 693 AM.ES = S->getSymbol(); 694 AM.isRIPRel = TM.getRelocationModel() != Reloc::Static && 695 Subtarget->isPICStyleRIPRel(); 696 return false; 697 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 698 AM.JT = J->getIndex(); 699 AM.isRIPRel = TM.getRelocationModel() != Reloc::Static && 700 Subtarget->isPICStyleRIPRel(); 701 return false; 702 } 703 } 704 break; 705 } 706 707 case ISD::FrameIndex: 708 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) { 709 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 710 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 711 return false; 712 } 713 break; 714 715 case ISD::SHL: 716 if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel) 717 break; 718 719 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) { 720 unsigned Val = CN->getValue(); 721 if (Val == 1 || Val == 2 || Val == 3) { 722 AM.Scale = 1 << Val; 723 SDOperand ShVal = N.Val->getOperand(0); 724 725 // Okay, we know that we have a scale by now. However, if the scaled 726 // value is an add of something and a constant, we can fold the 727 // constant into the disp field here. 
728 if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 729 isa<ConstantSDNode>(ShVal.Val->getOperand(1))) { 730 AM.IndexReg = ShVal.Val->getOperand(0); 731 ConstantSDNode *AddVal = 732 cast<ConstantSDNode>(ShVal.Val->getOperand(1)); 733 uint64_t Disp = AM.Disp + (AddVal->getValue() << Val); 734 if (isInt32(Disp)) 735 AM.Disp = Disp; 736 else 737 AM.IndexReg = ShVal; 738 } else { 739 AM.IndexReg = ShVal; 740 } 741 return false; 742 } 743 break; 744 } 745 746 case ISD::SMUL_LOHI: 747 case ISD::UMUL_LOHI: 748 // A mul_lohi where we need the low part can be folded as a plain multiply. 749 if (N.ResNo != 0) break; 750 // FALL THROUGH 751 case ISD::MUL: 752 // X*[3,5,9] -> X+X*[2,4,8] 753 if (!AlreadySelected && 754 AM.BaseType == X86ISelAddressMode::RegBase && 755 AM.Base.Reg.Val == 0 && 756 AM.IndexReg.Val == 0 && 757 !AM.isRIPRel) { 758 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) 759 if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) { 760 AM.Scale = unsigned(CN->getValue())-1; 761 762 SDOperand MulVal = N.Val->getOperand(0); 763 SDOperand Reg; 764 765 // Okay, we know that we have a scale by now. However, if the scaled 766 // value is an add of something and a constant, we can fold the 767 // constant into the disp field here. 
768 if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 769 isa<ConstantSDNode>(MulVal.Val->getOperand(1))) { 770 Reg = MulVal.Val->getOperand(0); 771 ConstantSDNode *AddVal = 772 cast<ConstantSDNode>(MulVal.Val->getOperand(1)); 773 uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue(); 774 if (isInt32(Disp)) 775 AM.Disp = Disp; 776 else 777 Reg = N.Val->getOperand(0); 778 } else { 779 Reg = N.Val->getOperand(0); 780 } 781 782 AM.IndexReg = AM.Base.Reg = Reg; 783 return false; 784 } 785 } 786 break; 787 788 case ISD::ADD: 789 if (!AlreadySelected) { 790 X86ISelAddressMode Backup = AM; 791 if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) && 792 !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1)) 793 return false; 794 AM = Backup; 795 if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) && 796 !MatchAddress(N.Val->getOperand(0), AM, false, Depth+1)) 797 return false; 798 AM = Backup; 799 } 800 break; 801 802 case ISD::OR: 803 // Handle "X | C" as "X + C" iff X is known to have C bits clear. 804 if (AlreadySelected) break; 805 806 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 807 X86ISelAddressMode Backup = AM; 808 // Start with the LHS as an addr mode. 809 if (!MatchAddress(N.getOperand(0), AM, false) && 810 // Address could not have picked a GV address for the displacement. 811 AM.GV == NULL && 812 // On x86-64, the resultant disp must fit in 32-bits. 813 isInt32(AM.Disp + CN->getSignExtended()) && 814 // Check to see if the LHS & C is zero. 815 CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { 816 AM.Disp += CN->getValue(); 817 return false; 818 } 819 AM = Backup; 820 } 821 break; 822 823 case ISD::AND: { 824 // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this 825 // allows us to fold the shift into this addressing mode. 
826 if (AlreadySelected) break; 827 SDOperand Shift = N.getOperand(0); 828 if (Shift.getOpcode() != ISD::SHL) break; 829 830 // Scale must not be used already. 831 if (AM.IndexReg.Val != 0 || AM.Scale != 1) break; 832 833 // Not when RIP is used as the base. 834 if (AM.isRIPRel) break; 835 836 ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); 837 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 838 if (!C1 || !C2) break; 839 840 // Not likely to be profitable if either the AND or SHIFT node has more 841 // than one use (unless all uses are for address computation). Besides, 842 // isel mechanism requires their node ids to be reused. 843 if (!N.hasOneUse() || !Shift.hasOneUse()) 844 break; 845 846 // Verify that the shift amount is something we can fold. 847 unsigned ShiftCst = C1->getValue(); 848 if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) 849 break; 850 851 // Get the new AND mask, this folds to a constant. 852 SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(), 853 SDOperand(C2, 0), SDOperand(C1, 0)); 854 SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(), 855 Shift.getOperand(0), NewANDMask); 856 NewANDMask.Val->setNodeId(Shift.Val->getNodeId()); 857 NewAND.Val->setNodeId(N.Val->getNodeId()); 858 859 AM.Scale = 1 << ShiftCst; 860 AM.IndexReg = NewAND; 861 return false; 862 } 863 } 864 865 return MatchAddressBase(N, AM, isRoot, Depth); 866} 867 868/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the 869/// specified addressing mode without any further recursion. 870bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM, 871 bool isRoot, unsigned Depth) { 872 // Is the base register already occupied? 873 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) { 874 // If so, check to see if the scale index register is set. 
875 if (AM.IndexReg.Val == 0 && !AM.isRIPRel) { 876 AM.IndexReg = N; 877 AM.Scale = 1; 878 return false; 879 } 880 881 // Otherwise, we cannot select it. 882 return true; 883 } 884 885 // Default, generate it as a register. 886 AM.BaseType = X86ISelAddressMode::RegBase; 887 AM.Base.Reg = N; 888 return false; 889} 890 891/// SelectAddr - returns true if it is able pattern match an addressing mode. 892/// It returns the operands which make up the maximal addressing mode it can 893/// match by reference. 894bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base, 895 SDOperand &Scale, SDOperand &Index, 896 SDOperand &Disp) { 897 X86ISelAddressMode AM; 898 if (MatchAddress(N, AM)) 899 return false; 900 901 MVT::ValueType VT = N.getValueType(); 902 if (AM.BaseType == X86ISelAddressMode::RegBase) { 903 if (!AM.Base.Reg.Val) 904 AM.Base.Reg = CurDAG->getRegister(0, VT); 905 } 906 907 if (!AM.IndexReg.Val) 908 AM.IndexReg = CurDAG->getRegister(0, VT); 909 910 getAddressOperands(AM, Base, Scale, Index, Disp); 911 return true; 912} 913 914/// isZeroNode - Returns true if Elt is a constant zero or a floating point 915/// constant +0.0. 916static inline bool isZeroNode(SDOperand Elt) { 917 return ((isa<ConstantSDNode>(Elt) && 918 cast<ConstantSDNode>(Elt)->getValue() == 0) || 919 (isa<ConstantFPSDNode>(Elt) && 920 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 921} 922 923 924/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 925/// match a load whose top elements are either undef or zeros. The load flavor 926/// is derived from the type of N, which is either v4f32 or v2f64. 
927bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred, 928 SDOperand N, SDOperand &Base, 929 SDOperand &Scale, SDOperand &Index, 930 SDOperand &Disp, SDOperand &InChain, 931 SDOperand &OutChain) { 932 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 933 InChain = N.getOperand(0).getValue(1); 934 if (ISD::isNON_EXTLoad(InChain.Val) && 935 InChain.getValue(0).hasOneUse() && 936 N.hasOneUse() && 937 CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) { 938 LoadSDNode *LD = cast<LoadSDNode>(InChain); 939 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp)) 940 return false; 941 OutChain = LD->getChain(); 942 return true; 943 } 944 } 945 946 // Also handle the case where we explicitly require zeros in the top 947 // elements. This is a vector shuffle from the zero vector. 948 if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() && 949 // Check to see if the top elements are all zeros (or bitcast of zeros). 950 ISD::isBuildVectorAllZeros(N.getOperand(0).Val) && 951 N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR && 952 N.getOperand(1).Val->hasOneUse() && 953 ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) && 954 N.getOperand(1).getOperand(0).hasOneUse()) { 955 // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something 956 // from the LHS. 957 unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType()); 958 SDOperand ShufMask = N.getOperand(2); 959 assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!"); 960 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) { 961 if (C->getValue() == VecWidth) { 962 for (unsigned i = 1; i != VecWidth; ++i) { 963 if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) { 964 // ok. 965 } else { 966 ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i)); 967 if (C->getValue() >= VecWidth) return false; 968 } 969 } 970 } 971 972 // Okay, this is a zero extending load. Fold it. 
973 LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0)); 974 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp)) 975 return false; 976 OutChain = LD->getChain(); 977 InChain = SDOperand(LD, 1); 978 return true; 979 } 980 } 981 return false; 982} 983 984 985/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 986/// mode it matches can be cost effectively emitted as an LEA instruction. 987bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N, 988 SDOperand &Base, SDOperand &Scale, 989 SDOperand &Index, SDOperand &Disp) { 990 X86ISelAddressMode AM; 991 if (MatchAddress(N, AM)) 992 return false; 993 994 MVT::ValueType VT = N.getValueType(); 995 unsigned Complexity = 0; 996 if (AM.BaseType == X86ISelAddressMode::RegBase) 997 if (AM.Base.Reg.Val) 998 Complexity = 1; 999 else 1000 AM.Base.Reg = CurDAG->getRegister(0, VT); 1001 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1002 Complexity = 4; 1003 1004 if (AM.IndexReg.Val) 1005 Complexity++; 1006 else 1007 AM.IndexReg = CurDAG->getRegister(0, VT); 1008 1009 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1010 // a simple shift. 1011 if (AM.Scale > 1) 1012 Complexity++; 1013 1014 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1015 // to a LEA. This is determined with some expermentation but is by no means 1016 // optimal (especially for code size consideration). LEA is nice because of 1017 // its three-address nature. Tweak the cost function again when we can run 1018 // convertToThreeAddress() at register allocation time. 1019 if (AM.GV || AM.CP || AM.ES || AM.JT != -1) { 1020 // For X86-64, we should always use lea to materialize RIP relative 1021 // addresses. 
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  // A displacement on top of a base or index register also argues for LEA.
  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
    Complexity++;

  // Only use LEA when the address is doing enough work to justify it.
  if (Complexity > 2) {
    getAddressOperands(AM, Base, Scale, Index, Disp);
    return true;
  }
  return false;
}

/// TryFoldLoad - Returns true (and the matched address operands by reference)
/// if the single-use, non-extending load N can be folded into its user P.
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (ISD::isNON_EXTLoad(N.Val) &&
      N.hasOneUse() &&
      CanBeFoldedBy(N.Val, P.Val, P.Val))
    // Operand 1 of a load node is its base pointer.
    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
  return false;
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.  The base
/// register is created once (lazily) and cached in GlobalBaseReg.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineFunction *MF = BB->getParent();
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

    const TargetInstrInfo *TII = TM.getInstrInfo();
    // Operand of MovePCtoStack is completely ignored by asm printer. It's
    // only used in JIT code emission as displacement to pc.
    BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);

    // If we're using vanilla 'GOT' PIC style, we should use relative addressing
    // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
    if (TM.getRelocationModel() == Reloc::PIC_ &&
        Subtarget->isPICStyleGOT()) {
      GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
        .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
    } else {
      GlobalBaseReg = PC;
    }

  }
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}

/// FindCallStartFromCall - Walk up the token chain from a call-related node
/// until the CALLSEQ_START that opened the call sequence is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).Val);
}

/// getTruncate - Emit an EXTRACT_SUBREG that truncates N0 down to VT.  On
/// 32-bit targets an i8 truncate first copies the source into a register
/// class that actually has an 8-bit subregister.
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
  SDOperand SRIdx;
  switch (VT) {
  case MVT::i8:
    SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
    // Ensure that the source register has an 8-bit subreg on 32-bit targets
    if (!Subtarget->is64Bit()) {
      unsigned Opc;
      // NOTE(review): this inner VT shadows the parameter VT, so the
      // EXTRACT_SUBREG below is typed i16/i32 rather than MVT::i8 -- looks
      // suspicious for an i8 truncate; confirm intent.
      MVT::ValueType VT;
      switch (N0.getValueType()) {
      default: assert(0 && "Unknown truncate!");
      case MVT::i16:
        Opc = X86::MOV16to16_;
        VT = MVT::i16;
        break;
      case MVT::i32:
        Opc = X86::MOV32to32_;
        VT = MVT::i32;
        break;
      }
      // Copy into the 8-bit-subreg-capable class, then extract; the copy's
      // flag result (value 1) orders the extract after the copy.
      N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
      return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                   VT, N0, SRIdx, N0.getValue(1));
    }
    break;
  case MVT::i16:
    SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
    break;
  case MVT::i32:
    SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
    break;
  default: assert(0 && "Unknown truncate!"); break;
  }
  // Common case: a plain subregister extract with the index chosen above.
  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
}


/// Select - Main instruction selection hook.  Handles the X86-specific and
/// hand-expanded opcodes here; everything else falls through to the
/// tblgen-generated SelectCode at the bottom.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  // Opcodes in [BUILTIN_OP_END, X86ISD::FIRST_NUMBER) are target nodes that
  // were already selected; nothing to do.
  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
    default: break;
    case X86ISD::GlobalBaseReg:
      return getGlobalBaseReg();

    case X86ISD::FP_GET_RESULT2: {
      // Expand to FpGETRESULT80x2, which produces two f80 results plus a
      // chain and a flag.  Values 2 and 3 (chain/flag) are rewired here;
      // the two f80 results are replaced by the caller via the return.
      SDOperand Chain = N.getOperand(0);
      SDOperand InFlag = N.getOperand(1);
      AddToISelQueue(Chain);
      AddToISelQueue(InFlag);
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f80);
      Tys.push_back(MVT::f80);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      SDOperand Ops[] = { Chain, InFlag };
      SDNode *ResNode = CurDAG->getTargetNode(X86::FpGETRESULT80x2, Tys,
                                              Ops, 2);
      Chain = SDOperand(ResNode, 2);
      InFlag = SDOperand(ResNode, 3);
      ReplaceUses(SDOperand(N.Val, 2), Chain);
      ReplaceUses(SDOperand(N.Val, 3), InFlag);
      return ResNode;
    }

    case ISD::ADD: {
      // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
      // code and is matched first so to prevent it from being turned into
      // LEA32r X+c.
      // In 64-bit small code size mode, use LEA to take advantage of
      // RIP-relative addressing.
      if (TM.getCodeModel() != CodeModel::Small)
        break;
      MVT::ValueType PtrVT = TLI.getPointerTy();
      SDOperand N0 = N.getOperand(0);
      SDOperand N1 = N.getOperand(1);
      if (N.Val->getValueType(0) == PtrVT &&
          N0.getOpcode() == X86ISD::Wrapper &&
          N1.getOpcode() == ISD::Constant) {
        unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
        SDOperand C(0, 0);
        // TODO: handle ExternalSymbolSDNode.
        if (GlobalAddressSDNode *G =
            dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
          // Fold the constant addend into the target global address itself.
          C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                             G->getOffset() + Offset);
        } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
          C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                            CP->getAlignment(),
                                            CP->getOffset()+Offset);
        }

        if (C.Val) {
          if (Subtarget->is64Bit()) {
            // LEA with no base and no index register: materializes the
            // (RIP-relative) address in one instruction.
            SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                                CurDAG->getRegister(0, PtrVT), C };
            return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
          } else
            return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
        }
      }

      // Other cases are handled by auto-generated code.
      break;
    }

    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI: {
      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      // x86 widening multiplies use fixed registers: one operand implicitly
      // in AL/AX/EAX/RAX, result split across {Lo,Hi} register pairs below.
      bool isSigned = Opcode == ISD::SMUL_LOHI;
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
        }

      unsigned LoReg, HiReg;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
      // Multiply is commutative, so also try folding a load of the other
      // operand, swapping the operands if that succeeds.
      if (!foldedLoad) {
        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
        if (foldedLoad)
          std::swap(N0, N1);
      }

      // Copy the register operand into the implicit low register; the flag
      // result (value 1) orders the multiply after the copy.
      AddToISelQueue(N0);
      SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
                                              N0, SDOperand()).getValue(1);

      if (foldedLoad) {
        // Memory form: address operands + load chain + flag.
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        InFlag = SDOperand(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the low half of the result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                  LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the high half of the result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDOperand Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                   MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      // Both result halves were replaced above, so nothing is returned.
      return NULL;
    }

    case ISD::SDIVREM:
    case ISD::UDIVREM: {
      SDOperand N0 = Node->getOperand(0);
      SDOperand N1 = Node->getOperand(1);

      // x86 divides also use fixed registers: dividend in Hi:Lo, quotient
      // ends up in Lo and remainder in Hi.
      bool isSigned = Opcode == ISD::SDIVREM;
      if (!isSigned)
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
        }
      else
        switch (NVT) {
        default: assert(0 && "Unsupported VT!");
        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
        }

      unsigned LoReg, HiReg;
      unsigned ClrOpcode, SExtOpcode;
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:
        LoReg = X86::AL;  HiReg = X86::AH;
        ClrOpcode  = 0;   // i8: AH is cleared via the MOVZX special case below.
        SExtOpcode = X86::CBW;
        break;
      case MVT::i16:
        LoReg = X86::AX;  HiReg = X86::DX;
        ClrOpcode  = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        break;
      case MVT::i32:
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode  = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        break;
      case MVT::i64:
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode  = X86::MOV64r0;
        SExtOpcode = X86::CQO;
        break;
      }

      SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);

      SDOperand InFlag;
      if (NVT == MVT::i8 && !isSigned) {
        // Special case for div8, just use a move with zero extension to AX to
        // clear the upper 8 bits (AH).
        SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
          // Fold the dividend load directly into the zero-extending move.
          SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
          AddToISelQueue(N0.getOperand(0));
          AddToISelQueue(Tmp0);
          AddToISelQueue(Tmp1);
          AddToISelQueue(Tmp2);
          AddToISelQueue(Tmp3);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                            Ops, 5), 0);
          Chain = Move.getValue(1);
          ReplaceUses(N0.getValue(1), Chain);
        } else {
          AddToISelQueue(N0);
          Move =
            SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
          Chain = CurDAG->getEntryNode();
        }
        Chain = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
        InFlag = Chain.getValue(1);
      } else {
        AddToISelQueue(N0);
        InFlag =
          CurDAG->getCopyToReg(CurDAG->getEntryNode(),
                               LoReg, N0, SDOperand()).getValue(1);
        if (isSigned) {
          // Sign extend the low part into the high part.
          InFlag =
            SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
        } else {
          // Zero out the high part, effectively zero extending the input.
          SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
                                        ClrNode, InFlag).getValue(1);
        }
      }

      if (foldedLoad) {
        // Divisor comes from memory: use the memory form of the divide.
        AddToISelQueue(N1.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
        InFlag = SDOperand(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
      } else {
        AddToISelQueue(N1);
        InFlag =
          SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the division (low) result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                  LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the remainder (high) result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDOperand Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                   MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.Val->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      return NULL;
    }

    case ISD::ANY_EXTEND: {
      // Implement any_extend as an INSERT_SUBREG of the source into an
      // undefined wider register.
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);
      if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
        SDOperand SRIdx;
        switch(N0.getValueType()) {
        case MVT::i32:
          SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
          break;
        case MVT::i16:
          SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
          break;
        case MVT::i8:
          // i8 sources are only handled here on 64-bit targets; otherwise
          // SRIdx stays null and the generated selector takes over.
          if (Subtarget->is64Bit())
            SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
          break;
        default: assert(0 && "Unknown any_extend!");
        }
        if (SRIdx.Val) {
          SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
                                                  NVT, N0, SRIdx);

#ifndef NDEBUG
          DOUT << std::string(Indent-2, ' ') << "=> ";
          DEBUG(ResNode->dump(CurDAG));
          DOUT << "\n";
          Indent -= 2;
#endif
          return ResNode;
        } // Otherwise let generated ISel handle it.
      }
      break;
    }

    case ISD::SIGN_EXTEND_INREG: {
      SDOperand N0 = Node->getOperand(0);
      AddToISelQueue(N0);

      // Truncate down to the in-register type, then sign-extend back up
      // with the appropriate MOVSX.
      MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
      SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
      unsigned Opc = 0;
      switch (NVT) {
      case MVT::i16:
        if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
        else assert(0 && "Unknown sign_extend_inreg!");
        break;
      case MVT::i32:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
        case MVT::i16: Opc = X86::MOVSX32rr16; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      case MVT::i64:
        switch (SVT) {
        case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
        case MVT::i16: Opc = X86::MOVSX64rr16; break;
        case MVT::i32: Opc = X86::MOVSX64rr32; break;
        default: assert(0 && "Unknown sign_extend_inreg!");
        }
        break;
      default: assert(0 && "Unknown sign_extend_inreg!");
      }

      SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(TruncOp.Val->dump(CurDAG));
      DOUT << "\n";
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
      break;
    }

    case ISD::TRUNCATE: {
      SDOperand Input = Node->getOperand(0);
      AddToISelQueue(Node->getOperand(0));
      SDNode *ResNode = getTruncate(Input, NVT);

#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(ResNode->dump(CurDAG));
      DOUT << "\n";
      Indent -= 2;
#endif
      return ResNode;
      break;
    }
  }

  // Everything not handled above is matched by the tblgen-generated code.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
1588 Indent -= 2; 1589#endif 1590 1591 return ResNode; 1592} 1593 1594bool X86DAGToDAGISel:: 1595SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode, 1596 std::vector<SDOperand> &OutOps, SelectionDAG &DAG){ 1597 SDOperand Op0, Op1, Op2, Op3; 1598 switch (ConstraintCode) { 1599 case 'o': // offsetable ?? 1600 case 'v': // not offsetable ?? 1601 default: return true; 1602 case 'm': // memory 1603 if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3)) 1604 return true; 1605 break; 1606 } 1607 1608 OutOps.push_back(Op0); 1609 OutOps.push_back(Op1); 1610 OutOps.push_back(Op2); 1611 OutOps.push_back(Op3); 1612 AddToISelQueue(Op0); 1613 AddToISelQueue(Op1); 1614 AddToISelQueue(Op2); 1615 AddToISelQueue(Op3); 1616 return false; 1617} 1618 1619/// createX86ISelDag - This pass converts a legalized DAG into a 1620/// X86-specific DAG, ready for instruction scheduling. 1621/// 1622FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) { 1623 return new X86DAGToDAGISel(TM, Fast); 1624} 1625