X86ISelDAGToDAG.cpp revision 447ff68c08bc01aa040ae6d0291af69b55bb8e57
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/Statistic.h"
#include <queue>
#include <set>
using namespace llvm;

STATISTIC(NumFPKill   , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDOperand's instead of register numbers for the leaves of the matched
  /// tree.  It accumulates the components of an x86 memory operand
  /// (base + index*scale + disp) as MatchAddress walks the DAG.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDOperand Reg;    // Valid when BaseType == RegBase.
      int FrameIndex;   // Valid when BaseType == FrameIndexBase.
    } Base;

    bool isRIPRel;      // RIP as base?
    unsigned Scale;     // Index scale factor; MatchAddress only produces
                        // 1, 2, 4, or 8 (or [2,4,8] for the X*[3,5,9] trick).
    SDOperand IndexReg;
    unsigned Disp;      // Immediate displacement component.
    // At most one of the following symbolic displacements is set; see
    // getAddressOperands, which checks them in this order.
    GlobalValue *GV;
    Constant *CP;
    const char *ES;
    int JT;             // Jump table index, or -1 if not a jump table address.
    unsigned Align;     // CP alignment.

    X86ISelAddressMode()
      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
        GV(0), CP(0), ES(0), JT(-1), Align(0) {
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// ContainsFPCode - Every instruction we select that uses or defines a FP
    /// register should set this to true.
    bool ContainsFPCode;

    /// FastISel - Enable fast(er) instruction selection.
    ///
    bool FastISel;

    /// TM - Keep a reference to X86TargetMachine.
    ///
    X86TargetMachine &TM;

    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
    /// base register.
    unsigned GlobalBaseReg;

  public:
    // NOTE(review): X86Lowering is handed to the base-class constructor
    // before its own constructor runs (members initialize in declaration
    // order, after bases).  This is only safe if SelectionDAGISel merely
    // stores the reference without using it during construction — confirm.
    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
      : SelectionDAGISel(X86Lowering),
        ContainsFPCode(false), FastISel(fast), TM(tm),
        X86Lowering(*TM.getTargetLowering()),
        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}

    virtual bool runOnFunction(Function &Fn) {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      return SelectionDAGISel::runOnFunction(Fn);
    }

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelectBasicBlock - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDOperand N);

    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                      bool isRoot = true, unsigned Depth = 0);
    bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                          bool isRoot, unsigned Depth);
    bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                    SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                       SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                             SDOperand N, SDOperand &Base, SDOperand &Scale,
                             SDOperand &Index, SDOperand &Disp,
                             SDOperand &InChain, SDOperand &OutChain);
    bool TryFoldLoad(SDOperand P, SDOperand N,
                     SDOperand &Base, SDOperand &Scale,
                     SDOperand &Index, SDOperand &Disp);
    void PreprocessForRMW(SelectionDAG &DAG);
    void PreprocessForFPConvert(SelectionDAG &DAG);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                              char ConstraintCode,
                                              std::vector<SDOperand> &OutOps,
                                              SelectionDAG &DAG);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Convert a completed X86ISelAddressMode into the
    /// four operand SDOperands (Base, Scale, Index, Disp) that x86 memory
    /// operands use in the selected DAG.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                   SDOperand &Scale, SDOperand &Index,
                                   SDOperand &Disp) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      else
        Disp = getI32Imm(AM.Disp);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDOperand getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDOperand getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDOperand getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    /// getTruncate - return an SDNode that implements a subreg based truncate
    /// of the specified operand to the specified value type.
    SDNode *getTruncate(SDOperand N0, MVT::ValueType VT);

#ifndef NDEBUG
    unsigned Indent;          // Debug-output nesting depth (selection trace).
#endif
  };
}

/// findFlagUse - Return the user of N's flag result (its last value), or
/// NULL if no user reads that result.
static SDNode *findFlagUse(SDNode *N) {
  // The flag result, by convention, is the node's last value.
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    SDNode *User = *I;
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDOperand Op = User->getOperand(i);
      if (Op.Val == N && Op.ResNo == FlagResNo)
        return User;
    }
  }
  return NULL;
}

/// findNonImmUse - Recursively walk up the operand edges from Use, setting
/// 'found' if Def is reachable other than through a direct operand of
/// ImmedUse or Root.  Nodes behind Skip are not traversed; Visited prevents
/// re-walking shared subgraphs.  The node-id check prunes nodes that cannot
/// be above Def in the topological order.
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, SDNode *Skip, bool &found,
                          std::set<SDNode *> &Visited) {
  if (found ||
      Use->getNodeId() > Def->getNodeId() ||
      !Visited.insert(Use).second)
    return;

  for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    if (N == Skip)
      continue;
    if (N == Def) {
      if (Use == ImmedUse)
        continue;  // Immediate use is ok.
      if (Use == Root) {
        // Only the store/cmp root is allowed to reach Def directly.
        assert(Use->getOpcode() == ISD::STORE ||
               Use->getOpcode() == X86ISD::CMP);
        continue;
      }
      found = true;
      break;
    }
    findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
  }
}

/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
                               SDNode *Skip = NULL) {
  std::set<SDNode *> Visited;
  bool found = false;
  findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
  return found;
}


/// CanBeFoldedBy - Return true if folding node N into its user U (with
/// pattern root Root) cannot create a cycle in the DAG.  Called by the
/// generated matcher before folding a load into another operation.
bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
  // In fast-isel mode we never fold loads into other operations.
  if (FastISel) return false;

  // If U use can somehow reach N through another path then U can't fold N or
  // it will create a cycle. e.g. In the following diagram, U can reach N
  // through X. If N is folded into into U, then X is both a predecessor and
  // a successor of U.
  //
  //         [ N ]
  //         ^  ^
  //         |  |
  //        /    \---
  //       /        [X]
  //       |         ^
  //      [U]--------|

  if (isNonImmUse(Root, N, U))
    return false;

  // If U produces a flag, then it gets (even more) interesting. Since it
  // would have been "glued" together with its flag use, we need to check if
  // it might reach N:
  //
  //       [ N ]
  //        ^ ^
  //        | |
  //       [U] \--
  //        ^     [TF]
  //        |       ^
  //        |       |
  //         \     /
  //          [FU]
  //
  // If FU (flag use) indirectly reach N (the load), and U fold N (call it
  // NU), then TF is a predecessor of FU and a successor of NU. But since
  // NU and FU are flagged together, this effectively creates a cycle.
  //
  // Walk the flag chain downward to the final flag consumer, then redo the
  // reachability check from there.
  bool HasFlagUse = false;
  MVT::ValueType VT = Root->getValueType(Root->getNumValues()-1);
  while ((VT == MVT::Flag && !Root->use_empty())) {
    SDNode *FU = findFlagUse(Root);
    if (FU == NULL)
      break;
    else {
      Root = FU;
      HasFlagUse = true;
    }
    VT = Root->getValueType(Root->getNumValues()-1);
  }

  if (HasFlagUse)
    // Skip the original user U in this walk: its edge to N is the fold itself.
    return !isNonImmUse(Root, N, Root, U);
  return true;
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                                 SDOperand Store, SDOperand TF) {
  // Rebuild the TokenFactor's operand list, replacing the load's slot with
  // the load's own incoming chain.
  std::vector<SDOperand> Ops;
  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
    if (Load.Val == TF.Val->getOperand(i).Val)
      Ops.push_back(Load.Val->getOperand(0));
    else
      Ops.push_back(TF.Val->getOperand(i));
  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // The load now chains off the TokenFactor, and the store chains off the
  // load's output chain.
  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
                         Store.getOperand(2), Store.getOperand(3));
}

/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -fast mode (aka -O0).
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I) {
    // Only plain (non-truncating) stores whose chain is a TokenFactor are
    // candidates.
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDOperand Chain = I->getOperand(0);
    if (Chain.Val->getOpcode() != ISD::TokenFactor)
      continue;

    SDOperand N1 = I->getOperand(1);   // Stored value.
    SDOperand N2 = I->getOperand(2);   // Store address.
    if (MVT::isFloatingPoint(N1.getValueType()) ||
        MVT::isVector(N1.getValueType()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDOperand Load;
    unsigned Opcode = N1.Val->getOpcode();
    switch (Opcode) {
    // Commutative operators: the load may be either operand.
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE: {
      SDOperand N10 = N1.getOperand(0);
      SDOperand N11 = N1.getOperand(1);
      if (ISD::isNON_EXTLoad(N10.Val))
        RModW = true;
      else if (ISD::isNON_EXTLoad(N11.Val)) {
        RModW = true;
        std::swap(N10, N11);
      }
      // The load must feed the TokenFactor, have a single use, load from the
      // same address the store writes, and match the operation's type.
      RModW = RModW && N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
        (N10.getOperand(1) == N2) &&
        (N10.Val->getValueType(0) == N1.getValueType());
      if (RModW)
        Load = N10;
      break;
    }
    // Non-commutative operators: the load must be the first operand.
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDOperand N10 = N1.getOperand(0);
      if (ISD::isNON_EXTLoad(N10.Val))
        RModW = N10.Val->isOperandOf(Chain.Val) && N10.hasOneUse() &&
          (N10.getOperand(1) == N2) &&
          (N10.Val->getValueType(0) == N1.getValueType());
      if (RModW)
        Load = N10;
      break;
    }
    }

    if (RModW) {
      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack. This is a
/// gross hack. We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass. We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization. As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT::ValueType SrcVT = N->getOperand(0).getValueType();
    MVT::ValueType DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore. SSE can fold direct loads into other
    // operations. Based on this, decide what we want to do.
    MVT::ValueType MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
                                        MemTmp, NULL, 0, MemVT);
    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
                                      NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created. This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);

    // Now that we did that, the node is dead. Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    DAG.DeleteNode(N);
  }
}

/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
  DEBUG(BB->dump());
  MachineFunction::iterator FirstMBB = BB;

  // Run the RMW preprocessing only when optimizing (see PreprocessForRMW).
  if (!FastISel)
    PreprocessForRMW(DAG);

  // FIXME: This should only happen when not -fast.
  PreprocessForFPConvert(DAG);

  // Codegen the basic block.
#ifndef NDEBUG
  DOUT << "===== Instruction selection begins:\n";
  Indent = 0;
#endif
  DAG.setRoot(SelectRoot(DAG.getRoot()));
#ifndef NDEBUG
  DOUT << "===== Instruction selection ends:\n";
#endif

  DAG.RemoveDeadNodes();

  // Emit machine code to BB. This can change 'BB' to the last block being
  // inserted into.
  ScheduleAndEmitDAG(DAG);

  // If we are emitting FP stack code, scan the basic block to determine if this
  // block defines any FP values. If so, put an FP_REG_KILL instruction before
  // the terminator of the block.

  // Note that FP stack instructions are used in all modes for long double,
  // so we always need to do this check.
  // Also note that it's possible for an FP stack register to be live across
  // an instruction that produces multiple basic blocks (SSE CMOV) so we
  // must check all the generated basic blocks.

  // Scan all of the machine instructions in these MBBs, checking for FP
  // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
  MachineFunction::iterator MBBI = FirstMBB;
  MachineFunction::iterator EndMBB = BB; ++EndMBB;
  for (; MBBI != EndMBB; ++MBBI) {
    MachineBasicBlock *MBB = MBBI;

    // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
    // before the return.
    if (!MBB->empty()) {
      MachineBasicBlock::iterator EndI = MBB->end();
      --EndI;
      if (EndI->getDesc().isReturn())
        continue;
    }

    bool ContainsFPCode = false;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         !ContainsFPCode && I != E; ++I) {
      if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
        const TargetRegisterClass *clas;
        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
          // NOTE(review): the loop tests operand 'op' but queries the register
          // class of operand 0 — presumably getOperand(op) was intended here;
          // confirm before changing, as operand 0 is the common def position.
          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
              TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
              ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
                 X86::RFP32RegisterClass ||
               clas == X86::RFP64RegisterClass ||
               clas == X86::RFP80RegisterClass)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
    // Check PHI nodes in successor blocks. These PHI's will be lowered to have
    // a copy of the input value in this block. In SSE mode, we only care about
    // 80-bit values.
    if (!ContainsFPCode) {
      // Final check, check LLVM BB's that are successors to the LLVM BB
      // corresponding to BB for FP PHI nodes.
      const BasicBlock *LLVMBB = BB->getBasicBlock();
      const PHINode *PN;
      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
           !ContainsFPCode && SI != E; ++SI) {
        for (BasicBlock::const_iterator II = SI->begin();
             (PN = dyn_cast<PHINode>(II)); ++II) {
          // x87 is needed for: any 80-bit FP; any FP without SSE1; doubles
          // without SSE2.
          if (PN->getType()==Type::X86_FP80Ty ||
              (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
              (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
    if (ContainsFPCode) {
      BuildMI(*MBB, MBBI->getFirstTerminator(),
              TM.getInstrInfo()->get(X86::FP_REG_KILL));
      ++NumFPKill;
    }
  }
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.  On Cygwin/MinGW this is a call to __main, which the
/// runtime uses to run global constructors.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                                   bool isRoot, unsigned Depth) {
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM, isRoot, Depth);

  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRel) {
    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
      int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
      if (isInt32(AM.Disp + Val)) {
        AM.Disp += Val;
        return false;
      }
    }
    return true;
  }

  int id = N.Val->getNodeId();
  bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    // Fold a constant into the displacement if the sum still fits in 32 bits.
    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
    if (isInt32(AM.Disp + Val)) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::Wrapper: {
    bool is64Bit = Subtarget->is64Bit();
    // Under X86-64 non-small code model, GV (and friends) are 64-bits.
    // Also, base and index reg must be 0 in order to use rip as base.
    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
                    AM.Base.Reg.Val || AM.IndexReg.Val))
      break;
    // Only one symbolic displacement may be folded per address.
    if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
      break;
    // If value is available in a register both base and index components have
    // been picked, we can't fit the result available in the register in the
    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
    if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
      SDOperand N0 = N.getOperand(0);
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
        GlobalValue *GV = G->getGlobal();
        AM.GV = GV;
        AM.Disp += G->getOffset();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
        AM.CP = CP->getConstVal();
        AM.Align = CP->getAlignment();
        AM.Disp += CP->getOffset();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
        AM.ES = S->getSymbol();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
        AM.JT = J->getIndex();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      }
    }
    break;
  }

  case ISD::FrameIndex:
    // A frame index can serve as the base if no base register is chosen yet.
    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
      break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
      // Shifts by 1/2/3 map to SIB scales 2/4/8.
      unsigned Val = CN->getValue();
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDOperand ShVal = N.Val->getOperand(0);

        // Okay, we know that we have a scale by now. However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
            isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
          AM.IndexReg = ShVal.Val->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.Val->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
          if (isInt32(Disp))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
      break;
    }
    // NOTE(review): when the shift amount is not a constant, control falls
    // through into the MUL_LOHI/MUL cases below.  It appears harmless today
    // (those cases also require a constant RHS), but confirm this
    // fallthrough is intentional.

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.ResNo != 0) break;
    // FALL THROUGH
  case ISD::MUL:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (!AlreadySelected &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.Val == 0 &&
        AM.IndexReg.Val == 0 &&
        !AM.isRIPRel) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
          AM.Scale = unsigned(CN->getValue())-1;

          SDOperand MulVal = N.Val->getOperand(0);
          SDOperand Reg;

          // Okay, we know that we have a scale by now. However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
            Reg = MulVal.Val->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
            if (isInt32(Disp))
              AM.Disp = Disp;
            else
              Reg = N.Val->getOperand(0);
          } else {
            Reg = N.Val->getOperand(0);
          }

          // Same register is used as both base (x1) and scaled index.
          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::ADD:
    // Try matching (lhs then rhs); on failure restore AM and try the
    // opposite order before giving up.
    if (!AlreadySelected) {
      X86ISelAddressMode Backup = AM;
      if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
          !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
        return false;
      AM = Backup;
      if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
          !MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
        return false;
      AM = Backup;
    }
    break;

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (AlreadySelected) break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      // Start with the LHS as an addr mode.
      if (!MatchAddress(N.getOperand(0), AM, false) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          isInt32(AM.Disp + CN->getSignExtended()) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp += CN->getValue();
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (AlreadySelected) break;
    SDOperand Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SHL) break;

    // Scale must not be used already.
    if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;

    // Not when RIP is used as the base.
    if (AM.isRIPRel) break;

    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
                                           SDOperand(C2, 0), SDOperand(C1, 0));
    SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
                                       Shift.getOperand(0), NewANDMask);
    // Reuse the replaced nodes' ids so the selection queue stays consistent.
    NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
    NewAND.Val->setNodeId(N.Val->getNodeId());

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  // No special pattern matched; fall back to using N directly.
  return MatchAddressBase(N, AM, isRoot, Depth);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.  (isRoot and
/// Depth are unused here; kept for interface symmetry with MatchAddress.)
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                                       bool isRoot, unsigned Depth) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.Val == 0 && !AM.isRIPRel) {
      // Use N as the index with scale 1 (i.e. base + N*1).
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                                 SDOperand &Scale, SDOperand &Index,
                                 SDOperand &Disp) {
  X86ISelAddressMode AM;
  // Note the inverted convention: MatchAddress returns true on FAILURE,
  // SelectAddr returns true on success.
  if (MatchAddress(N, AM))
    return false;

  MVT::ValueType VT = N.getValueType();
  // Register 0 denotes "no base register" / "no index register".
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.Val)
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.Val)
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp);
  return true;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0 (negative zero does not count).
static inline bool isZeroNode(SDOperand Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}


/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                                          SDOperand N, SDOperand &Base,
                                          SDOperand &Scale, SDOperand &Index,
                                          SDOperand &Disp, SDOperand &InChain,
                                          SDOperand &OutChain) {
  // Case 1: scalar_to_vector of a plain (non-extending) load whose only use
  // is this node -- the load can be folded directly.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.Val) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == ISD::VECTOR_SHUFFLE && N.Val->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      ISD::isBuildVectorAllZeros(N.getOperand(0).Val) &&
      N.getOperand(1).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(1).Val->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(1).getOperand(0).Val) &&
      N.getOperand(1).getOperand(0).hasOneUse()) {
    // Check to see if the shuffle mask is 4/L/L/L or 2/L, where L is something
    // from the LHS.
    unsigned VecWidth=MVT::getVectorNumElements(N.getOperand(0).getValueType());
    SDOperand ShufMask = N.getOperand(2);
    assert(ShufMask.getOpcode() == ISD::BUILD_VECTOR && "Invalid shuf mask!");
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ShufMask.getOperand(0))) {
      if (C->getValue() == VecWidth) {
        for (unsigned i = 1; i != VecWidth; ++i) {
          if (ShufMask.getOperand(i).getOpcode() == ISD::UNDEF) {
            // ok.
          } else {
            ConstantSDNode *C = cast<ConstantSDNode>(ShufMask.getOperand(i));
            if (C->getValue() >= VecWidth) return false;
          }
        }
      }
      // NOTE(review): when the first mask element is a constant that is NOT
      // equal to VecWidth, control still falls through to the fold below --
      // verify that skipping the equality check there is intentional.

      // Okay, this is a zero extending load.  Fold it.
      LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(1).getOperand(0));
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
        return false;
      OutChain = LD->getChain();
      InChain = SDOperand(LD, 1);
      return true;
    }
  }
  return false;
}


/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
                                    SDOperand &Base, SDOperand &Scale,
                                    SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  if (MatchAddress(N, AM))
    return false;

  // Heuristic cost model: only emit the LEA when the matched mode is complex
  // enough (Complexity > 2, checked below) to beat simpler add/shift code.
  MVT::ValueType VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.Val)
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.Val)
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or
  // with a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
    Complexity++;

  if (Complexity > 2) {
    getAddressOperands(AM, Base, Scale, Index, Disp);
    return true;
  }
  return false;
}

/// TryFoldLoad - Returns true (filling in the address operands) if N is a
/// single-use, non-extending load that CanBeFoldedBy reports safe to fold
/// into the user P.
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (ISD::isNON_EXTLoad(N.Val) &&
      N.hasOneUse() &&
      CanBeFoldedBy(N.Val, P.Val, P.Val))
    // Operand 1 of a load node is the pointer being loaded from.
    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
  return false;
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.  The register
/// is created lazily on first use and cached in GlobalBaseReg.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function.
    MachineFunction *MF = BB->getParent();
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

    const TargetInstrInfo *TII = TM.getInstrInfo();
    // Operand of MovePCtoStack is completely ignored by asm printer. It's
    // only used in JIT code emission as displacement to pc.
    BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);

    // If we're using vanilla 'GOT' PIC style, we should use relative
    // addressing not to pc, but to the _GLOBAL_OFFSET_TABLE_ external.
    if (TM.getRelocationModel() == Reloc::PIC_ &&
        Subtarget->isPICStyleGOT()) {
      GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
        .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
    } else {
      GlobalBaseReg = PC;
    }

  }
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}

/// FindCallStartFromCall - Walk up the token-chain operands from Node until
/// the CALLSEQ_START that begins the call sequence is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).Val);
}

/// getTruncate - Emit an EXTRACT_SUBREG that truncates N0 down to VT
/// (i8, i16 or i32), choosing the subregister-set index that matches VT.
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT::ValueType VT) {
  SDOperand SRIdx;
  switch (VT) {
  case MVT::i8:
    SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
    // Ensure that the source register has an 8-bit subreg on 32-bit targets:
    // first copy the value into a register class that has 8-bit subregs
    // (MOV16to16_ / MOV32to32_), then extract the subreg from that copy.
    if (!Subtarget->is64Bit()) {
      unsigned Opc;
      // NOTE(review): this inner 'VT' shadows the function parameter, so the
      // EXTRACT_SUBREG returned below is typed as the source width (i16/i32)
      // rather than i8 -- verify this is the intended result type.
      MVT::ValueType VT;
      switch (N0.getValueType()) {
      default: assert(0 && "Unknown truncate!");
      case MVT::i16:
        Opc = X86::MOV16to16_;
        VT = MVT::i16;
        break;
      case MVT::i32:
        Opc = X86::MOV32to32_;
        VT = MVT::i32;
        break;
      }
      N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
      return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                   VT, N0, SRIdx, N0.getValue(1));
    }
    break;
  case MVT::i16:
    SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
    break;
  case MVT::i32:
    SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
    break;
  default: assert(0 && "Unknown truncate!"); break;
  }
  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
}


/// Select - Main entry point of the instruction selector: pattern-match the
/// node N, emitting target nodes for the cases handled manually here and
/// deferring everything else to the tblgen-generated SelectCode.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT::ValueType NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  // Target opcodes (>= BUILTIN_OP_END, below the X86ISD range) have already
  // been selected; leave them alone.
  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
  default: break;
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();

  // FIXME: This is a workaround for a tblgen problem: rdar://5791600
  case X86ISD::RET_FLAG:
    if (ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      if (Amt->getSignExtended() != 0) break;

      // Match (X86retflag 0).
      SDOperand Chain = N.getOperand(0);
      bool HasInFlag = N.getOperand(N.getNumOperands()-1).getValueType()
                        == MVT::Flag;
      SmallVector<SDOperand, 8> Ops0;
      AddToISelQueue(Chain);
      SDOperand InFlag(0, 0);
      if (HasInFlag) {
        InFlag = N.getOperand(N.getNumOperands()-1);
        AddToISelQueue(InFlag);
      }
      // Copy the remaining operands (skipping the pop-amount in operand 1),
      // then append chain and flag last as the target RET node expects.
      for (unsigned i = 2, e = N.getNumOperands()-(HasInFlag?1:0); i != e;
           ++i) {
        AddToISelQueue(N.getOperand(i));
        Ops0.push_back(N.getOperand(i));
      }
      Ops0.push_back(Chain);
      if (HasInFlag)
        Ops0.push_back(InFlag);
      return CurDAG->getTargetNode(X86::RET, MVT::Other,
                                   &Ops0[0], Ops0.size());
    }
    break;

  case X86ISD::FP_GET_ST0_ST1: {
    SDOperand Chain  = N.getOperand(0);
    SDOperand InFlag = N.getOperand(1);
    AddToISelQueue(Chain);
    AddToISelQueue(InFlag);
    // Results: two f80 values plus a chain and a flag.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f80);
    Tys.push_back(MVT::f80);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    SDOperand Ops[] = { Chain, InFlag };
    SDNode *ResNode = CurDAG->getTargetNode(X86::FpGET_ST0_ST1, Tys,
                                            Ops, 2);
    // Re-route the old node's chain and flag uses to the new node.
    Chain  = SDOperand(ResNode, 2);
    InFlag = SDOperand(ResNode, 3);
    ReplaceUses(SDOperand(N.Val, 2), Chain);
    ReplaceUses(SDOperand(N.Val, 3), InFlag);
    return ResNode;
  }

  case ISD::ADD: {
    // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
    // code and is matched first so to prevent it from being turned into
    // LEA32r X+c.
    // In 64-bit small code size mode, use LEA to take advantage of
    // RIP-relative addressing.
    if (TM.getCodeModel() != CodeModel::Small)
      break;
    MVT::ValueType PtrVT = TLI.getPointerTy();
    SDOperand N0 = N.getOperand(0);
    SDOperand N1 = N.getOperand(1);
    if (N.Val->getValueType(0) == PtrVT &&
        N0.getOpcode() == X86ISD::Wrapper &&
        N1.getOpcode() == ISD::Constant) {
      unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
      SDOperand C(0, 0);
      // TODO: handle ExternalSymbolSDNode.
      // Fold the add's constant into the address node's offset.
      if (GlobalAddressSDNode *G =
          dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                           G->getOffset() + Offset);
      } else if (ConstantPoolSDNode *CP =
                 dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                          CP->getAlignment(),
                                          CP->getOffset()+Offset);
      }

      if (C.Val) {
        if (Subtarget->is64Bit()) {
          SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                              CurDAG->getRegister(0, PtrVT), C };
          return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
        } else
          return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
      }
    }

    // Other cases are handled by auto-generated code.
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);

    // x86 MUL/IMUL take one operand implicitly in AL/AX/EAX/RAX and produce
    // the double-width result split across the Lo/Hi registers chosen below.
    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned)
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    else
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }

    unsigned LoReg, HiReg;
    switch (NVT) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
    // Multiplication is commutative, so also try folding a load of the
    // other operand, swapping the operands on success.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    AddToISelQueue(N0);
    SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
                                            N0, SDOperand()).getValue(1);

    if (foldedLoad) {
      AddToISelQueue(N1.getOperand(0));
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      InFlag = SDOperand(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
    } else {
      AddToISelQueue(N1);
      InFlag =
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the low half of the result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the high half of the result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDOperand Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                 MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned)
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    else
      switch (NVT) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }

    // DIV/IDIV take the dividend in Hi:Lo; ClrOpcode zeroes the high half
    // for unsigned division, SExtOpcode sign-extends it for signed division.
    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  HiReg = X86::AH;
      ClrOpcode  = 0;   // i8 unsigned uses the zero-extend special case below.
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrOpcode  = X86::MOV16r0;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; HiReg = X86::EDX;
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode  = X86::MOV64r0;
      SExtOpcode = X86::CQO;
      break;
    }

    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);

    SDOperand InFlag;
    if (NVT == MVT::i8 && !isSigned) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      // Note: these Tmp0..Tmp3 shadow the outer ones (which describe N1).
      SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
        AddToISelQueue(N0.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        Move =
          SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                          Ops, 5), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        AddToISelQueue(N0);
        Move =
          SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
      InFlag = Chain.getValue(1);
    } else {
      AddToISelQueue(N0);
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(),
                             LoReg, N0, SDOperand()).getValue(1);
      if (isSigned) {
        // Sign extend the low part into the high part.
        InFlag =
          SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      AddToISelQueue(N1.getOperand(0));
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      InFlag = SDOperand(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
    } else {
      AddToISelQueue(N1);
      InFlag =
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the division (low) result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the remainder (high) result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDOperand Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                 MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::ANY_EXTEND: {
    // Implement any_extend as an INSERT_SUBREG into an (undefined) wider
    // register, picking the subregister set that matches the source width.
    SDOperand N0 = Node->getOperand(0);
    AddToISelQueue(N0);
    if (NVT == MVT::i64 || NVT == MVT::i32 || NVT == MVT::i16) {
      SDOperand SRIdx;
      switch(N0.getValueType()) {
      case MVT::i32:
        SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
        break;
      case MVT::i16:
        SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
        break;
      case MVT::i8:
        // Only 64-bit targets have an 8-bit subreg of every GPR.
        if (Subtarget->is64Bit())
          SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        break;
      default: assert(0 && "Unknown any_extend!");
      }
      if (SRIdx.Val) {
        SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
                                                NVT, N0, SRIdx);

#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(ResNode->dump(CurDAG));
        DOUT << "\n";
        Indent -= 2;
#endif
        return ResNode;
      } // Otherwise let generated ISel handle it.
    }
    break;
  }

  case ISD::SIGN_EXTEND_INREG: {
    SDOperand N0 = Node->getOperand(0);
    AddToISelQueue(N0);

    // Truncate down to the source type, then sign-extend back up with the
    // MOVSX variant for the NVT/SVT pair.
    MVT::ValueType SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
    SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
    unsigned Opc = 0;
    switch (NVT) {
    case MVT::i16:
      if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
      else assert(0 && "Unknown sign_extend_inreg!");
      break;
    case MVT::i32:
      switch (SVT) {
      case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
      case MVT::i16: Opc = X86::MOVSX32rr16; break;
      default: assert(0 && "Unknown sign_extend_inreg!");
      }
      break;
    case MVT::i64:
      switch (SVT) {
      case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
      case MVT::i16: Opc = X86::MOVSX64rr16; break;
      case MVT::i32: Opc = X86::MOVSX64rr32; break;
      default: assert(0 && "Unknown sign_extend_inreg!");
      }
      break;
    default: assert(0 && "Unknown sign_extend_inreg!");
    }

    SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(TruncOp.Val->dump(CurDAG));
    DOUT << "\n";
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(ResNode->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return ResNode;
    break;
  }

  case ISD::TRUNCATE: {
    SDOperand Input = Node->getOperand(0);
    AddToISelQueue(Node->getOperand(0));
    SDNode *ResNode = getTruncate(Input, NVT);

#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(ResNode->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return ResNode;
    break;
  }
  }

  // Everything not handled above goes through the tblgen-generated matcher.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Select the address operands for a memory
/// ('m') inline-asm constraint.  Returns true on failure (constraints other
/// than 'm' are not handled here).
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
  SDOperand Op0, Op1, Op2, Op3;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  AddToISelQueue(Op0);
  AddToISelQueue(Op1);
  AddToISelQueue(Op2);
  AddToISelQueue(Op3);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
  return new X86DAGToDAGISel(TM, Fast);
}