X86ISelDAGToDAG.cpp revision f1c6428164f6f5e07cbc88c1c1440efbf29c0d5f
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a DAG pattern matching instruction selector for X86, 11// converting from a legalized dag to a X86 dag. 12// 13//===----------------------------------------------------------------------===// 14 15// Force NDEBUG on in any optimized build on Darwin. 16// 17// FIXME: This is a huge hack, to work around ridiculously awful compile times 18// on this file with gcc-4.2 on Darwin, in Release mode. 19#if (!defined(__llvm__) && defined(__APPLE__) && \ 20 defined(__OPTIMIZE__) && !defined(NDEBUG)) 21#define NDEBUG 22#endif 23 24#define DEBUG_TYPE "x86-isel" 25#include "X86.h" 26#include "X86InstrBuilder.h" 27#include "X86ISelLowering.h" 28#include "X86MachineFunctionInfo.h" 29#include "X86RegisterInfo.h" 30#include "X86Subtarget.h" 31#include "X86TargetMachine.h" 32#include "llvm/GlobalValue.h" 33#include "llvm/Instructions.h" 34#include "llvm/Intrinsics.h" 35#include "llvm/Support/CFG.h" 36#include "llvm/Type.h" 37#include "llvm/CodeGen/MachineConstantPool.h" 38#include "llvm/CodeGen/MachineFunction.h" 39#include "llvm/CodeGen/MachineFrameInfo.h" 40#include "llvm/CodeGen/MachineInstrBuilder.h" 41#include "llvm/CodeGen/MachineRegisterInfo.h" 42#include "llvm/CodeGen/SelectionDAGISel.h" 43#include "llvm/Target/TargetMachine.h" 44#include "llvm/Target/TargetOptions.h" 45#include "llvm/Support/Debug.h" 46#include "llvm/Support/ErrorHandling.h" 47#include "llvm/Support/MathExtras.h" 48#include "llvm/Support/raw_ostream.h" 49#include "llvm/ADT/SmallPtrSet.h" 50#include "llvm/ADT/Statistic.h" 51using namespace llvm; 52 53STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 54 
//===----------------------------------------------------------------------===//
// Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.  At most one of the symbolic fields (GV/CP/ES/JT/BlockAddr) is
  /// set at a time; see hasSymbolicDisplacement().
  struct X86ISelAddressMode {
    enum {
      RegBase,        // Base is a virtual/physical register (Base.Reg).
      FrameIndexBase  // Base is a stack slot (Base.FrameIndex).
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDValue Reg;
      int FrameIndex;
    } Base;

    unsigned Scale;     // Index scale factor; 1 when unused.
    SDValue IndexReg;
    int32_t Disp;       // Constant displacement (32-bit, even on x86-64).
    SDValue Segment;    // Segment override register, if any.
    GlobalValue *GV;
    Constant *CP;
    BlockAddress *BlockAddr;
    const char *ES;
    int JT;             // Jump-table index, or -1 if none.
    unsigned Align;     // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
        SymbolFlags(X86II::MO_NO_FLAG) {
    }

    /// hasSymbolicDisplacement - Return true if any symbolic displacement
    /// (global, constant pool, external symbol, jump table, or block
    /// address) has already been folded into this mode.
    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
    }

    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base.Reg = Reg;
    }

    /// dump - Debugging dump of every field of the addressing mode.
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base.Reg ";
      if (Base.Reg.getNode() != 0)
        Base.Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode() != 0)
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelect - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelect();

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;

    virtual bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);

    // NOTE: The Match* helpers below follow the convention of returning
    // false on a successful match and true on failure, mutating AM in place.
    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp);
    bool SelectScalarSSELoadXXX(SDNode *Root, SDValue N,
                                SDValue &Base, SDValue &Scale,
                                SDValue &Index, SDValue &Disp,
                                SDValue &Segment,
                                SDValue &NodeWithChain);

    // FIXME: Remove this hacky wrapper.
    bool SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base,
                             SDValue &Scale, SDValue &Index,
                             SDValue &Disp, SDValue &Segment,
                             SDValue &PatternChainResult,
                             SDValue &PatternInputChain) {
      SDValue Tmp;
      if (!SelectScalarSSELoadXXX(Root, N, Base, Scale, Index, Disp, Segment,
                                  Tmp))
        return false;
      // Tmp is the load node matched by SelectScalarSSELoadXXX; split out its
      // input chain (operand 0) and chain result (value 1) for the pattern.
      PatternInputChain = Tmp.getOperand(0);
      PatternChainResult = Tmp.getValue(1);
      return true;
    }
    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
    void PreprocessForRMW();
    void PreprocessForFPConvert();

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Expand a matched X86ISelAddressMode into the five
    /// operands (Base, Scale, Index, Disp, Segment) expected by X86 memory
    /// instruction patterns.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else if (AM.BlockAddr)
        Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
                                       true, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      // Register 0 means "no segment override".
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }

#ifndef NDEBUG
    unsigned Indent;  // Debug-output indentation level.
#endif
  };
}


/// IsProfitableToFold - Returns true if it is profitable to fold operand N of
/// user U (whose match is rooted at Root) into the using instruction.
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  // Folding a multi-use value would duplicate work.
  if (!N.hasOneUse())
    return false;

  // Anything other than a load is always profitable to fold.
  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is a 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In case where the increment is 1, then
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl %gs:0, %eax
      // leal i@NTPOFF(%eax), %eax
      // instead of
      // movl $i@NTPOFF, %eax
      // addl %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}


/// IsLegalToFold - Returns true if folding N into U is legal (will not create
/// a cycle in the DAG); defers to the target-independent checker.
bool X86DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  // Proceed to 'generic' cycle finder code
  return SelectionDAGISel::IsLegalToFold(N, U, Root);
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
                                 SDValue Store, SDValue TF) {
  // Splice the load out of the TokenFactor: its spot in the operand list is
  // taken over by the load's own input chain.
  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
    if (Load.getNode() == TF.getOperand(i).getNode())
      Ops.push_back(Load.getOperand(0));
    else
      Ops.push_back(TF.getOperand(i));
  SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // Re-chain the load below the TokenFactor, then chain the store after the
  // load's chain result.
  SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
                                               Load.getOperand(1),
                                               Load.getOperand(2));
  CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
                             Store.getOperand(2), Store.getOperand(3));
}

/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.  The
/// chain produced by the load must only be used by the store's chain operand,
/// otherwise this may produce a cycle in the DAG.
///
static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
                      SDValue &Load) {
  // Look through a single-use bitcast to find the underlying load.
  if (N.getOpcode() == ISD::BIT_CONVERT) {
    if (!N.hasOneUse())
      return false;
    N = N.getOperand(0);
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD || LD->isVolatile())
    return false;
  if (LD->getAddressingMode() != ISD::UNINDEXED)
    return false;

  // Only plain and any-extending loads can be folded into an RMW operation.
  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
    return false;

  // The loaded value must have exactly one use, the load's chain result
  // (value 1) must have exactly one use, the load must be from the store's
  // address, and the load must feed the store's chain (via the TokenFactor).
  if (N.hasOneUse() &&
      LD->hasNUsesOfValue(1, 1) &&
      N.getOperand(1) == Address &&
      LD->isOperandOf(Chain.getNode())) {
    Load = N;
    return true;
  }
  return false;
}

/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
                                  SDValue Call, SDValue CallSeqStart) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = CallSeqStart.getOperand(0);
  // Unhook the load from the chain feeding CALLSEQ_START: either the load is
  // the chain operand itself, or it is one input of a TokenFactor chain.
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected CallSeqStart chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
    Ops.push_back(CallSeqStart.getOperand(i));
  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
  // Re-chain the load onto the call's old chain operand, then make the call
  // chain through the load's chain result (value 1).
  CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  // The load must feed CALLSEQ_START's chain, either directly or through a
  // single-use TokenFactor.
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}


/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -O0 mode.
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected.
/// We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
         E = CurDAG->allnodes_end(); I != E; ++I) {
    if (I->getOpcode() == X86ISD::CALL) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      SDValue Chain = I->getOperand(0);
      SDValue Load  = I->getOperand(1);
      if (!isCalleeLoad(Load, Chain))
        continue;
      MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Otherwise, look for (store (op (load))) patterns hidden behind a
    // TokenFactor on the store's chain.
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDValue Chain = I->getOperand(0);

    if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
      continue;

    // N1 is the stored value, N2 is the store address.  Skip scalar FP
    // (x87) values and multi-use operations.
    SDValue N1 = I->getOperand(1);
    SDValue N2 = I->getOperand(2);
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDValue Load;
    unsigned Opcode = N1.getNode()->getOpcode();
    switch (Opcode) {
    // Commutative operations: the load may be either operand.
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::VECTOR_SHUFFLE: {
      SDValue N10 = N1.getOperand(0);
      SDValue N11 = N1.getOperand(1);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      if (!RModW)
        RModW = isRMWLoad(N11, Chain, N2, Load);
      break;
    }
    // Non-commutative operations: only the first operand may be the load.
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SUBC:
    case ISD::SUBE:
    case X86ISD::SHLD:
    case X86ISD::SHRD: {
      SDValue N10 = N1.getOperand(0);
      RModW = isRMWLoad(N10, Chain, N2, Load);
      break;
    }
    }

    if (RModW) {
      MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
      ++NumLoadMoved;
      checkForCycles(I);
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack.  This is a
/// gross hack.  We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass.  We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization.  As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert() {
  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    EVT SrcVT = N->getOperand(0).getValueType();
    EVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      // (Operand 1 of FP_ROUND is the "truncation is value-preserving" flag.)
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    EVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        NULL, 0, MemVT, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// InstructionSelect - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelect() {
  const Function *F = MF->getFunction();
  OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);

  if (OptLevel != CodeGenOpt::None)
    PreprocessForRMW();

  // FIXME: This should only happen when not compiled with -O0.
  PreprocessForFPConvert();

  // Codegen the basic block.
#ifndef NDEBUG
  DEBUG(dbgs() << "===== Instruction selection begins:\n");
  Indent = 0;
#endif
  SelectRoot(*CurDAG);
#ifndef NDEBUG
  DEBUG(dbgs() << "===== Instruction selection ends:\n");
#endif

  CurDAG->RemoveDeadNodes();
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  // On Cygwin/MinGW, call __main so the runtime can run global constructors.
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, DebugLoc::getUnknownLoc(),
            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}


/// MatchSegmentBaseAddress - Record the segment register of a
/// SegmentBaseAddress node in AM.  Returns false on success; returns true
/// (failure) if a segment has already been recorded.
bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
                                              X86ISelAddressMode &AM) {
  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
  SDValue Segment = N.getOperand(0);

  if (AM.Segment.getNode() == 0) {
    AM.Segment = Segment;
    return false;
  }

  return true;
}

/// MatchLoad - Try to fold a load of a segment base address into AM.
/// Returns false on success.
bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  
  SDValue Address = N.getOperand(1);
  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
      !MatchSegmentBaseAddress (Address, AM))
    return false;

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.
If no match is possible, this returns true, otherwise it 777/// returns false. 778bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { 779 // If the addressing mode already has a symbol as the displacement, we can 780 // never match another symbol. 781 if (AM.hasSymbolicDisplacement()) 782 return true; 783 784 SDValue N0 = N.getOperand(0); 785 CodeModel::Model M = TM.getCodeModel(); 786 787 // Handle X86-64 rip-relative addresses. We check this before checking direct 788 // folding because RIP is preferable to non-RIP accesses. 789 if (Subtarget->is64Bit() && 790 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 791 // they cannot be folded into immediate fields. 792 // FIXME: This can be improved for kernel and other models? 793 (M == CodeModel::Small || M == CodeModel::Kernel) && 794 // Base and index reg must be 0 in order to use %rip as base and lowering 795 // must allow RIP. 796 !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { 797 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 798 int64_t Offset = AM.Disp + G->getOffset(); 799 if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; 800 AM.GV = G->getGlobal(); 801 AM.Disp = Offset; 802 AM.SymbolFlags = G->getTargetFlags(); 803 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 804 int64_t Offset = AM.Disp + CP->getOffset(); 805 if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true; 806 AM.CP = CP->getConstVal(); 807 AM.Align = CP->getAlignment(); 808 AM.Disp = Offset; 809 AM.SymbolFlags = CP->getTargetFlags(); 810 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 811 AM.ES = S->getSymbol(); 812 AM.SymbolFlags = S->getTargetFlags(); 813 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 814 AM.JT = J->getIndex(); 815 AM.SymbolFlags = J->getTargetFlags(); 816 } else { 817 AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); 818 AM.SymbolFlags = 
cast<BlockAddressSDNode>(N0)->getTargetFlags(); 819 } 820 821 if (N.getOpcode() == X86ISD::WrapperRIP) 822 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 823 return false; 824 } 825 826 // Handle the case when globals fit in our immediate field: This is true for 827 // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit 828 // mode, this results in a non-RIP-relative computation. 829 if (!Subtarget->is64Bit() || 830 ((M == CodeModel::Small || M == CodeModel::Kernel) && 831 TM.getRelocationModel() == Reloc::Static)) { 832 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 833 AM.GV = G->getGlobal(); 834 AM.Disp += G->getOffset(); 835 AM.SymbolFlags = G->getTargetFlags(); 836 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 837 AM.CP = CP->getConstVal(); 838 AM.Align = CP->getAlignment(); 839 AM.Disp += CP->getOffset(); 840 AM.SymbolFlags = CP->getTargetFlags(); 841 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 842 AM.ES = S->getSymbol(); 843 AM.SymbolFlags = S->getTargetFlags(); 844 } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { 845 AM.JT = J->getIndex(); 846 AM.SymbolFlags = J->getTargetFlags(); 847 } else { 848 AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); 849 AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); 850 } 851 return false; 852 } 853 854 return true; 855} 856 857/// MatchAddress - Add the specified node to the specified addressing mode, 858/// returning true if it cannot be done. This just pattern matches for the 859/// addressing mode. 860bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { 861 if (MatchAddressRecursively(N, AM, 0)) 862 return true; 863 864 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has 865 // a smaller encoding and avoids a scaled-index. 
  // (Tail of MatchAddress post-processing.)  If all we matched is a scaled
  // index with scale 2 and no base, turn (,%reg,2) into (%reg,%reg,1): the
  // same value, but the base+index form has a smaller encoding.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base.Reg.getNode() == 0) {
    AM.Base.Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base.Reg.getNode() == 0 &&
      AM.IndexReg.getNode() == 0 &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

/// MatchAddressRecursively - Try to fold node N (and, recursively, its
/// operands) into the addressing mode AM.  Returns false on success (AM has
/// been extended to cover N) and true on failure.  Cases that speculatively
/// recurse (ADD/SUB/OR) snapshot AM into a Backup copy and restore it when a
/// candidate folding does not pan out.
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  bool is64Bit = Subtarget->is64Bit();
  DebugLoc dl = N.getDebugLoc();
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  CodeModel::Model M = TM.getCodeModel();

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    if (!AM.ES && AM.JT != -1) return true;

    // A constant can still be merged into the displacement if the combined
    // offset remains encodable for the current code model.
    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
      int64_t Val = AM.Disp + Cst->getSExtValue();
      if (X86::isOffsetSuitableForCodeModel(Val, M,
                                            AM.hasSymbolicDisplacement())) {
        AM.Disp = Val;
        return false;
      }
    }
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    // On x86-32 any 32-bit constant fits in the displacement; on x86-64 it
    // must still fit the 32-bit (sign-extended) disp field.
    if (!is64Bit ||
        X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
                                          AM.hasSymbolicDisplacement())) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::SegmentBaseAddress:
    if (!MatchSegmentBaseAddress(N, AM))
      return false;
    break;

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoad(N, AM))
      return false;
    break;

  case ISD::FrameIndex:
    // A frame index can only occupy the base slot, and only if the base is
    // still a free register slot.
    if (AM.BaseType == X86ISelAddressMode::RegBase
        && AM.Base.Reg.getNode() == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (ShVal.getNode()->getOpcode() == ISD::ADD &&
            isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
          if (!is64Bit ||
              X86::isOffsetSuitableForCodeModel(Disp, M,
                                                AM.hasSymbolicDisplacement()))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
      break;
    }
    // NOTE(review): if the shift amount is NOT a constant we fall through
    // into the SMUL_LOHI/MUL cases below.  This appears harmless (the same
    // dyn_cast of operand 1 fails there too, so those cases just break),
    // but an explicit break/comment would make the intent clearer.

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.getNode() == 0 &&
        AM.IndexReg.getNode() == 0) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
                                      CN->getZExtValue();
            if (!is64Bit ||
                X86::isOffsetSuitableForCodeModel(Disp, M,
                                                  AM.hasSymbolicDisplacement()))
              AM.Disp = Disp;
            else
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          // Same register goes into both base and index: X + X*(scale).
          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if A has multiple parts that can be folded into
    // the address.  Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }
    int Cost = 0;
    SDValue RHS = N.getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base.Reg.getNode() &&
         !AM.Base.Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.  (The sum of three bools counts how many address
    // fields the LHS populated that were empty before.)
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.  Freshly created
    // nodes have id -1; give each one the id of N (repositioning it before
    // N) so the selection walk still sees operands before users.
    if (Zero.getNode()->getNodeId() == -1 ||
        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    if (Neg.getNode()->getNodeId() == -1 ||
        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    return false;
  }

  case ISD::ADD: {
    // Try folding both operands into the address, in both orders (the
    // second order can succeed when the first does not).
    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) &&
        !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1))
      return false;
    AM = Backup;
    if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) &&
        !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1))
      return false;
    AM = Backup;

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base.Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      AM.Base.Reg = N.getNode()->getOperand(0);
      AM.IndexReg = N.getNode()->getOperand(1);
      AM.Scale = 1;
      return false;
    }
    break;
  }

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      uint64_t Offset = CN->getSExtValue();
      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          (!is64Bit ||
           X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
                                             AM.hasSymbolicDisplacement())) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp += Offset;
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    SDValue Shift = N.getOperand(0);
    if (Shift.getNumOperands() != 2) break;

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    SDValue X = Shift.getOperand(0);
    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
    // allows us to convert the shift and and into an h-register extract and
    // a scaled index.
    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
      unsigned ScaleLog = 8 - C1->getZExtValue();
      if (ScaleLog > 0 && ScaleLog < 4 &&
          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                      X, Eight);
        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
                                      Srl, Mask);
        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                      And, ShlCount);

        // Insert the new nodes into the topological ordering.
        if (Eight.getNode()->getNodeId() == -1 ||
            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Mask.getNode()->getNodeId() == -1 ||
            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Srl.getNode()->getNodeId() == -1 ||
            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
        }
        if (And.getNode()->getNodeId() == -1 ||
            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), And.getNode());
          And.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        // NOTE(review): the other blocks copy the id of the node they were
        // compared/repositioned against; this one compares against X but
        // copies N's id -- looks inconsistent, verify intent.
        if (ShlCount.getNode()->getNodeId() == -1 ||
            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (Shl.getNode()->getNodeId() == -1 ||
            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        CurDAG->ReplaceAllUsesWith(N, Shl);
        AM.IndexReg = And;
        AM.Scale = (1 << ScaleLog);
        return false;
      }
    }

    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (Shift.getOpcode() != ISD::SHL) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getZExtValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                         SDValue(C2, 0), SDValue(C1, 0));
    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
                                     NewANDMask);
    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                       NewAND, SDValue(C1, 0));

    // Insert the new nodes into the topological ordering.
    if (C1->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), C1);
      C1->setNodeId(X.getNode()->getNodeId());
    }
    if (NewANDMask.getNode()->getNodeId() == -1 ||
        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
    }
    if (NewAND.getNode()->getNodeId() == -1 ||
        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
    }
    if (NewSHIFT.getNode()->getNodeId() == -1 ||
        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
    }

    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  // Nothing special matched; fall back to using N directly as a base or
  // index register.
  return MatchAddressBase(N, AM);
}

/// MatchAddressBase - Helper for MatchAddress.  Add the specified node to the
/// specified addressing mode without any further recursion.
1289bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { 1290 // Is the base register already occupied? 1291 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { 1292 // If so, check to see if the scale index register is set. 1293 if (AM.IndexReg.getNode() == 0) { 1294 AM.IndexReg = N; 1295 AM.Scale = 1; 1296 return false; 1297 } 1298 1299 // Otherwise, we cannot select it. 1300 return true; 1301 } 1302 1303 // Default, generate it as a register. 1304 AM.BaseType = X86ISelAddressMode::RegBase; 1305 AM.Base.Reg = N; 1306 return false; 1307} 1308 1309/// SelectAddr - returns true if it is able pattern match an addressing mode. 1310/// It returns the operands which make up the maximal addressing mode it can 1311/// match by reference. 1312bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, 1313 SDValue &Scale, SDValue &Index, 1314 SDValue &Disp, SDValue &Segment) { 1315 X86ISelAddressMode AM; 1316 if (MatchAddress(N, AM)) 1317 return false; 1318 1319 EVT VT = N.getValueType(); 1320 if (AM.BaseType == X86ISelAddressMode::RegBase) { 1321 if (!AM.Base.Reg.getNode()) 1322 AM.Base.Reg = CurDAG->getRegister(0, VT); 1323 } 1324 1325 if (!AM.IndexReg.getNode()) 1326 AM.IndexReg = CurDAG->getRegister(0, VT); 1327 1328 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1329 return true; 1330} 1331 1332/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 1333/// match a load whose top elements are either undef or zeros. The load flavor 1334/// is derived from the type of N, which is either v4f32 or v2f64. 1335/// 1336/// We also return: 1337/// PatternChainNode: this is the matched node that has a chain input and 1338/// output. 
1339bool X86DAGToDAGISel::SelectScalarSSELoadXXX(SDNode *Root, 1340 SDValue N, SDValue &Base, 1341 SDValue &Scale, SDValue &Index, 1342 SDValue &Disp, SDValue &Segment, 1343 SDValue &PatternNodeWithChain) { 1344 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1345 PatternNodeWithChain = N.getOperand(0); 1346 if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && 1347 PatternNodeWithChain.hasOneUse() && 1348 IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && 1349 IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { 1350 LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); 1351 if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) 1352 return false; 1353 return true; 1354 } 1355 } 1356 1357 // Also handle the case where we explicitly require zeros in the top 1358 // elements. This is a vector shuffle from the zero vector. 1359 if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1360 // Check to see if the top elements are all zeros (or bitcast of zeros). 1361 N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1362 N.getOperand(0).getNode()->hasOneUse() && 1363 ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1364 N.getOperand(0).getOperand(0).hasOneUse() && 1365 IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && 1366 IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { 1367 // Okay, this is a zero extending load. Fold it. 1368 LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1369 if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1370 return false; 1371 PatternNodeWithChain = SDValue(LD, 0); 1372 return true; 1373 } 1374 return false; 1375} 1376 1377 1378/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1379/// mode it matches can be cost effectively emitted as an LEA instruction. 
bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.  (The sentinel register also lets us assert below that
  // MatchAddress never touched the segment field.)
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  assert (T == AM.Segment);
  AM.Segment = Copy;

  // Complexity counts how many address components the match uses; an LEA is
  // only emitted when it beats the cheaper add/shift alternatives (> 2).
  EVT VT = N.getValueType();
  unsigned Complexity = 0;
  // Note: the 'else' on the next nested if binds to the *inner* if
  // (base register present vs. absent), as indented.
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.getNode())
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some expermentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  // LEA never takes a segment; a dummy operand is passed to the shared
  // helper and discarded.
  SDValue Segment;
  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
/// Builds the fixed addressing mode used by the TLSADDR pseudo: the TLS
/// global as displacement, zero base, and (on 32-bit only) EBX as index.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp) {
  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
  AM.SymbolFlags = GA->getTargetFlags();

  if (N.getValueType() == MVT::i32) {
    // 32-bit mode uses %ebx as the index register here -- presumably the
    // PIC/GOT base required by the IA-32 TLS ABI; TODO confirm.
    AM.Scale = 1;
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
  } else {
    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
  }

  // TLSADDR takes no segment; the dummy operand is discarded.
  SDValue Segment;
  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}


/// TryFoldLoad - Try to fold the load node N into user P.  On success the
/// address operands of the load (taken from its pointer operand) are
/// returned by reference for use as memory operands of P.
bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) {
  if (ISD::isNON_EXTLoad(N.getNode()) &&
      IsProfitableToFold(N, P, P) &&
      IsLegalToFold(N, P, P))
    // Operand 1 of a load is its base pointer.
    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
  return false;
}

/// getGlobalBaseReg - Return an SDNode that returns the value of
/// the global base register. Output instructions required to
/// initialize the global base register, if necessary.
1481/// 1482SDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1483 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 1484 return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); 1485} 1486 1487static SDNode *FindCallStartFromCall(SDNode *Node) { 1488 if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; 1489 assert(Node->getOperand(0).getValueType() == MVT::Other && 1490 "Node doesn't have a token chain argument!"); 1491 return FindCallStartFromCall(Node->getOperand(0).getNode()); 1492} 1493 1494SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { 1495 SDValue Chain = Node->getOperand(0); 1496 SDValue In1 = Node->getOperand(1); 1497 SDValue In2L = Node->getOperand(2); 1498 SDValue In2H = Node->getOperand(3); 1499 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1500 if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1501 return NULL; 1502 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1503 MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1504 const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; 1505 SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), 1506 MVT::i32, MVT::i32, MVT::Other, Ops, 1507 array_lengthof(Ops)); 1508 cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1); 1509 return ResNode; 1510} 1511 1512SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) { 1513 if (Node->hasAnyUseOfValue(0)) 1514 return 0; 1515 1516 // Optimize common patterns for __sync_add_and_fetch and 1517 // __sync_sub_and_fetch where the result is not used. This allows us 1518 // to use "lock" version of add, sub, inc, dec instructions. 1519 // FIXME: Do not use special instructions but instead add the "lock" 1520 // prefix to the target node somehow. The extra information will then be 1521 // transferred to machine instruction and it denotes the prefix. 
1522 SDValue Chain = Node->getOperand(0); 1523 SDValue Ptr = Node->getOperand(1); 1524 SDValue Val = Node->getOperand(2); 1525 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1526 if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1527 return 0; 1528 1529 bool isInc = false, isDec = false, isSub = false, isCN = false; 1530 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); 1531 if (CN) { 1532 isCN = true; 1533 int64_t CNVal = CN->getSExtValue(); 1534 if (CNVal == 1) 1535 isInc = true; 1536 else if (CNVal == -1) 1537 isDec = true; 1538 else if (CNVal >= 0) 1539 Val = CurDAG->getTargetConstant(CNVal, NVT); 1540 else { 1541 isSub = true; 1542 Val = CurDAG->getTargetConstant(-CNVal, NVT); 1543 } 1544 } else if (Val.hasOneUse() && 1545 Val.getOpcode() == ISD::SUB && 1546 X86::isZeroNode(Val.getOperand(0))) { 1547 isSub = true; 1548 Val = Val.getOperand(1); 1549 } 1550 1551 unsigned Opc = 0; 1552 switch (NVT.getSimpleVT().SimpleTy) { 1553 default: return 0; 1554 case MVT::i8: 1555 if (isInc) 1556 Opc = X86::LOCK_INC8m; 1557 else if (isDec) 1558 Opc = X86::LOCK_DEC8m; 1559 else if (isSub) { 1560 if (isCN) 1561 Opc = X86::LOCK_SUB8mi; 1562 else 1563 Opc = X86::LOCK_SUB8mr; 1564 } else { 1565 if (isCN) 1566 Opc = X86::LOCK_ADD8mi; 1567 else 1568 Opc = X86::LOCK_ADD8mr; 1569 } 1570 break; 1571 case MVT::i16: 1572 if (isInc) 1573 Opc = X86::LOCK_INC16m; 1574 else if (isDec) 1575 Opc = X86::LOCK_DEC16m; 1576 else if (isSub) { 1577 if (isCN) { 1578 if (Predicate_i16immSExt8(Val.getNode())) 1579 Opc = X86::LOCK_SUB16mi8; 1580 else 1581 Opc = X86::LOCK_SUB16mi; 1582 } else 1583 Opc = X86::LOCK_SUB16mr; 1584 } else { 1585 if (isCN) { 1586 if (Predicate_i16immSExt8(Val.getNode())) 1587 Opc = X86::LOCK_ADD16mi8; 1588 else 1589 Opc = X86::LOCK_ADD16mi; 1590 } else 1591 Opc = X86::LOCK_ADD16mr; 1592 } 1593 break; 1594 case MVT::i32: 1595 if (isInc) 1596 Opc = X86::LOCK_INC32m; 1597 else if (isDec) 1598 Opc = X86::LOCK_DEC32m; 1599 else if (isSub) { 1600 if (isCN) { 1601 if 
(Predicate_i32immSExt8(Val.getNode())) 1602 Opc = X86::LOCK_SUB32mi8; 1603 else 1604 Opc = X86::LOCK_SUB32mi; 1605 } else 1606 Opc = X86::LOCK_SUB32mr; 1607 } else { 1608 if (isCN) { 1609 if (Predicate_i32immSExt8(Val.getNode())) 1610 Opc = X86::LOCK_ADD32mi8; 1611 else 1612 Opc = X86::LOCK_ADD32mi; 1613 } else 1614 Opc = X86::LOCK_ADD32mr; 1615 } 1616 break; 1617 case MVT::i64: 1618 if (isInc) 1619 Opc = X86::LOCK_INC64m; 1620 else if (isDec) 1621 Opc = X86::LOCK_DEC64m; 1622 else if (isSub) { 1623 Opc = X86::LOCK_SUB64mr; 1624 if (isCN) { 1625 if (Predicate_i64immSExt8(Val.getNode())) 1626 Opc = X86::LOCK_SUB64mi8; 1627 else if (Predicate_i64immSExt32(Val.getNode())) 1628 Opc = X86::LOCK_SUB64mi32; 1629 } 1630 } else { 1631 Opc = X86::LOCK_ADD64mr; 1632 if (isCN) { 1633 if (Predicate_i64immSExt8(Val.getNode())) 1634 Opc = X86::LOCK_ADD64mi8; 1635 else if (Predicate_i64immSExt32(Val.getNode())) 1636 Opc = X86::LOCK_ADD64mi32; 1637 } 1638 } 1639 break; 1640 } 1641 1642 DebugLoc dl = Node->getDebugLoc(); 1643 SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, 1644 dl, NVT), 0); 1645 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 1646 MemOp[0] = cast<MemSDNode>(Node)->getMemOperand(); 1647 if (isInc || isDec) { 1648 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain }; 1649 SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0); 1650 cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1651 SDValue RetVals[] = { Undef, Ret }; 1652 return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1653 } else { 1654 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain }; 1655 SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0); 1656 cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1); 1657 SDValue RetVals[] = { Undef, Ret }; 1658 return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1659 } 1660} 1661 1662/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP 
node has 1663/// any uses which require the SF or OF bits to be accurate. 1664static bool HasNoSignedComparisonUses(SDNode *N) { 1665 // Examine each user of the node. 1666 for (SDNode::use_iterator UI = N->use_begin(), 1667 UE = N->use_end(); UI != UE; ++UI) { 1668 // Only examine CopyToReg uses. 1669 if (UI->getOpcode() != ISD::CopyToReg) 1670 return false; 1671 // Only examine CopyToReg uses that copy to EFLAGS. 1672 if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != 1673 X86::EFLAGS) 1674 return false; 1675 // Examine each user of the CopyToReg use. 1676 for (SDNode::use_iterator FlagUI = UI->use_begin(), 1677 FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { 1678 // Only examine the Flag result. 1679 if (FlagUI.getUse().getResNo() != 1) continue; 1680 // Anything unusual: assume conservatively. 1681 if (!FlagUI->isMachineOpcode()) return false; 1682 // Examine the opcode of the user. 1683 switch (FlagUI->getMachineOpcode()) { 1684 // These comparisons don't treat the most significant bit specially. 
1685 case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr: 1686 case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr: 1687 case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm: 1688 case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm: 1689 case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4: 1690 case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4: 1691 case X86::CMOVA16rr: case X86::CMOVA16rm: 1692 case X86::CMOVA32rr: case X86::CMOVA32rm: 1693 case X86::CMOVA64rr: case X86::CMOVA64rm: 1694 case X86::CMOVAE16rr: case X86::CMOVAE16rm: 1695 case X86::CMOVAE32rr: case X86::CMOVAE32rm: 1696 case X86::CMOVAE64rr: case X86::CMOVAE64rm: 1697 case X86::CMOVB16rr: case X86::CMOVB16rm: 1698 case X86::CMOVB32rr: case X86::CMOVB32rm: 1699 case X86::CMOVB64rr: case X86::CMOVB64rm: 1700 case X86::CMOVBE16rr: case X86::CMOVBE16rm: 1701 case X86::CMOVBE32rr: case X86::CMOVBE32rm: 1702 case X86::CMOVBE64rr: case X86::CMOVBE64rm: 1703 case X86::CMOVE16rr: case X86::CMOVE16rm: 1704 case X86::CMOVE32rr: case X86::CMOVE32rm: 1705 case X86::CMOVE64rr: case X86::CMOVE64rm: 1706 case X86::CMOVNE16rr: case X86::CMOVNE16rm: 1707 case X86::CMOVNE32rr: case X86::CMOVNE32rm: 1708 case X86::CMOVNE64rr: case X86::CMOVNE64rm: 1709 case X86::CMOVNP16rr: case X86::CMOVNP16rm: 1710 case X86::CMOVNP32rr: case X86::CMOVNP32rm: 1711 case X86::CMOVNP64rr: case X86::CMOVNP64rm: 1712 case X86::CMOVP16rr: case X86::CMOVP16rm: 1713 case X86::CMOVP32rr: case X86::CMOVP32rm: 1714 case X86::CMOVP64rr: case X86::CMOVP64rm: 1715 continue; 1716 // Anything else: assume conservatively. 
1717 default: return false; 1718 } 1719 } 1720 } 1721 return true; 1722} 1723 1724SDNode *X86DAGToDAGISel::Select(SDNode *Node) { 1725 EVT NVT = Node->getValueType(0); 1726 unsigned Opc, MOpc; 1727 unsigned Opcode = Node->getOpcode(); 1728 DebugLoc dl = Node->getDebugLoc(); 1729 1730#ifndef NDEBUG 1731 DEBUG({ 1732 dbgs() << std::string(Indent, ' ') << "Selecting: "; 1733 Node->dump(CurDAG); 1734 dbgs() << '\n'; 1735 }); 1736 Indent += 2; 1737#endif 1738 1739 if (Node->isMachineOpcode()) { 1740#ifndef NDEBUG 1741 DEBUG({ 1742 dbgs() << std::string(Indent-2, ' ') << "== "; 1743 Node->dump(CurDAG); 1744 dbgs() << '\n'; 1745 }); 1746 Indent -= 2; 1747#endif 1748 return NULL; // Already selected. 1749 } 1750 1751 switch (Opcode) { 1752 default: break; 1753 case X86ISD::GlobalBaseReg: 1754 return getGlobalBaseReg(); 1755 1756 case X86ISD::ATOMOR64_DAG: 1757 return SelectAtomic64(Node, X86::ATOMOR6432); 1758 case X86ISD::ATOMXOR64_DAG: 1759 return SelectAtomic64(Node, X86::ATOMXOR6432); 1760 case X86ISD::ATOMADD64_DAG: 1761 return SelectAtomic64(Node, X86::ATOMADD6432); 1762 case X86ISD::ATOMSUB64_DAG: 1763 return SelectAtomic64(Node, X86::ATOMSUB6432); 1764 case X86ISD::ATOMNAND64_DAG: 1765 return SelectAtomic64(Node, X86::ATOMNAND6432); 1766 case X86ISD::ATOMAND64_DAG: 1767 return SelectAtomic64(Node, X86::ATOMAND6432); 1768 case X86ISD::ATOMSWAP64_DAG: 1769 return SelectAtomic64(Node, X86::ATOMSWAP6432); 1770 1771 case ISD::ATOMIC_LOAD_ADD: { 1772 SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); 1773 if (RetVal) 1774 return RetVal; 1775 break; 1776 } 1777 1778 case ISD::SMUL_LOHI: 1779 case ISD::UMUL_LOHI: { 1780 SDValue N0 = Node->getOperand(0); 1781 SDValue N1 = Node->getOperand(1); 1782 1783 bool isSigned = Opcode == ISD::SMUL_LOHI; 1784 if (!isSigned) { 1785 switch (NVT.getSimpleVT().SimpleTy) { 1786 default: llvm_unreachable("Unsupported VT!"); 1787 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 1788 case MVT::i16: Opc = X86::MUL16r; MOpc = 
X86::MUL16m; break; 1789 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; 1790 case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; 1791 } 1792 } else { 1793 switch (NVT.getSimpleVT().SimpleTy) { 1794 default: llvm_unreachable("Unsupported VT!"); 1795 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 1796 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 1797 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 1798 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 1799 } 1800 } 1801 1802 unsigned LoReg, HiReg; 1803 switch (NVT.getSimpleVT().SimpleTy) { 1804 default: llvm_unreachable("Unsupported VT!"); 1805 case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; 1806 case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; 1807 case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; 1808 case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; 1809 } 1810 1811 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1812 bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1813 // Multiply is commmutative. 1814 if (!foldedLoad) { 1815 foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1816 if (foldedLoad) 1817 std::swap(N0, N1); 1818 } 1819 1820 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 1821 N0, SDValue()).getValue(1); 1822 1823 if (foldedLoad) { 1824 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1825 InFlag }; 1826 SDNode *CNode = 1827 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1828 array_lengthof(Ops)); 1829 InFlag = SDValue(CNode, 1); 1830 // Update the chain. 1831 ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1832 } else { 1833 InFlag = 1834 SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1835 } 1836 1837 // Copy the low half of the result, if it is needed. 
1838 if (!SDValue(Node, 0).use_empty()) { 1839 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1840 LoReg, NVT, InFlag); 1841 InFlag = Result.getValue(2); 1842 ReplaceUses(SDValue(Node, 0), Result); 1843#ifndef NDEBUG 1844 DEBUG({ 1845 dbgs() << std::string(Indent-2, ' ') << "=> "; 1846 Result.getNode()->dump(CurDAG); 1847 dbgs() << '\n'; 1848 }); 1849#endif 1850 } 1851 // Copy the high half of the result, if it is needed. 1852 if (!SDValue(Node, 1).use_empty()) { 1853 SDValue Result; 1854 if (HiReg == X86::AH && Subtarget->is64Bit()) { 1855 // Prevent use of AH in a REX instruction by referencing AX instead. 1856 // Shift it down 8 bits. 1857 Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1858 X86::AX, MVT::i16, InFlag); 1859 InFlag = Result.getValue(2); 1860 Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 1861 Result, 1862 CurDAG->getTargetConstant(8, MVT::i8)), 0); 1863 // Then truncate it down to i8. 1864 Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, 1865 MVT::i8, Result); 1866 } else { 1867 Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1868 HiReg, NVT, InFlag); 1869 InFlag = Result.getValue(2); 1870 } 1871 ReplaceUses(SDValue(Node, 1), Result); 1872#ifndef NDEBUG 1873 DEBUG({ 1874 dbgs() << std::string(Indent-2, ' ') << "=> "; 1875 Result.getNode()->dump(CurDAG); 1876 dbgs() << '\n'; 1877 }); 1878#endif 1879 } 1880 1881#ifndef NDEBUG 1882 Indent -= 2; 1883#endif 1884 1885 return NULL; 1886 } 1887 1888 case ISD::SDIVREM: 1889 case ISD::UDIVREM: { 1890 SDValue N0 = Node->getOperand(0); 1891 SDValue N1 = Node->getOperand(1); 1892 1893 bool isSigned = Opcode == ISD::SDIVREM; 1894 if (!isSigned) { 1895 switch (NVT.getSimpleVT().SimpleTy) { 1896 default: llvm_unreachable("Unsupported VT!"); 1897 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 1898 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 1899 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 
1900 case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; 1901 } 1902 } else { 1903 switch (NVT.getSimpleVT().SimpleTy) { 1904 default: llvm_unreachable("Unsupported VT!"); 1905 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 1906 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 1907 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 1908 case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 1909 } 1910 } 1911 1912 unsigned LoReg, HiReg, ClrReg; 1913 unsigned ClrOpcode, SExtOpcode; 1914 switch (NVT.getSimpleVT().SimpleTy) { 1915 default: llvm_unreachable("Unsupported VT!"); 1916 case MVT::i8: 1917 LoReg = X86::AL; ClrReg = HiReg = X86::AH; 1918 ClrOpcode = 0; 1919 SExtOpcode = X86::CBW; 1920 break; 1921 case MVT::i16: 1922 LoReg = X86::AX; HiReg = X86::DX; 1923 ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; 1924 SExtOpcode = X86::CWD; 1925 break; 1926 case MVT::i32: 1927 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; 1928 ClrOpcode = X86::MOV32r0; 1929 SExtOpcode = X86::CDQ; 1930 break; 1931 case MVT::i64: 1932 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; 1933 ClrOpcode = X86::MOV64r0; 1934 SExtOpcode = X86::CQO; 1935 break; 1936 } 1937 1938 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1939 bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1940 bool signBitIsZero = CurDAG->SignBitIsZero(N0); 1941 1942 SDValue InFlag; 1943 if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { 1944 // Special case for div8, just use a move with zero extension to AX to 1945 // clear the upper 8 bits (AH). 
1946 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; 1947 if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { 1948 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; 1949 Move = 1950 SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, 1951 MVT::Other, Ops, 1952 array_lengthof(Ops)), 0); 1953 Chain = Move.getValue(1); 1954 ReplaceUses(N0.getValue(1), Chain); 1955 } else { 1956 Move = 1957 SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); 1958 Chain = CurDAG->getEntryNode(); 1959 } 1960 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); 1961 InFlag = Chain.getValue(1); 1962 } else { 1963 InFlag = 1964 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, 1965 LoReg, N0, SDValue()).getValue(1); 1966 if (isSigned && !signBitIsZero) { 1967 // Sign extend the low part into the high part. 1968 InFlag = 1969 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); 1970 } else { 1971 // Zero out the high part, effectively zero extending the input. 1972 SDValue ClrNode = 1973 SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); 1974 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, 1975 ClrNode, InFlag).getValue(1); 1976 } 1977 } 1978 1979 if (foldedLoad) { 1980 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1981 InFlag }; 1982 SDNode *CNode = 1983 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1984 array_lengthof(Ops)); 1985 InFlag = SDValue(CNode, 1); 1986 // Update the chain. 1987 ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1988 } else { 1989 InFlag = 1990 SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1991 } 1992 1993 // Copy the division (low) result, if it is needed. 
1994 if (!SDValue(Node, 0).use_empty()) { 1995 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1996 LoReg, NVT, InFlag); 1997 InFlag = Result.getValue(2); 1998 ReplaceUses(SDValue(Node, 0), Result); 1999#ifndef NDEBUG 2000 DEBUG({ 2001 dbgs() << std::string(Indent-2, ' ') << "=> "; 2002 Result.getNode()->dump(CurDAG); 2003 dbgs() << '\n'; 2004 }); 2005#endif 2006 } 2007 // Copy the remainder (high) result, if it is needed. 2008 if (!SDValue(Node, 1).use_empty()) { 2009 SDValue Result; 2010 if (HiReg == X86::AH && Subtarget->is64Bit()) { 2011 // Prevent use of AH in a REX instruction by referencing AX instead. 2012 // Shift it down 8 bits. 2013 Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2014 X86::AX, MVT::i16, InFlag); 2015 InFlag = Result.getValue(2); 2016 Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 2017 Result, 2018 CurDAG->getTargetConstant(8, MVT::i8)), 2019 0); 2020 // Then truncate it down to i8. 2021 Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, 2022 MVT::i8, Result); 2023 } else { 2024 Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 2025 HiReg, NVT, InFlag); 2026 InFlag = Result.getValue(2); 2027 } 2028 ReplaceUses(SDValue(Node, 1), Result); 2029#ifndef NDEBUG 2030 DEBUG({ 2031 dbgs() << std::string(Indent-2, ' ') << "=> "; 2032 Result.getNode()->dump(CurDAG); 2033 dbgs() << '\n'; 2034 }); 2035#endif 2036 } 2037 2038#ifndef NDEBUG 2039 Indent -= 2; 2040#endif 2041 2042 return NULL; 2043 } 2044 2045 case X86ISD::CMP: { 2046 SDValue N0 = Node->getOperand(0); 2047 SDValue N1 = Node->getOperand(1); 2048 2049 // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to 2050 // use a smaller encoding. 
2051 if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 2052 N0.getValueType() != MVT::i8 && 2053 X86::isZeroNode(N1)) { 2054 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1)); 2055 if (!C) break; 2056 2057 // For example, convert "testl %eax, $8" to "testb %al, $8" 2058 if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && 2059 (!(C->getZExtValue() & 0x80) || 2060 HasNoSignedComparisonUses(Node))) { 2061 SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8); 2062 SDValue Reg = N0.getNode()->getOperand(0); 2063 2064 // On x86-32, only the ABCD registers have 8-bit subregisters. 2065 if (!Subtarget->is64Bit()) { 2066 TargetRegisterClass *TRC = 0; 2067 switch (N0.getValueType().getSimpleVT().SimpleTy) { 2068 case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2069 case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2070 default: llvm_unreachable("Unsupported TEST operand type!"); 2071 } 2072 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 2073 Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2074 Reg.getValueType(), Reg, RC), 0); 2075 } 2076 2077 // Extract the l-register. 2078 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl, 2079 MVT::i8, Reg); 2080 2081 // Emit a testb. 2082 return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm); 2083 } 2084 2085 // For example, "testl %eax, $2048" to "testb %ah, $8". 2086 if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && 2087 (!(C->getZExtValue() & 0x8000) || 2088 HasNoSignedComparisonUses(Node))) { 2089 // Shift the immediate right by 8 bits. 2090 SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, 2091 MVT::i8); 2092 SDValue Reg = N0.getNode()->getOperand(0); 2093 2094 // Put the value in an ABCD register. 
2095 TargetRegisterClass *TRC = 0; 2096 switch (N0.getValueType().getSimpleVT().SimpleTy) { 2097 case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break; 2098 case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break; 2099 case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break; 2100 default: llvm_unreachable("Unsupported TEST operand type!"); 2101 } 2102 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32); 2103 Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl, 2104 Reg.getValueType(), Reg, RC), 0); 2105 2106 // Extract the h-register. 2107 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl, 2108 MVT::i8, Reg); 2109 2110 // Emit a testb. No special NOREX tricks are needed since there's 2111 // only one GPR operand! 2112 return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, 2113 Subreg, ShiftedImm); 2114 } 2115 2116 // For example, "testl %eax, $32776" to "testw %ax, $32776". 2117 if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && 2118 N0.getValueType() != MVT::i16 && 2119 (!(C->getZExtValue() & 0x8000) || 2120 HasNoSignedComparisonUses(Node))) { 2121 SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16); 2122 SDValue Reg = N0.getNode()->getOperand(0); 2123 2124 // Extract the 16-bit subregister. 2125 SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl, 2126 MVT::i16, Reg); 2127 2128 // Emit a testw. 2129 return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm); 2130 } 2131 2132 // For example, "testq %rax, $268468232" to "testl %eax, $268468232". 2133 if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && 2134 N0.getValueType() == MVT::i64 && 2135 (!(C->getZExtValue() & 0x80000000) || 2136 HasNoSignedComparisonUses(Node))) { 2137 SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32); 2138 SDValue Reg = N0.getNode()->getOperand(0); 2139 2140 // Extract the 32-bit subregister. 
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.  Safe because the guard above verified either that
        // bit 31 of the immediate is clear or that no user of this compare
        // looks at signed condition codes (HasNoSignedComparisonUses).
        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
      }
    }
    break;
  }
  }

  // None of the custom cases above produced a node; fall back to the
  // auto-generated (TableGen) pattern matcher.
  SDNode *ResNode = SelectCode(Node);

#ifndef NDEBUG
  DEBUG({
      dbgs() << std::string(Indent-2, ' ') << "=> ";
      if (ResNode == NULL || ResNode == Node)
        Node->dump(CurDAG);
      else
        ResNode->dump(CurDAG);
      dbgs() << '\n';
    });
  Indent -= 2;
#endif

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Select the memory address operand for an
/// inline-asm memory constraint.  Only the 'm' constraint is handled: the
/// address is matched with SelectAddr and its five x86 address components
/// (base, scale, index, displacement, segment) are appended to OutOps.
/// Returns true if the operand could not be handled ('o'/'v' and any other
/// constraint codes, or an address SelectAddr cannot match), false on
/// success.
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  // Emit the five address components in the fixed x86 memory-operand order.
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}