X86ISelDAGToDAG.cpp revision 7571eb5015ad07d4b849cd97a5f820be19523a66
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a DAG pattern matching instruction selector for X86, 11// converting from a legalized dag to a X86 dag. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "x86-isel" 16#include "X86.h" 17#include "X86InstrBuilder.h" 18#include "X86ISelLowering.h" 19#include "X86MachineFunctionInfo.h" 20#include "X86RegisterInfo.h" 21#include "X86Subtarget.h" 22#include "X86TargetMachine.h" 23#include "llvm/GlobalValue.h" 24#include "llvm/Instructions.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/Support/CFG.h" 27#include "llvm/Type.h" 28#include "llvm/CodeGen/MachineConstantPool.h" 29#include "llvm/CodeGen/MachineFunction.h" 30#include "llvm/CodeGen/MachineFrameInfo.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/CodeGen/SelectionDAGISel.h" 34#include "llvm/Target/TargetMachine.h" 35#include "llvm/Target/TargetOptions.h" 36#include "llvm/Support/Compiler.h" 37#include "llvm/Support/Debug.h" 38#include "llvm/Support/ErrorHandling.h" 39#include "llvm/Support/MathExtras.h" 40#include "llvm/Support/Streams.h" 41#include "llvm/Support/raw_ostream.h" 42#include "llvm/ADT/SmallPtrSet.h" 43#include "llvm/ADT/Statistic.h" 44using namespace llvm; 45 46#include "llvm/Support/CommandLine.h" 47static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); 48 49STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 50 51//===----------------------------------------------------------------------===// 52// Pattern Matcher Implementation 53//===----------------------------------------------------------------------===// 54 55namespace { 56 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 57 /// SDValue's instead of register numbers for the leaves of the matched 58 /// tree. 59 struct X86ISelAddressMode { 60 enum { 61 RegBase, 62 FrameIndexBase 63 } BaseType; 64 65 struct { // This is really a union, discriminated by BaseType! 66 SDValue Reg; 67 int FrameIndex; 68 } Base; 69 70 unsigned Scale; 71 SDValue IndexReg; 72 int32_t Disp; 73 SDValue Segment; 74 GlobalValue *GV; 75 Constant *CP; 76 const char *ES; 77 int JT; 78 unsigned Align; // CP alignment. 79 unsigned char SymbolFlags; // X86II::MO_* 80 81 X86ISelAddressMode() 82 : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), 83 Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) { 84 } 85 86 bool hasSymbolicDisplacement() const { 87 return GV != 0 || CP != 0 || ES != 0 || JT != -1; 88 } 89 90 bool hasBaseOrIndexReg() const { 91 return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; 92 } 93 94 /// isRIPRelative - Return true if this addressing mode is already RIP 95 /// relative. 
96 bool isRIPRelative() const { 97 if (BaseType != RegBase) return false; 98 if (RegisterSDNode *RegNode = 99 dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) 100 return RegNode->getReg() == X86::RIP; 101 return false; 102 } 103 104 void setBaseReg(SDValue Reg) { 105 BaseType = RegBase; 106 Base.Reg = Reg; 107 } 108 109 void dump() { 110 cerr << "X86ISelAddressMode " << this << "\n"; 111 cerr << "Base.Reg "; 112 if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); 113 else cerr << "nul"; 114 cerr << " Base.FrameIndex " << Base.FrameIndex << "\n"; 115 cerr << " Scale" << Scale << "\n"; 116 cerr << "IndexReg "; 117 if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); 118 else cerr << "nul"; 119 cerr << " Disp " << Disp << "\n"; 120 cerr << "GV "; if (GV) GV->dump(); 121 else cerr << "nul"; 122 cerr << " CP "; if (CP) CP->dump(); 123 else cerr << "nul"; 124 cerr << "\n"; 125 cerr << "ES "; if (ES) cerr << ES; else cerr << "nul"; 126 cerr << " JT" << JT << " Align" << Align << "\n"; 127 } 128 }; 129} 130 131namespace { 132 //===--------------------------------------------------------------------===// 133 /// ISel - X86 specific code to select X86 machine instructions for 134 /// SelectionDAG operations. 135 /// 136 class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel { 137 /// X86Lowering - This object fully describes how to lower LLVM code to an 138 /// X86-specific SelectionDAG. 139 X86TargetLowering &X86Lowering; 140 141 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 142 /// make the right decision when generating code for different targets. 143 const X86Subtarget *Subtarget; 144 145 /// OptForSize - If true, selector should try to optimize for code size 146 /// instead of performance. 147 bool OptForSize; 148 149 public: 150 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) 151 : SelectionDAGISel(tm, OptLevel), 152 X86Lowering(*tm.getTargetLowering()), 153 Subtarget(&tm.getSubtarget<X86Subtarget>()), 154 OptForSize(false) {} 155 156 virtual const char *getPassName() const { 157 return "X86 DAG->DAG Instruction Selection"; 158 } 159 160 /// InstructionSelect - This callback is invoked by 161 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 162 virtual void InstructionSelect(); 163 164 virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); 165 166 virtual 167 bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; 168 169// Include the pieces autogenerated from the target description. 
170#include "X86GenDAGISel.inc" 171 172 private: 173 SDNode *Select(SDValue N); 174 SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); 175 SDNode *SelectAtomicLoadAdd(SDNode *Node, MVT NVT); 176 177 bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); 178 bool MatchLoad(SDValue N, X86ISelAddressMode &AM); 179 bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); 180 bool MatchAddress(SDValue N, X86ISelAddressMode &AM); 181 bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 182 unsigned Depth); 183 bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); 184 bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, 185 SDValue &Scale, SDValue &Index, SDValue &Disp, 186 SDValue &Segment); 187 bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, 188 SDValue &Scale, SDValue &Index, SDValue &Disp); 189 bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 190 SDValue &Scale, SDValue &Index, SDValue &Disp); 191 bool SelectScalarSSELoad(SDValue Op, SDValue Pred, 192 SDValue N, SDValue &Base, SDValue &Scale, 193 SDValue &Index, SDValue &Disp, 194 SDValue &Segment, 195 SDValue &InChain, SDValue &OutChain); 196 bool TryFoldLoad(SDValue P, SDValue N, 197 SDValue &Base, SDValue &Scale, 198 SDValue &Index, SDValue &Disp, 199 SDValue &Segment); 200 void PreprocessForRMW(); 201 void PreprocessForFPConvert(); 202 203 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 204 /// inline asm expressions. 205 virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, 206 char ConstraintCode, 207 std::vector<SDValue> &OutOps); 208 209 void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 210 211 inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 212 SDValue &Scale, SDValue &Index, 213 SDValue &Disp, SDValue &Segment) { 214 Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 215 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : 216 AM.Base.Reg; 217 Scale = getI8Imm(AM.Scale); 218 Index = AM.IndexReg; 219 // These are 32-bit even in 64-bit mode since RIP relative offset 220 // is 32-bit. 221 if (AM.GV) 222 Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, 223 AM.SymbolFlags); 224 else if (AM.CP) 225 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 226 AM.Align, AM.Disp, AM.SymbolFlags); 227 else if (AM.ES) 228 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); 229 else if (AM.JT != -1) 230 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); 231 else 232 Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); 233 234 if (AM.Segment.getNode()) 235 Segment = AM.Segment; 236 else 237 Segment = CurDAG->getRegister(0, MVT::i32); 238 } 239 240 /// getI8Imm - Return a target constant with the specified value, of type 241 /// i8. 242 inline SDValue getI8Imm(unsigned Imm) { 243 return CurDAG->getTargetConstant(Imm, MVT::i8); 244 } 245 246 /// getI16Imm - Return a target constant with the specified value, of type 247 /// i16. 248 inline SDValue getI16Imm(unsigned Imm) { 249 return CurDAG->getTargetConstant(Imm, MVT::i16); 250 } 251 252 /// getI32Imm - Return a target constant with the specified value, of type 253 /// i32. 254 inline SDValue getI32Imm(unsigned Imm) { 255 return CurDAG->getTargetConstant(Imm, MVT::i32); 256 } 257 258 /// getGlobalBaseReg - Return an SDNode that returns the value of 259 /// the global base register. Output instructions required to 260 /// initialize the global base register, if necessary. 
261 /// 262 SDNode *getGlobalBaseReg(); 263 264 /// getTargetMachine - Return a reference to the TargetMachine, casted 265 /// to the target-specific type. 266 const X86TargetMachine &getTargetMachine() { 267 return static_cast<const X86TargetMachine &>(TM); 268 } 269 270 /// getInstrInfo - Return a reference to the TargetInstrInfo, casted 271 /// to the target-specific type. 272 const X86InstrInfo *getInstrInfo() { 273 return getTargetMachine().getInstrInfo(); 274 } 275 276#ifndef NDEBUG 277 unsigned Indent; 278#endif 279 }; 280} 281 282 283bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, 284 SDNode *Root) const { 285 if (OptLevel == CodeGenOpt::None) return false; 286 287 if (U == Root) 288 switch (U->getOpcode()) { 289 default: break; 290 case ISD::ADD: 291 case ISD::ADDC: 292 case ISD::ADDE: 293 case ISD::AND: 294 case ISD::OR: 295 case ISD::XOR: { 296 SDValue Op1 = U->getOperand(1); 297 298 // If the other operand is a 8-bit immediate we should fold the immediate 299 // instead. This reduces code size. 300 // e.g. 301 // movl 4(%esp), %eax 302 // addl $4, %eax 303 // vs. 304 // movl $4, %eax 305 // addl 4(%esp), %eax 306 // The former is 2 bytes shorter. In case where the increment is 1, then 307 // the saving can be 4 bytes (by using incl %eax). 308 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) 309 if (Imm->getAPIntValue().isSignedIntN(8)) 310 return false; 311 312 // If the other operand is a TLS address, we should fold it instead. 313 // This produces 314 // movl %gs:0, %eax 315 // leal i@NTPOFF(%eax), %eax 316 // instead of 317 // movl $i@NTPOFF, %eax 318 // addl %gs:0, %eax 319 // if the block also has an access to a second TLS address this will save 320 // a load. 321 // FIXME: This is probably also true for non TLS addresses. 322 if (Op1.getOpcode() == X86ISD::Wrapper) { 323 SDValue Val = Op1.getOperand(0); 324 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) 325 return false; 326 } 327 } 328 } 329 330 // Proceed to 'generic' cycle finder code 331 return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); 332} 333 334/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 335/// and move load below the TokenFactor. Replace store's chain operand with 336/// load's chain result. 337static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, 338 SDValue Store, SDValue TF) { 339 SmallVector<SDValue, 4> Ops; 340 for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) 341 if (Load.getNode() == TF.getOperand(i).getNode()) 342 Ops.push_back(Load.getOperand(0)); 343 else 344 Ops.push_back(TF.getOperand(i)); 345 CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); 346 CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); 347 CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), 348 Store.getOperand(2), Store.getOperand(3)); 349} 350 351/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. 
352/// 353static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, 354 SDValue &Load) { 355 if (N.getOpcode() == ISD::BIT_CONVERT) 356 N = N.getOperand(0); 357 358 LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 359 if (!LD || LD->isVolatile()) 360 return false; 361 if (LD->getAddressingMode() != ISD::UNINDEXED) 362 return false; 363 364 ISD::LoadExtType ExtType = LD->getExtensionType(); 365 if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) 366 return false; 367 368 if (N.hasOneUse() && 369 N.getOperand(1) == Address && 370 N.getNode()->isOperandOf(Chain.getNode())) { 371 Load = N; 372 return true; 373 } 374 return false; 375} 376 377/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain 378/// operand and move load below the call's chain operand. 379static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, 380 SDValue Call, SDValue CallSeqStart) { 381 SmallVector<SDValue, 8> Ops; 382 SDValue Chain = CallSeqStart.getOperand(0); 383 if (Chain.getNode() == Load.getNode()) 384 Ops.push_back(Load.getOperand(0)); 385 else { 386 assert(Chain.getOpcode() == ISD::TokenFactor && 387 "Unexpected CallSeqStart chain operand"); 388 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) 389 if (Chain.getOperand(i).getNode() == Load.getNode()) 390 Ops.push_back(Load.getOperand(0)); 391 else 392 Ops.push_back(Chain.getOperand(i)); 393 SDValue NewChain = 394 CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(), 395 MVT::Other, &Ops[0], Ops.size()); 396 Ops.clear(); 397 Ops.push_back(NewChain); 398 } 399 for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i) 400 Ops.push_back(CallSeqStart.getOperand(i)); 401 CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size()); 402 CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), 403 Load.getOperand(1), Load.getOperand(2)); 404 Ops.clear(); 405 Ops.push_back(SDValue(Load.getNode(), 1)); 406 for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) 407 Ops.push_back(Call.getOperand(i)); 408 CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); 409} 410 411/// isCalleeLoad - Return true if call address is a load and it can be 412/// moved below CALLSEQ_START and the chains leading up to the call. 413/// Return the CALLSEQ_START by reference as a second output. 414static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { 415 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 416 return false; 417 LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 418 if (!LD || 419 LD->isVolatile() || 420 LD->getAddressingMode() != ISD::UNINDEXED || 421 LD->getExtensionType() != ISD::NON_EXTLOAD) 422 return false; 423 424 // Now let's find the callseq_start. 425 while (Chain.getOpcode() != ISD::CALLSEQ_START) { 426 if (!Chain.hasOneUse()) 427 return false; 428 Chain = Chain.getOperand(0); 429 } 430 431 if (Chain.getOperand(0).getNode() == Callee.getNode()) 432 return true; 433 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && 434 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode())) 435 return true; 436 return false; 437} 438 439 440/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. 441/// This is only run if not in -O0 mode. 442/// This allows the instruction selector to pick more read-modify-write 443/// instructions. 
This is a common case: 444/// 445/// [Load chain] 446/// ^ 447/// | 448/// [Load] 449/// ^ ^ 450/// | | 451/// / \- 452/// / | 453/// [TokenFactor] [Op] 454/// ^ ^ 455/// | | 456/// \ / 457/// \ / 458/// [Store] 459/// 460/// The fact the store's chain operand != load's chain will prevent the 461/// (store (op (load))) instruction from being selected. We can transform it to: 462/// 463/// [Load chain] 464/// ^ 465/// | 466/// [TokenFactor] 467/// ^ 468/// | 469/// [Load] 470/// ^ ^ 471/// | | 472/// | \- 473/// | | 474/// | [Op] 475/// | ^ 476/// | | 477/// \ / 478/// \ / 479/// [Store] 480void X86DAGToDAGISel::PreprocessForRMW() { 481 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 482 E = CurDAG->allnodes_end(); I != E; ++I) { 483 if (I->getOpcode() == X86ISD::CALL) { 484 /// Also try moving call address load from outside callseq_start to just 485 /// before the call to allow it to be folded. 486 /// 487 /// [Load chain] 488 /// ^ 489 /// | 490 /// [Load] 491 /// ^ ^ 492 /// | | 493 /// / \-- 494 /// / | 495 ///[CALLSEQ_START] | 496 /// ^ | 497 /// | | 498 /// [LOAD/C2Reg] | 499 /// | | 500 /// \ / 501 /// \ / 502 /// [CALL] 503 SDValue Chain = I->getOperand(0); 504 SDValue Load = I->getOperand(1); 505 if (!isCalleeLoad(Load, Chain)) 506 continue; 507 MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); 508 ++NumLoadMoved; 509 continue; 510 } 511 512 if (!ISD::isNON_TRUNCStore(I)) 513 continue; 514 SDValue Chain = I->getOperand(0); 515 516 if (Chain.getNode()->getOpcode() != ISD::TokenFactor) 517 continue; 518 519 SDValue N1 = I->getOperand(1); 520 SDValue N2 = I->getOperand(2); 521 if ((N1.getValueType().isFloatingPoint() && 522 !N1.getValueType().isVector()) || 523 !N1.hasOneUse()) 524 continue; 525 526 bool RModW = false; 527 SDValue Load; 528 unsigned Opcode = N1.getNode()->getOpcode(); 529 switch (Opcode) { 530 case ISD::ADD: 531 case ISD::MUL: 532 case ISD::AND: 533 case ISD::OR: 534 case ISD::XOR: 535 case ISD::ADDC: 536 case ISD::ADDE: 537 case ISD::VECTOR_SHUFFLE: { 538 SDValue N10 = N1.getOperand(0); 539 SDValue N11 = N1.getOperand(1); 540 RModW = isRMWLoad(N10, Chain, N2, Load); 541 if (!RModW) 542 RModW = isRMWLoad(N11, Chain, N2, Load); 543 break; 544 } 545 case ISD::SUB: 546 case ISD::SHL: 547 case ISD::SRA: 548 case ISD::SRL: 549 case ISD::ROTL: 550 case ISD::ROTR: 551 case ISD::SUBC: 552 case ISD::SUBE: 553 case X86ISD::SHLD: 554 case X86ISD::SHRD: { 555 SDValue N10 = N1.getOperand(0); 556 RModW = isRMWLoad(N10, Chain, N2, Load); 557 break; 558 } 559 } 560 561 if (RModW) { 562 MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); 563 ++NumLoadMoved; 564 } 565 } 566} 567 568 569/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend 570/// nodes that target the FP stack to be store and load to the stack. This is a 571/// gross hack. We would like to simply mark these as being illegal, but when 572/// we do that, legalize produces these when it expands calls, then expands 573/// these in the same legalize pass. We would like dag combine to be able to 574/// hack on these between the call expansion and the node legalization. As such 575/// this pass basically does "really late" legalization of these inline with the 576/// X86 isel pass. 577void X86DAGToDAGISel::PreprocessForFPConvert() { 578 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 579 E = CurDAG->allnodes_end(); I != E; ) { 580 SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 
581 if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) 582 continue; 583 584 // If the source and destination are SSE registers, then this is a legal 585 // conversion that should not be lowered. 586 MVT SrcVT = N->getOperand(0).getValueType(); 587 MVT DstVT = N->getValueType(0); 588 bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); 589 bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); 590 if (SrcIsSSE && DstIsSSE) 591 continue; 592 593 if (!SrcIsSSE && !DstIsSSE) { 594 // If this is an FPStack extension, it is a noop. 595 if (N->getOpcode() == ISD::FP_EXTEND) 596 continue; 597 // If this is a value-preserving FPStack truncation, it is a noop. 598 if (N->getConstantOperandVal(1)) 599 continue; 600 } 601 602 // Here we could have an FP stack truncation or an FPStack <-> SSE convert. 603 // FPStack has extload and truncstore. SSE can fold direct loads into other 604 // operations. Based on this, decide what we want to do. 605 MVT MemVT; 606 if (N->getOpcode() == ISD::FP_ROUND) 607 MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 608 else 609 MemVT = SrcIsSSE ? SrcVT : DstVT; 610 611 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); 612 DebugLoc dl = N->getDebugLoc(); 613 614 // FIXME: optimize the case where the src/dest is a load or store? 615 SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, 616 N->getOperand(0), 617 MemTmp, NULL, 0, MemVT); 618 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, 619 NULL, 0, MemVT); 620 621 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 622 // extload we created. This will cause general havok on the dag because 623 // anything below the conversion could be folded into other existing nodes. 624 // To avoid invalidating 'I', back it up to the convert node. 625 --I; 626 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 627 628 // Now that we did that, the node is dead. Increment the iterator to the 629 // next node to process, then delete N. 630 ++I; 631 CurDAG->DeleteNode(N); 632 } 633} 634 635/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 636/// when it has created a SelectionDAG for us to codegen. 637void X86DAGToDAGISel::InstructionSelect() { 638 const Function *F = MF->getFunction(); 639 OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); 640 641 DEBUG(BB->dump()); 642 if (OptLevel != CodeGenOpt::None) 643 PreprocessForRMW(); 644 645 // FIXME: This should only happen when not compiled with -O0. 646 PreprocessForFPConvert(); 647 648 // Codegen the basic block. 649#ifndef NDEBUG 650 DOUT << "===== Instruction selection begins:\n"; 651 Indent = 0; 652#endif 653 SelectRoot(*CurDAG); 654#ifndef NDEBUG 655 DOUT << "===== Instruction selection ends:\n"; 656#endif 657 658 CurDAG->RemoveDeadNodes(); 659} 660 661/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 662/// the main function. 663void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 664 MachineFrameInfo *MFI) { 665 const TargetInstrInfo *TII = TM.getInstrInfo(); 666 if (Subtarget->isTargetCygMing()) 667 BuildMI(BB, DebugLoc::getUnknownLoc(), 668 TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 669} 670 671void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 672 // If this is main, emit special code for main. 
673 MachineBasicBlock *BB = MF.begin(); 674 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 675 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 676} 677 678 679bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, 680 X86ISelAddressMode &AM) { 681 assert(N.getOpcode() == X86ISD::SegmentBaseAddress); 682 SDValue Segment = N.getOperand(0); 683 684 if (AM.Segment.getNode() == 0) { 685 AM.Segment = Segment; 686 return false; 687 } 688 689 return true; 690} 691 692bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { 693 // This optimization is valid because the GNU TLS model defines that 694 // gs:0 (or fs:0 on X86-64) contains its own address. 695 // For more information see http://people.redhat.com/drepper/tls.pdf 696 697 SDValue Address = N.getOperand(1); 698 if (Address.getOpcode() == X86ISD::SegmentBaseAddress && 699 !MatchSegmentBaseAddress (Address, AM)) 700 return false; 701 702 return true; 703} 704 705/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes 706/// into an addressing mode. These wrap things that will resolve down into a 707/// symbol reference. If no match is possible, this returns true, otherwise it 708/// returns false. 709bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { 710 // If the addressing mode already has a symbol as the displacement, we can 711 // never match another symbol. 712 if (AM.hasSymbolicDisplacement()) 713 return true; 714 715 SDValue N0 = N.getOperand(0); 716 717 // Handle X86-64 rip-relative addresses. We check this before checking direct 718 // folding because RIP is preferable to non-RIP accesses. 719 if (Subtarget->is64Bit() && 720 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 721 // they cannot be folded into immediate fields. 722 // FIXME: This can be improved for kernel and other models? 723 TM.getCodeModel() == CodeModel::Small && 724 725 // Base and index reg must be 0 in order to use %rip as base and lowering 726 // must allow RIP. 727 !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { 728 729 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 730 int64_t Offset = AM.Disp + G->getOffset(); 731 if (!isInt32(Offset)) return true; 732 AM.GV = G->getGlobal(); 733 AM.Disp = Offset; 734 AM.SymbolFlags = G->getTargetFlags(); 735 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 736 int64_t Offset = AM.Disp + CP->getOffset(); 737 if (!isInt32(Offset)) return true; 738 AM.CP = CP->getConstVal(); 739 AM.Align = CP->getAlignment(); 740 AM.Disp = Offset; 741 AM.SymbolFlags = CP->getTargetFlags(); 742 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 743 AM.ES = S->getSymbol(); 744 AM.SymbolFlags = S->getTargetFlags(); 745 } else { 746 JumpTableSDNode *J = cast<JumpTableSDNode>(N0); 747 AM.JT = J->getIndex(); 748 AM.SymbolFlags = J->getTargetFlags(); 749 } 750 751 if (N.getOpcode() == X86ISD::WrapperRIP) 752 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 753 return false; 754 } 755 756 // Handle the case when globals fit in our immediate field: This is true for 757 // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit 758 // mode, this results in a non-RIP-relative computation. 
759 if (!Subtarget->is64Bit() || 760 (TM.getCodeModel() == CodeModel::Small && 761 TM.getRelocationModel() == Reloc::Static)) { 762 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 763 AM.GV = G->getGlobal(); 764 AM.Disp += G->getOffset(); 765 AM.SymbolFlags = G->getTargetFlags(); 766 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 767 AM.CP = CP->getConstVal(); 768 AM.Align = CP->getAlignment(); 769 AM.Disp += CP->getOffset(); 770 AM.SymbolFlags = CP->getTargetFlags(); 771 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 772 AM.ES = S->getSymbol(); 773 AM.SymbolFlags = S->getTargetFlags(); 774 } else { 775 JumpTableSDNode *J = cast<JumpTableSDNode>(N0); 776 AM.JT = J->getIndex(); 777 AM.SymbolFlags = J->getTargetFlags(); 778 } 779 return false; 780 } 781 782 return true; 783} 784 785/// MatchAddress - Add the specified node to the specified addressing mode, 786/// returning true if it cannot be done. This just pattern matches for the 787/// addressing mode. 788bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { 789 if (MatchAddressRecursively(N, AM, 0)) 790 return true; 791 792 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has 793 // a smaller encoding and avoids a scaled-index. 794 if (AM.Scale == 2 && 795 AM.BaseType == X86ISelAddressMode::RegBase && 796 AM.Base.Reg.getNode() == 0) { 797 AM.Base.Reg = AM.IndexReg; 798 AM.Scale = 1; 799 } 800 801 return false; 802} 803 804bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, 805 unsigned Depth) { 806 bool is64Bit = Subtarget->is64Bit(); 807 DebugLoc dl = N.getDebugLoc(); 808 DOUT << "MatchAddress: "; DEBUG(AM.dump()); 809 // Limit recursion. 810 if (Depth > 5) 811 return MatchAddressBase(N, AM); 812 813 // If this is already a %rip relative address, we can only merge immediates 814 // into it. Instead of handling this in every case, we handle it here. 815 // RIP relative addressing: %rip + 32-bit displacement! 816 if (AM.isRIPRelative()) { 817 // FIXME: JumpTable and ExternalSymbol address currently don't like 818 // displacements. It isn't very important, but this should be fixed for 819 // consistency. 
820 if (!AM.ES && AM.JT != -1) return true; 821 822 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { 823 int64_t Val = AM.Disp + Cst->getSExtValue(); 824 if (isInt32(Val)) { 825 AM.Disp = Val; 826 return false; 827 } 828 } 829 return true; 830 } 831 832 switch (N.getOpcode()) { 833 default: break; 834 case ISD::Constant: { 835 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 836 if (!is64Bit || isInt32(AM.Disp + Val)) { 837 AM.Disp += Val; 838 return false; 839 } 840 break; 841 } 842 843 case X86ISD::SegmentBaseAddress: 844 if (!MatchSegmentBaseAddress(N, AM)) 845 return false; 846 break; 847 848 case X86ISD::Wrapper: 849 case X86ISD::WrapperRIP: 850 if (!MatchWrapper(N, AM)) 851 return false; 852 break; 853 854 case ISD::LOAD: 855 if (!MatchLoad(N, AM)) 856 return false; 857 break; 858 859 case ISD::FrameIndex: 860 if (AM.BaseType == X86ISelAddressMode::RegBase 861 && AM.Base.Reg.getNode() == 0) { 862 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 863 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 864 return false; 865 } 866 break; 867 868 case ISD::SHL: 869 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) 870 break; 871 872 if (ConstantSDNode 873 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { 874 unsigned Val = CN->getZExtValue(); 875 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so 876 // that the base operand remains free for further matching. If 877 // the base doesn't end up getting used, a post-processing step 878 // in MatchAddress turns (,x,2) into (x,x), which is cheaper. 879 if (Val == 1 || Val == 2 || Val == 3) { 880 AM.Scale = 1 << Val; 881 SDValue ShVal = N.getNode()->getOperand(0); 882 883 // Okay, we know that we have a scale by now. However, if the scaled 884 // value is an add of something and a constant, we can fold the 885 // constant into the disp field here. 886 if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 887 isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { 888 AM.IndexReg = ShVal.getNode()->getOperand(0); 889 ConstantSDNode *AddVal = 890 cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); 891 uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); 892 if (!is64Bit || isInt32(Disp)) 893 AM.Disp = Disp; 894 else 895 AM.IndexReg = ShVal; 896 } else { 897 AM.IndexReg = ShVal; 898 } 899 return false; 900 } 901 break; 902 } 903 904 case ISD::SMUL_LOHI: 905 case ISD::UMUL_LOHI: 906 // A mul_lohi where we need the low part can be folded as a plain multiply. 907 if (N.getResNo() != 0) break; 908 // FALL THROUGH 909 case ISD::MUL: 910 case X86ISD::MUL_IMM: 911 // X*[3,5,9] -> X+X*[2,4,8] 912 if (AM.BaseType == X86ISelAddressMode::RegBase && 913 AM.Base.Reg.getNode() == 0 && 914 AM.IndexReg.getNode() == 0) { 915 if (ConstantSDNode 916 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) 917 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || 918 CN->getZExtValue() == 9) { 919 AM.Scale = unsigned(CN->getZExtValue())-1; 920 921 SDValue MulVal = N.getNode()->getOperand(0); 922 SDValue Reg; 923 924 // Okay, we know that we have a scale by now. However, if the scaled 925 // value is an add of something and a constant, we can fold the 926 // constant into the disp field here. 
927 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 928 isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) { 929 Reg = MulVal.getNode()->getOperand(0); 930 ConstantSDNode *AddVal = 931 cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); 932 uint64_t Disp = AM.Disp + AddVal->getSExtValue() * 933 CN->getZExtValue(); 934 if (!is64Bit || isInt32(Disp)) 935 AM.Disp = Disp; 936 else 937 Reg = N.getNode()->getOperand(0); 938 } else { 939 Reg = N.getNode()->getOperand(0); 940 } 941 942 AM.IndexReg = AM.Base.Reg = Reg; 943 return false; 944 } 945 } 946 break; 947 948 case ISD::SUB: { 949 // Given A-B, if A can be completely folded into the address and 950 // the index field with the index field unused, use -B as the index. 951 // This is a win if a has multiple parts that can be folded into 952 // the address. Also, this saves a mov if the base register has 953 // other uses, since it avoids a two-address sub instruction, however 954 // it costs an additional mov if the index register has other uses. 955 956 // Test if the LHS of the sub can be folded. 957 X86ISelAddressMode Backup = AM; 958 if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { 959 AM = Backup; 960 break; 961 } 962 // Test if the index field is free for use. 963 if (AM.IndexReg.getNode() || AM.isRIPRelative()) { 964 AM = Backup; 965 break; 966 } 967 int Cost = 0; 968 SDValue RHS = N.getNode()->getOperand(1); 969 // If the RHS involves a register with multiple uses, this 970 // transformation incurs an extra mov, due to the neg instruction 971 // clobbering its operand. 972 if (!RHS.getNode()->hasOneUse() || 973 RHS.getNode()->getOpcode() == ISD::CopyFromReg || 974 RHS.getNode()->getOpcode() == ISD::TRUNCATE || 975 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || 976 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && 977 RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) 978 ++Cost; 979 // If the base is a register with multiple uses, this 980 // transformation may save a mov. 981 if ((AM.BaseType == X86ISelAddressMode::RegBase && 982 AM.Base.Reg.getNode() && 983 !AM.Base.Reg.getNode()->hasOneUse()) || 984 AM.BaseType == X86ISelAddressMode::FrameIndexBase) 985 --Cost; 986 // If the folded LHS was interesting, this transformation saves 987 // address arithmetic. 988 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + 989 ((AM.Disp != 0) && (Backup.Disp == 0)) + 990 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) 991 --Cost; 992 // If it doesn't look like it may be an overall win, don't do it. 993 if (Cost >= 0) { 994 AM = Backup; 995 break; 996 } 997 998 // Ok, the transformation is legal and appears profitable. Go for it. 999 SDValue Zero = CurDAG->getConstant(0, N.getValueType()); 1000 SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); 1001 AM.IndexReg = Neg; 1002 AM.Scale = 1; 1003 1004 // Insert the new nodes into the topological ordering. 
1005 if (Zero.getNode()->getNodeId() == -1 || 1006 Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1007 CurDAG->RepositionNode(N.getNode(), Zero.getNode()); 1008 Zero.getNode()->setNodeId(N.getNode()->getNodeId()); 1009 } 1010 if (Neg.getNode()->getNodeId() == -1 || 1011 Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1012 CurDAG->RepositionNode(N.getNode(), Neg.getNode()); 1013 Neg.getNode()->setNodeId(N.getNode()->getNodeId()); 1014 } 1015 return false; 1016 } 1017 1018 case ISD::ADD: { 1019 X86ISelAddressMode Backup = AM; 1020 if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && 1021 !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) 1022 return false; 1023 AM = Backup; 1024 if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && 1025 !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) 1026 return false; 1027 AM = Backup; 1028 1029 // If we couldn't fold both operands into the address at the same time, 1030 // see if we can just put each operand into a register and fold at least 1031 // the add. 1032 if (AM.BaseType == X86ISelAddressMode::RegBase && 1033 !AM.Base.Reg.getNode() && 1034 !AM.IndexReg.getNode()) { 1035 AM.Base.Reg = N.getNode()->getOperand(0); 1036 AM.IndexReg = N.getNode()->getOperand(1); 1037 AM.Scale = 1; 1038 return false; 1039 } 1040 break; 1041 } 1042 1043 case ISD::OR: 1044 // Handle "X | C" as "X + C" iff X is known to have C bits clear. 1045 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1046 X86ISelAddressMode Backup = AM; 1047 uint64_t Offset = CN->getSExtValue(); 1048 // Start with the LHS as an addr mode. 1049 if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && 1050 // Address could not have picked a GV address for the displacement. 1051 AM.GV == NULL && 1052 // On x86-64, the resultant disp must fit in 32-bits. 1053 (!is64Bit || isInt32(AM.Disp + Offset)) && 1054 // Check to see if the LHS & C is zero. 1055 CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { 1056 AM.Disp += Offset; 1057 return false; 1058 } 1059 AM = Backup; 1060 } 1061 break; 1062 1063 case ISD::AND: { 1064 // Perform some heroic transforms on an and of a constant-count shift 1065 // with a constant to enable use of the scaled offset field. 1066 1067 SDValue Shift = N.getOperand(0); 1068 if (Shift.getNumOperands() != 2) break; 1069 1070 // Scale must not be used already. 1071 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; 1072 1073 SDValue X = Shift.getOperand(0); 1074 ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1075 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 1076 if (!C1 || !C2) break; 1077 1078 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This 1079 // allows us to convert the shift and and into an h-register extract and 1080 // a scaled index. 
1081 if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { 1082 unsigned ScaleLog = 8 - C1->getZExtValue(); 1083 if (ScaleLog > 0 && ScaleLog < 4 && 1084 C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { 1085 SDValue Eight = CurDAG->getConstant(8, MVT::i8); 1086 SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); 1087 SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1088 X, Eight); 1089 SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), 1090 Srl, Mask); 1091 SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); 1092 SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1093 And, ShlCount); 1094 1095 // Insert the new nodes into the topological ordering. 1096 if (Eight.getNode()->getNodeId() == -1 || 1097 Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1098 CurDAG->RepositionNode(X.getNode(), Eight.getNode()); 1099 Eight.getNode()->setNodeId(X.getNode()->getNodeId()); 1100 } 1101 if (Mask.getNode()->getNodeId() == -1 || 1102 Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1103 CurDAG->RepositionNode(X.getNode(), Mask.getNode()); 1104 Mask.getNode()->setNodeId(X.getNode()->getNodeId()); 1105 } 1106 if (Srl.getNode()->getNodeId() == -1 || 1107 Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1108 CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); 1109 Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1110 } 1111 if (And.getNode()->getNodeId() == -1 || 1112 And.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1113 CurDAG->RepositionNode(N.getNode(), And.getNode()); 1114 And.getNode()->setNodeId(N.getNode()->getNodeId()); 1115 } 1116 if (ShlCount.getNode()->getNodeId() == -1 || 1117 ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1118 CurDAG->RepositionNode(X.getNode(), ShlCount.getNode()); 1119 ShlCount.getNode()->setNodeId(N.getNode()->getNodeId()); 1120 } 1121 if (Shl.getNode()->getNodeId() == -1 || 1122 Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1123 CurDAG->RepositionNode(N.getNode(), Shl.getNode()); 1124 Shl.getNode()->setNodeId(N.getNode()->getNodeId()); 1125 } 1126 CurDAG->ReplaceAllUsesWith(N, Shl); 1127 AM.IndexReg = And; 1128 AM.Scale = (1 << ScaleLog); 1129 return false; 1130 } 1131 } 1132 1133 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this 1134 // allows us to fold the shift into this addressing mode. 1135 if (Shift.getOpcode() != ISD::SHL) break; 1136 1137 // Not likely to be profitable if either the AND or SHIFT node has more 1138 // than one use (unless all uses are for address computation). Besides, 1139 // isel mechanism requires their node ids to be reused. 1140 if (!N.hasOneUse() || !Shift.hasOneUse()) 1141 break; 1142 1143 // Verify that the shift amount is something we can fold. 1144 unsigned ShiftCst = C1->getZExtValue(); 1145 if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) 1146 break; 1147 1148 // Get the new AND mask, this folds to a constant. 1149 SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1150 SDValue(C2, 0), SDValue(C1, 0)); 1151 SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, 1152 NewANDMask); 1153 SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1154 NewAND, SDValue(C1, 0)); 1155 1156 // Insert the new nodes into the topological ordering. 
1157 if (C1->getNodeId() > X.getNode()->getNodeId()) { 1158 CurDAG->RepositionNode(X.getNode(), C1); 1159 C1->setNodeId(X.getNode()->getNodeId()); 1160 } 1161 if (NewANDMask.getNode()->getNodeId() == -1 || 1162 NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1163 CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode()); 1164 NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId()); 1165 } 1166 if (NewAND.getNode()->getNodeId() == -1 || 1167 NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1168 CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode()); 1169 NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1170 } 1171 if (NewSHIFT.getNode()->getNodeId() == -1 || 1172 NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1173 CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode()); 1174 NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); 1175 } 1176 1177 CurDAG->ReplaceAllUsesWith(N, NewSHIFT); 1178 1179 AM.Scale = 1 << ShiftCst; 1180 AM.IndexReg = NewAND; 1181 return false; 1182 } 1183 } 1184 1185 return MatchAddressBase(N, AM); 1186} 1187 1188/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the 1189/// specified addressing mode without any further recursion. 1190bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { 1191 // Is the base register already occupied? 1192 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { 1193 // If so, check to see if the scale index register is set. 1194 if (AM.IndexReg.getNode() == 0) { 1195 AM.IndexReg = N; 1196 AM.Scale = 1; 1197 return false; 1198 } 1199 1200 // Otherwise, we cannot select it. 1201 return true; 1202 } 1203 1204 // Default, generate it as a register. 1205 AM.BaseType = X86ISelAddressMode::RegBase; 1206 AM.Base.Reg = N; 1207 return false; 1208} 1209 1210/// SelectAddr - returns true if it is able pattern match an addressing mode. 1211/// It returns the operands which make up the maximal addressing mode it can 1212/// match by reference. 1213bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base, 1214 SDValue &Scale, SDValue &Index, 1215 SDValue &Disp, SDValue &Segment) { 1216 X86ISelAddressMode AM; 1217 bool Done = false; 1218 if (AvoidDupAddrCompute && !N.hasOneUse()) { 1219 unsigned Opcode = N.getOpcode(); 1220 if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex && 1221 Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) { 1222 // If we are able to fold N into addressing mode, then we'll allow it even 1223 // if N has multiple uses. In general, addressing computation is used as 1224 // addresses by all of its uses. But watch out for CopyToReg uses, that 1225 // means the address computation is liveout. It will be computed by a LEA 1226 // so we want to avoid computing the address twice. 
1227 for (SDNode::use_iterator UI = N.getNode()->use_begin(), 1228 UE = N.getNode()->use_end(); UI != UE; ++UI) { 1229 if (UI->getOpcode() == ISD::CopyToReg) { 1230 MatchAddressBase(N, AM); 1231 Done = true; 1232 break; 1233 } 1234 } 1235 } 1236 } 1237 1238 if (!Done && MatchAddress(N, AM)) 1239 return false; 1240 1241 MVT VT = N.getValueType(); 1242 if (AM.BaseType == X86ISelAddressMode::RegBase) { 1243 if (!AM.Base.Reg.getNode()) 1244 AM.Base.Reg = CurDAG->getRegister(0, VT); 1245 } 1246 1247 if (!AM.IndexReg.getNode()) 1248 AM.IndexReg = CurDAG->getRegister(0, VT); 1249 1250 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1251 return true; 1252} 1253 1254/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 1255/// match a load whose top elements are either undef or zeros. The load flavor 1256/// is derived from the type of N, which is either v4f32 or v2f64. 1257bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred, 1258 SDValue N, SDValue &Base, 1259 SDValue &Scale, SDValue &Index, 1260 SDValue &Disp, SDValue &Segment, 1261 SDValue &InChain, 1262 SDValue &OutChain) { 1263 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1264 InChain = N.getOperand(0).getValue(1); 1265 if (ISD::isNON_EXTLoad(InChain.getNode()) && 1266 InChain.getValue(0).hasOneUse() && 1267 N.hasOneUse() && 1268 IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) { 1269 LoadSDNode *LD = cast<LoadSDNode>(InChain); 1270 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1271 return false; 1272 OutChain = LD->getChain(); 1273 return true; 1274 } 1275 } 1276 1277 // Also handle the case where we explicitly require zeros in the top 1278 // elements. This is a vector shuffle from the zero vector. 1279 if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1280 // Check to see if the top elements are all zeros (or bitcast of zeros). 1281 N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1282 N.getOperand(0).getNode()->hasOneUse() && 1283 ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1284 N.getOperand(0).getOperand(0).hasOneUse()) { 1285 // Okay, this is a zero extending load. Fold it. 1286 LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1287 if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1288 return false; 1289 OutChain = LD->getChain(); 1290 InChain = SDValue(LD, 1); 1291 return true; 1292 } 1293 return false; 1294} 1295 1296 1297/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1298/// mode it matches can be cost effectively emitted as an LEA instruction. 1299bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N, 1300 SDValue &Base, SDValue &Scale, 1301 SDValue &Index, SDValue &Disp) { 1302 X86ISelAddressMode AM; 1303 1304 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support 1305 // segments. 
1306 SDValue Copy = AM.Segment; 1307 SDValue T = CurDAG->getRegister(0, MVT::i32); 1308 AM.Segment = T; 1309 if (MatchAddress(N, AM)) 1310 return false; 1311 assert (T == AM.Segment); 1312 AM.Segment = Copy; 1313 1314 MVT VT = N.getValueType(); 1315 unsigned Complexity = 0; 1316 if (AM.BaseType == X86ISelAddressMode::RegBase) 1317 if (AM.Base.Reg.getNode()) 1318 Complexity = 1; 1319 else 1320 AM.Base.Reg = CurDAG->getRegister(0, VT); 1321 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 1322 Complexity = 4; 1323 1324 if (AM.IndexReg.getNode()) 1325 Complexity++; 1326 else 1327 AM.IndexReg = CurDAG->getRegister(0, VT); 1328 1329 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with 1330 // a simple shift. 1331 if (AM.Scale > 1) 1332 Complexity++; 1333 1334 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 1335 // to a LEA. This is determined with some expermentation but is by no means 1336 // optimal (especially for code size consideration). LEA is nice because of 1337 // its three-address nature. Tweak the cost function again when we can run 1338 // convertToThreeAddress() at register allocation time. 1339 if (AM.hasSymbolicDisplacement()) { 1340 // For X86-64, we should always use lea to materialize RIP relative 1341 // addresses. 1342 if (Subtarget->is64Bit()) 1343 Complexity = 4; 1344 else 1345 Complexity += 2; 1346 } 1347 1348 if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) 1349 Complexity++; 1350 1351 // If it isn't worth using an LEA, reject it. 1352 if (Complexity <= 2) 1353 return false; 1354 1355 SDValue Segment; 1356 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1357 return true; 1358} 1359 1360/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. 1361bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 1362 SDValue &Scale, SDValue &Index, 1363 SDValue &Disp) { 1364 assert(Op.getOpcode() == X86ISD::TLSADDR); 1365 assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); 1366 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); 1367 1368 X86ISelAddressMode AM; 1369 AM.GV = GA->getGlobal(); 1370 AM.Disp += GA->getOffset(); 1371 AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); 1372 AM.SymbolFlags = GA->getTargetFlags(); 1373 1374 if (N.getValueType() == MVT::i32) { 1375 AM.Scale = 1; 1376 AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); 1377 } else { 1378 AM.IndexReg = CurDAG->getRegister(0, MVT::i64); 1379 } 1380 1381 SDValue Segment; 1382 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1383 return true; 1384} 1385 1386 1387bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, 1388 SDValue &Base, SDValue &Scale, 1389 SDValue &Index, SDValue &Disp, 1390 SDValue &Segment) { 1391 if (ISD::isNON_EXTLoad(N.getNode()) && 1392 N.hasOneUse() && 1393 IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode())) 1394 return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); 1395 return false; 1396} 1397 1398/// getGlobalBaseReg - Return an SDNode that returns the value of 1399/// the global base register. Output instructions required to 1400/// initialize the global base register, if necessary. 
1401/// 1402SDNode *X86DAGToDAGISel::getGlobalBaseReg() { 1403 unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); 1404 return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); 1405} 1406 1407static SDNode *FindCallStartFromCall(SDNode *Node) { 1408 if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; 1409 assert(Node->getOperand(0).getValueType() == MVT::Other && 1410 "Node doesn't have a token chain argument!"); 1411 return FindCallStartFromCall(Node->getOperand(0).getNode()); 1412} 1413 1414SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { 1415 SDValue Chain = Node->getOperand(0); 1416 SDValue In1 = Node->getOperand(1); 1417 SDValue In2L = Node->getOperand(2); 1418 SDValue In2H = Node->getOperand(3); 1419 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1420 if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1421 return NULL; 1422 SDValue LSI = Node->getOperand(4); // MemOperand 1423 const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain}; 1424 return CurDAG->getTargetNode(Opc, Node->getDebugLoc(), 1425 MVT::i32, MVT::i32, MVT::Other, Ops, 1426 array_lengthof(Ops)); 1427} 1428 1429SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, MVT NVT) { 1430 if (Node->hasAnyUseOfValue(0)) 1431 return 0; 1432 1433 // Optimize common patterns for __sync_add_and_fetch and 1434 // __sync_sub_and_fetch where the result is not used. This allows us 1435 // to use "lock" version of add, sub, inc, dec instructions. 1436 // FIXME: Do not use special instructions but instead add the "lock" 1437 // prefix to the target node somehow. The extra information will then be 1438 // transferred to machine instruction and it denotes the prefix. 1439 SDValue Chain = Node->getOperand(0); 1440 SDValue Ptr = Node->getOperand(1); 1441 SDValue Val = Node->getOperand(2); 1442 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1443 if (!SelectAddr(Ptr, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) 1444 return 0; 1445 1446 bool isInc = false, isDec = false, isSub = false, isCN = false; 1447 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val); 1448 if (CN) { 1449 isCN = true; 1450 int64_t CNVal = CN->getSExtValue(); 1451 if (CNVal == 1) 1452 isInc = true; 1453 else if (CNVal == -1) 1454 isDec = true; 1455 else if (CNVal >= 0) 1456 Val = CurDAG->getTargetConstant(CNVal, NVT); 1457 else { 1458 isSub = true; 1459 Val = CurDAG->getTargetConstant(-CNVal, NVT); 1460 } 1461 } else if (Val.hasOneUse() && 1462 Val.getOpcode() == ISD::SUB && 1463 X86::isZeroNode(Val.getOperand(0))) { 1464 isSub = true; 1465 Val = Val.getOperand(1); 1466 } 1467 1468 unsigned Opc = 0; 1469 switch (NVT.getSimpleVT()) { 1470 default: return 0; 1471 case MVT::i8: 1472 if (isInc) 1473 Opc = X86::LOCK_INC8m; 1474 else if (isDec) 1475 Opc = X86::LOCK_DEC8m; 1476 else if (isSub) { 1477 if (isCN) 1478 Opc = X86::LOCK_SUB8mi; 1479 else 1480 Opc = X86::LOCK_SUB8mr; 1481 } else { 1482 if (isCN) 1483 Opc = X86::LOCK_ADD8mi; 1484 else 1485 Opc = X86::LOCK_ADD8mr; 1486 } 1487 break; 1488 case MVT::i16: 1489 if (isInc) 1490 Opc = X86::LOCK_INC16m; 1491 else if (isDec) 1492 Opc = X86::LOCK_DEC16m; 1493 else if (isSub) { 1494 if (isCN) { 1495 if (Predicate_i16immSExt8(Val.getNode())) 1496 Opc = X86::LOCK_SUB16mi8; 1497 else 1498 Opc = X86::LOCK_SUB16mi; 1499 } else 1500 Opc = X86::LOCK_SUB16mr; 1501 } else { 1502 if (isCN) { 1503 if (Predicate_i16immSExt8(Val.getNode())) 1504 Opc = X86::LOCK_ADD16mi8; 1505 else 1506 Opc = X86::LOCK_ADD16mi; 1507 } else 1508 Opc = X86::LOCK_ADD16mr; 1509 } 1510 break; 1511 case MVT::i32: 
1512 if (isInc) 1513 Opc = X86::LOCK_INC32m; 1514 else if (isDec) 1515 Opc = X86::LOCK_DEC32m; 1516 else if (isSub) { 1517 if (isCN) { 1518 if (Predicate_i32immSExt8(Val.getNode())) 1519 Opc = X86::LOCK_SUB32mi8; 1520 else 1521 Opc = X86::LOCK_SUB32mi; 1522 } else 1523 Opc = X86::LOCK_SUB32mr; 1524 } else { 1525 if (isCN) { 1526 if (Predicate_i32immSExt8(Val.getNode())) 1527 Opc = X86::LOCK_ADD32mi8; 1528 else 1529 Opc = X86::LOCK_ADD32mi; 1530 } else 1531 Opc = X86::LOCK_ADD32mr; 1532 } 1533 break; 1534 case MVT::i64: 1535 if (isInc) 1536 Opc = X86::LOCK_INC64m; 1537 else if (isDec) 1538 Opc = X86::LOCK_DEC64m; 1539 else if (isSub) { 1540 Opc = X86::LOCK_SUB64mr; 1541 if (isCN) { 1542 if (Predicate_i64immSExt8(Val.getNode())) 1543 Opc = X86::LOCK_SUB64mi8; 1544 else if (Predicate_i64immSExt32(Val.getNode())) 1545 Opc = X86::LOCK_SUB64mi32; 1546 } 1547 } else { 1548 Opc = X86::LOCK_ADD64mr; 1549 if (isCN) { 1550 if (Predicate_i64immSExt8(Val.getNode())) 1551 Opc = X86::LOCK_ADD64mi8; 1552 else if (Predicate_i64immSExt32(Val.getNode())) 1553 Opc = X86::LOCK_ADD64mi32; 1554 } 1555 } 1556 break; 1557 } 1558 1559 DebugLoc dl = Node->getDebugLoc(); 1560 SDValue Undef = SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF, 1561 dl, NVT), 0); 1562 SDValue MemOp = CurDAG->getMemOperand(cast<MemSDNode>(Node)->getMemOperand()); 1563 if (isInc || isDec) { 1564 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, MemOp, Chain }; 1565 SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 7), 0); 1566 SDValue RetVals[] = { Undef, Ret }; 1567 return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1568 } else { 1569 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, MemOp, Chain }; 1570 SDValue Ret = SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Other, Ops, 8), 0); 1571 SDValue RetVals[] = { Undef, Ret }; 1572 return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); 1573 } 1574} 1575 1576SDNode *X86DAGToDAGISel::Select(SDValue N) { 1577 SDNode *Node = N.getNode(); 1578 MVT NVT = Node->getValueType(0); 1579 unsigned Opc, MOpc; 1580 unsigned Opcode = Node->getOpcode(); 1581 DebugLoc dl = Node->getDebugLoc(); 1582 1583#ifndef NDEBUG 1584 DOUT << std::string(Indent, ' ') << "Selecting: "; 1585 DEBUG(Node->dump(CurDAG)); 1586 DOUT << "\n"; 1587 Indent += 2; 1588#endif 1589 1590 if (Node->isMachineOpcode()) { 1591#ifndef NDEBUG 1592 DOUT << std::string(Indent-2, ' ') << "== "; 1593 DEBUG(Node->dump(CurDAG)); 1594 DOUT << "\n"; 1595 Indent -= 2; 1596#endif 1597 return NULL; // Already selected. 
1598 } 1599 1600 switch (Opcode) { 1601 default: break; 1602 case X86ISD::GlobalBaseReg: 1603 return getGlobalBaseReg(); 1604 1605 case X86ISD::ATOMOR64_DAG: 1606 return SelectAtomic64(Node, X86::ATOMOR6432); 1607 case X86ISD::ATOMXOR64_DAG: 1608 return SelectAtomic64(Node, X86::ATOMXOR6432); 1609 case X86ISD::ATOMADD64_DAG: 1610 return SelectAtomic64(Node, X86::ATOMADD6432); 1611 case X86ISD::ATOMSUB64_DAG: 1612 return SelectAtomic64(Node, X86::ATOMSUB6432); 1613 case X86ISD::ATOMNAND64_DAG: 1614 return SelectAtomic64(Node, X86::ATOMNAND6432); 1615 case X86ISD::ATOMAND64_DAG: 1616 return SelectAtomic64(Node, X86::ATOMAND6432); 1617 case X86ISD::ATOMSWAP64_DAG: 1618 return SelectAtomic64(Node, X86::ATOMSWAP6432); 1619 1620 case ISD::ATOMIC_LOAD_ADD: { 1621 SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); 1622 if (RetVal) 1623 return RetVal; 1624 break; 1625 } 1626 1627 case ISD::SMUL_LOHI: 1628 case ISD::UMUL_LOHI: { 1629 SDValue N0 = Node->getOperand(0); 1630 SDValue N1 = Node->getOperand(1); 1631 1632 bool isSigned = Opcode == ISD::SMUL_LOHI; 1633 if (!isSigned) 1634 switch (NVT.getSimpleVT()) { 1635 default: llvm_unreachable("Unsupported VT!"); 1636 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 1637 case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 1638 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; 1639 case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; 1640 } 1641 else 1642 switch (NVT.getSimpleVT()) { 1643 default: llvm_unreachable("Unsupported VT!"); 1644 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 1645 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 1646 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 1647 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 1648 } 1649 1650 unsigned LoReg, HiReg; 1651 switch (NVT.getSimpleVT()) { 1652 default: llvm_unreachable("Unsupported VT!"); 1653 case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; 1654 case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; 1655 case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; 1656 case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; 1657 } 1658 1659 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1660 bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1661 // multiplty is commmutative 1662 if (!foldedLoad) { 1663 foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1664 if (foldedLoad) 1665 std::swap(N0, N1); 1666 } 1667 1668 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 1669 N0, SDValue()).getValue(1); 1670 1671 if (foldedLoad) { 1672 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1673 InFlag }; 1674 SDNode *CNode = 1675 CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1676 array_lengthof(Ops)); 1677 InFlag = SDValue(CNode, 1); 1678 // Update the chain. 1679 ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1680 } else { 1681 InFlag = 1682 SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1683 } 1684 1685 // Copy the low half of the result, if it is needed. 1686 if (!N.getValue(0).use_empty()) { 1687 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1688 LoReg, NVT, InFlag); 1689 InFlag = Result.getValue(2); 1690 ReplaceUses(N.getValue(0), Result); 1691#ifndef NDEBUG 1692 DOUT << std::string(Indent-2, ' ') << "=> "; 1693 DEBUG(Result.getNode()->dump(CurDAG)); 1694 DOUT << "\n"; 1695#endif 1696 } 1697 // Copy the high half of the result, if it is needed. 
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
                                               Result,
                                               CurDAG->getTargetConstant(8, MVT::i8)),
                         0);
        // Then truncate it down to i8.
        SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
        Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
                                               MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned)
      switch (NVT.getSimpleVT()) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    else
      switch (NVT.getSimpleVT()) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }

    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT.getSimpleVT()) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  HiReg = X86::AH;
      ClrOpcode  = 0;
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrOpcode  = X86::MOV16r0;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; HiReg = X86::EDX;
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode  = ~0U; // NOT USED.
      SExtOpcode = X86::CQO;
      break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8: just use a move with zero-extension to AX to
      // clear the upper 8 bits (AH).
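      // DIV r/m8 divides AX by its operand, leaving the quotient in AL and
      // the remainder in AH, so zero-extending the dividend into AX is all
      // the setup an unsigned (or known-non-negative) divide needs.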
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
                                        MVT::Other, Ops,
                                        array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0), 0);
        Chain = CurDAG->getEntryNode();
      }
      Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag), 0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode;

        if (NVT.getSimpleVT() == MVT::i64) {
          ClrNode = SDValue(CurDAG->getTargetNode(X86::MOV32r0, dl, MVT::i32),
                            0);
          // We just did a 32-bit clear; insert it into a 64-bit register to
          // clear the whole 64-bit reg.
          SDValue Undef =
            SDValue(CurDAG->getTargetNode(TargetInstrInfo::IMPLICIT_DEF,
                                          dl, MVT::i64), 0);
          SDValue SubRegNo =
            CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
          ClrNode =
            SDValue(CurDAG->getTargetNode(TargetInstrInfo::INSERT_SUBREG, dl,
                                          MVT::i64, Undef, ClrNode, SubRegNo),
                    0);
        } else {
          ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT), 0);
        }

        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                              array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the division (low) result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the remainder (high) result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDValue Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
                                               Result,
                                               CurDAG->getTargetConstant(8, MVT::i8)),
                         0);
        // Then truncate it down to i8.
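        // Extracting the low 8-bit subregister of the shifted value yields
        // the remainder without ever encoding AH alongside a REX prefix.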
        SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
        Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
                                               MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.getNode()->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::DECLARE: {
    // Handle DECLARE nodes here because the second operand may have been
    // wrapped in X86ISD::Wrapper.
    SDValue Chain = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);
    SDValue N2 = Node->getOperand(2);
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);

    // FIXME: We need to handle this for VLAs.
    if (!FINode) {
      ReplaceUses(N.getValue(0), Chain);
      return NULL;
    }

    if (N2.getOpcode() == ISD::ADD &&
        N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
      N2 = N2.getOperand(1);

    // If N2 is not Wrapper(descriptor), then the llvm.declare has been
    // mangled somehow; just ignore it.
    if (N2.getOpcode() != X86ISD::Wrapper &&
        N2.getOpcode() != X86ISD::WrapperRIP) {
      ReplaceUses(N.getValue(0), Chain);
      return NULL;
    }
    GlobalAddressSDNode *GVNode =
      dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
    if (GVNode == 0) {
      ReplaceUses(N.getValue(0), Chain);
      return NULL;
    }
    SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
                                               TLI.getPointerTy());
    SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
                                                  TLI.getPointerTy());
    SDValue Ops[] = { Tmp1, Tmp2, Chain };
    return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
                                 MVT::Other, Ops,
                                 array_lengthof(Ops));
  }
  }

  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.getNode())
    DEBUG(N.getNode()->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}

bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsettable        ??
  case 'v':   // not offsettable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}