X86ISelDAGToDAG.cpp revision c23197a26f34f559ea9797de51e187087c039c42
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a DAG pattern matching instruction selector for X86, 11// converting from a legalized dag to a X86 dag. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "x86-isel" 16#include "X86.h" 17#include "X86InstrBuilder.h" 18#include "X86ISelLowering.h" 19#include "X86MachineFunctionInfo.h" 20#include "X86RegisterInfo.h" 21#include "X86Subtarget.h" 22#include "X86TargetMachine.h" 23#include "llvm/GlobalValue.h" 24#include "llvm/Instructions.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/Support/CFG.h" 27#include "llvm/Type.h" 28#include "llvm/CodeGen/MachineConstantPool.h" 29#include "llvm/CodeGen/MachineFunction.h" 30#include "llvm/CodeGen/MachineFrameInfo.h" 31#include "llvm/CodeGen/MachineInstrBuilder.h" 32#include "llvm/CodeGen/MachineRegisterInfo.h" 33#include "llvm/CodeGen/SelectionDAGISel.h" 34#include "llvm/Target/TargetMachine.h" 35#include "llvm/Target/TargetOptions.h" 36#include "llvm/Support/Compiler.h" 37#include "llvm/Support/Debug.h" 38#include "llvm/Support/ErrorHandling.h" 39#include "llvm/Support/MathExtras.h" 40#include "llvm/Support/Streams.h" 41#include "llvm/Support/raw_ostream.h" 42#include "llvm/ADT/SmallPtrSet.h" 43#include "llvm/ADT/Statistic.h" 44using namespace llvm; 45 46#include "llvm/Support/CommandLine.h" 47static cl::opt<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden); 48 49STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); 50 51//===----------------------------------------------------------------------===// 52// Pattern Matcher Implementation 
53//===----------------------------------------------------------------------===// 54 55namespace { 56 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 57 /// SDValue's instead of register numbers for the leaves of the matched 58 /// tree. 59 struct X86ISelAddressMode { 60 enum { 61 RegBase, 62 FrameIndexBase 63 } BaseType; 64 65 struct { // This is really a union, discriminated by BaseType! 66 SDValue Reg; 67 int FrameIndex; 68 } Base; 69 70 unsigned Scale; 71 SDValue IndexReg; 72 int32_t Disp; 73 SDValue Segment; 74 GlobalValue *GV; 75 Constant *CP; 76 const char *ES; 77 int JT; 78 unsigned Align; // CP alignment. 79 unsigned char SymbolFlags; // X86II::MO_* 80 81 X86ISelAddressMode() 82 : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), 83 Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) { 84 } 85 86 bool hasSymbolicDisplacement() const { 87 return GV != 0 || CP != 0 || ES != 0 || JT != -1; 88 } 89 90 bool hasBaseOrIndexReg() const { 91 return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; 92 } 93 94 /// isRIPRelative - Return true if this addressing mode is already RIP 95 /// relative. 
96 bool isRIPRelative() const { 97 if (BaseType != RegBase) return false; 98 if (RegisterSDNode *RegNode = 99 dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) 100 return RegNode->getReg() == X86::RIP; 101 return false; 102 } 103 104 void setBaseReg(SDValue Reg) { 105 BaseType = RegBase; 106 Base.Reg = Reg; 107 } 108 109 void dump() { 110 cerr << "X86ISelAddressMode " << this << "\n"; 111 cerr << "Base.Reg "; 112 if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); 113 else cerr << "nul"; 114 cerr << " Base.FrameIndex " << Base.FrameIndex << "\n"; 115 cerr << " Scale" << Scale << "\n"; 116 cerr << "IndexReg "; 117 if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); 118 else cerr << "nul"; 119 cerr << " Disp " << Disp << "\n"; 120 cerr << "GV "; if (GV) GV->dump(); 121 else cerr << "nul"; 122 cerr << " CP "; if (CP) CP->dump(); 123 else cerr << "nul"; 124 cerr << "\n"; 125 cerr << "ES "; if (ES) cerr << ES; else cerr << "nul"; 126 cerr << " JT" << JT << " Align" << Align << "\n"; 127 } 128 }; 129} 130 131namespace { 132 //===--------------------------------------------------------------------===// 133 /// ISel - X86 specific code to select X86 machine instructions for 134 /// SelectionDAG operations. 135 /// 136 class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel { 137 /// X86Lowering - This object fully describes how to lower LLVM code to an 138 /// X86-specific SelectionDAG. 139 X86TargetLowering &X86Lowering; 140 141 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 142 /// make the right decision when generating code for different targets. 143 const X86Subtarget *Subtarget; 144 145 /// CurBB - Current BB being isel'd. 146 /// 147 MachineBasicBlock *CurBB; 148 149 /// OptForSize - If true, selector should try to optimize for code size 150 /// instead of performance. 
151 bool OptForSize; 152 153 public: 154 explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) 155 : SelectionDAGISel(tm, OptLevel), 156 X86Lowering(*tm.getTargetLowering()), 157 Subtarget(&tm.getSubtarget<X86Subtarget>()), 158 OptForSize(false) {} 159 160 virtual const char *getPassName() const { 161 return "X86 DAG->DAG Instruction Selection"; 162 } 163 164 /// InstructionSelect - This callback is invoked by 165 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 166 virtual void InstructionSelect(); 167 168 virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); 169 170 virtual 171 bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; 172 173// Include the pieces autogenerated from the target description. 174#include "X86GenDAGISel.inc" 175 176 private: 177 SDNode *Select(SDValue N); 178 SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); 179 180 bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM); 181 bool MatchLoad(SDValue N, X86ISelAddressMode &AM); 182 bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); 183 bool MatchAddress(SDValue N, X86ISelAddressMode &AM, 184 unsigned Depth = 0); 185 bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); 186 bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, 187 SDValue &Scale, SDValue &Index, SDValue &Disp, 188 SDValue &Segment); 189 bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base, 190 SDValue &Scale, SDValue &Index, SDValue &Disp); 191 bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base, 192 SDValue &Scale, SDValue &Index, SDValue &Disp); 193 bool SelectScalarSSELoad(SDValue Op, SDValue Pred, 194 SDValue N, SDValue &Base, SDValue &Scale, 195 SDValue &Index, SDValue &Disp, 196 SDValue &Segment, 197 SDValue &InChain, SDValue &OutChain); 198 bool TryFoldLoad(SDValue P, SDValue N, 199 SDValue &Base, SDValue &Scale, 200 SDValue &Index, SDValue &Disp, 201 SDValue &Segment); 202 void PreprocessForRMW(); 203 void 
PreprocessForFPConvert(); 204 205 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 206 /// inline asm expressions. 207 virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, 208 char ConstraintCode, 209 std::vector<SDValue> &OutOps); 210 211 void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 212 213 inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, 214 SDValue &Scale, SDValue &Index, 215 SDValue &Disp, SDValue &Segment) { 216 Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 217 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : 218 AM.Base.Reg; 219 Scale = getI8Imm(AM.Scale); 220 Index = AM.IndexReg; 221 // These are 32-bit even in 64-bit mode since RIP relative offset 222 // is 32-bit. 223 if (AM.GV) 224 Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, 225 AM.SymbolFlags); 226 else if (AM.CP) 227 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, 228 AM.Align, AM.Disp, AM.SymbolFlags); 229 else if (AM.ES) 230 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); 231 else if (AM.JT != -1) 232 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); 233 else 234 Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); 235 236 if (AM.Segment.getNode()) 237 Segment = AM.Segment; 238 else 239 Segment = CurDAG->getRegister(0, MVT::i32); 240 } 241 242 /// getI8Imm - Return a target constant with the specified value, of type 243 /// i8. 244 inline SDValue getI8Imm(unsigned Imm) { 245 return CurDAG->getTargetConstant(Imm, MVT::i8); 246 } 247 248 /// getI16Imm - Return a target constant with the specified value, of type 249 /// i16. 250 inline SDValue getI16Imm(unsigned Imm) { 251 return CurDAG->getTargetConstant(Imm, MVT::i16); 252 } 253 254 /// getI32Imm - Return a target constant with the specified value, of type 255 /// i32. 
256 inline SDValue getI32Imm(unsigned Imm) { 257 return CurDAG->getTargetConstant(Imm, MVT::i32); 258 } 259 260 /// getGlobalBaseReg - Return an SDNode that returns the value of 261 /// the global base register. Output instructions required to 262 /// initialize the global base register, if necessary. 263 /// 264 SDNode *getGlobalBaseReg(); 265 266 /// getTargetMachine - Return a reference to the TargetMachine, casted 267 /// to the target-specific type. 268 const X86TargetMachine &getTargetMachine() { 269 return static_cast<const X86TargetMachine &>(TM); 270 } 271 272 /// getInstrInfo - Return a reference to the TargetInstrInfo, casted 273 /// to the target-specific type. 274 const X86InstrInfo *getInstrInfo() { 275 return getTargetMachine().getInstrInfo(); 276 } 277 278#ifndef NDEBUG 279 unsigned Indent; 280#endif 281 }; 282} 283 284 285bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, 286 SDNode *Root) const { 287 if (OptLevel == CodeGenOpt::None) return false; 288 289 if (U == Root) 290 switch (U->getOpcode()) { 291 default: break; 292 case ISD::ADD: 293 case ISD::ADDC: 294 case ISD::ADDE: 295 case ISD::AND: 296 case ISD::OR: 297 case ISD::XOR: { 298 SDValue Op1 = U->getOperand(1); 299 300 // If the other operand is a 8-bit immediate we should fold the immediate 301 // instead. This reduces code size. 302 // e.g. 303 // movl 4(%esp), %eax 304 // addl $4, %eax 305 // vs. 306 // movl $4, %eax 307 // addl 4(%esp), %eax 308 // The former is 2 bytes shorter. In case where the increment is 1, then 309 // the saving can be 4 bytes (by using incl %eax). 310 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) 311 if (Imm->getAPIntValue().isSignedIntN(8)) 312 return false; 313 314 // If the other operand is a TLS address, we should fold it instead. 
315 // This produces 316 // movl %gs:0, %eax 317 // leal i@NTPOFF(%eax), %eax 318 // instead of 319 // movl $i@NTPOFF, %eax 320 // addl %gs:0, %eax 321 // if the block also has an access to a second TLS address this will save 322 // a load. 323 // FIXME: This is probably also true for non TLS addresses. 324 if (Op1.getOpcode() == X86ISD::Wrapper) { 325 SDValue Val = Op1.getOperand(0); 326 if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) 327 return false; 328 } 329 } 330 } 331 332 // Proceed to 'generic' cycle finder code 333 return SelectionDAGISel::IsLegalAndProfitableToFold(N, U, Root); 334} 335 336/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 337/// and move load below the TokenFactor. Replace store's chain operand with 338/// load's chain result. 339static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, 340 SDValue Store, SDValue TF) { 341 SmallVector<SDValue, 4> Ops; 342 for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) 343 if (Load.getNode() == TF.getOperand(i).getNode()) 344 Ops.push_back(Load.getOperand(0)); 345 else 346 Ops.push_back(TF.getOperand(i)); 347 CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); 348 CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); 349 CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), 350 Store.getOperand(2), Store.getOperand(3)); 351} 352 353/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. 
354/// 355static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, 356 SDValue &Load) { 357 if (N.getOpcode() == ISD::BIT_CONVERT) 358 N = N.getOperand(0); 359 360 LoadSDNode *LD = dyn_cast<LoadSDNode>(N); 361 if (!LD || LD->isVolatile()) 362 return false; 363 if (LD->getAddressingMode() != ISD::UNINDEXED) 364 return false; 365 366 ISD::LoadExtType ExtType = LD->getExtensionType(); 367 if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD) 368 return false; 369 370 if (N.hasOneUse() && 371 N.getOperand(1) == Address && 372 N.getNode()->isOperandOf(Chain.getNode())) { 373 Load = N; 374 return true; 375 } 376 return false; 377} 378 379/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain 380/// operand and move load below the call's chain operand. 381static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, 382 SDValue Call, SDValue CallSeqStart) { 383 SmallVector<SDValue, 8> Ops; 384 SDValue Chain = CallSeqStart.getOperand(0); 385 if (Chain.getNode() == Load.getNode()) 386 Ops.push_back(Load.getOperand(0)); 387 else { 388 assert(Chain.getOpcode() == ISD::TokenFactor && 389 "Unexpected CallSeqStart chain operand"); 390 for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) 391 if (Chain.getOperand(i).getNode() == Load.getNode()) 392 Ops.push_back(Load.getOperand(0)); 393 else 394 Ops.push_back(Chain.getOperand(i)); 395 SDValue NewChain = 396 CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(), 397 MVT::Other, &Ops[0], Ops.size()); 398 Ops.clear(); 399 Ops.push_back(NewChain); 400 } 401 for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i) 402 Ops.push_back(CallSeqStart.getOperand(i)); 403 CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size()); 404 CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), 405 Load.getOperand(1), Load.getOperand(2)); 406 Ops.clear(); 407 Ops.push_back(SDValue(Load.getNode(), 1)); 408 for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) 409 
Ops.push_back(Call.getOperand(i)); 410 CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); 411} 412 413/// isCalleeLoad - Return true if call address is a load and it can be 414/// moved below CALLSEQ_START and the chains leading up to the call. 415/// Return the CALLSEQ_START by reference as a second output. 416static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { 417 if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) 418 return false; 419 LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); 420 if (!LD || 421 LD->isVolatile() || 422 LD->getAddressingMode() != ISD::UNINDEXED || 423 LD->getExtensionType() != ISD::NON_EXTLOAD) 424 return false; 425 426 // Now let's find the callseq_start. 427 while (Chain.getOpcode() != ISD::CALLSEQ_START) { 428 if (!Chain.hasOneUse()) 429 return false; 430 Chain = Chain.getOperand(0); 431 } 432 433 if (Chain.getOperand(0).getNode() == Callee.getNode()) 434 return true; 435 if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && 436 Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode())) 437 return true; 438 return false; 439} 440 441 442/// PreprocessForRMW - Preprocess the DAG to make instruction selection better. 443/// This is only run if not in -O0 mode. 444/// This allows the instruction selector to pick more read-modify-write 445/// instructions. This is a common case: 446/// 447/// [Load chain] 448/// ^ 449/// | 450/// [Load] 451/// ^ ^ 452/// | | 453/// / \- 454/// / | 455/// [TokenFactor] [Op] 456/// ^ ^ 457/// | | 458/// \ / 459/// \ / 460/// [Store] 461/// 462/// The fact the store's chain operand != load's chain will prevent the 463/// (store (op (load))) instruction from being selected. 
We can transform it to: 464/// 465/// [Load chain] 466/// ^ 467/// | 468/// [TokenFactor] 469/// ^ 470/// | 471/// [Load] 472/// ^ ^ 473/// | | 474/// | \- 475/// | | 476/// | [Op] 477/// | ^ 478/// | | 479/// \ / 480/// \ / 481/// [Store] 482void X86DAGToDAGISel::PreprocessForRMW() { 483 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 484 E = CurDAG->allnodes_end(); I != E; ++I) { 485 if (I->getOpcode() == X86ISD::CALL) { 486 /// Also try moving call address load from outside callseq_start to just 487 /// before the call to allow it to be folded. 488 /// 489 /// [Load chain] 490 /// ^ 491 /// | 492 /// [Load] 493 /// ^ ^ 494 /// | | 495 /// / \-- 496 /// / | 497 ///[CALLSEQ_START] | 498 /// ^ | 499 /// | | 500 /// [LOAD/C2Reg] | 501 /// | | 502 /// \ / 503 /// \ / 504 /// [CALL] 505 SDValue Chain = I->getOperand(0); 506 SDValue Load = I->getOperand(1); 507 if (!isCalleeLoad(Load, Chain)) 508 continue; 509 MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); 510 ++NumLoadMoved; 511 continue; 512 } 513 514 if (!ISD::isNON_TRUNCStore(I)) 515 continue; 516 SDValue Chain = I->getOperand(0); 517 518 if (Chain.getNode()->getOpcode() != ISD::TokenFactor) 519 continue; 520 521 SDValue N1 = I->getOperand(1); 522 SDValue N2 = I->getOperand(2); 523 if ((N1.getValueType().isFloatingPoint() && 524 !N1.getValueType().isVector()) || 525 !N1.hasOneUse()) 526 continue; 527 528 bool RModW = false; 529 SDValue Load; 530 unsigned Opcode = N1.getNode()->getOpcode(); 531 switch (Opcode) { 532 case ISD::ADD: 533 case ISD::MUL: 534 case ISD::AND: 535 case ISD::OR: 536 case ISD::XOR: 537 case ISD::ADDC: 538 case ISD::ADDE: 539 case ISD::VECTOR_SHUFFLE: { 540 SDValue N10 = N1.getOperand(0); 541 SDValue N11 = N1.getOperand(1); 542 RModW = isRMWLoad(N10, Chain, N2, Load); 543 if (!RModW) 544 RModW = isRMWLoad(N11, Chain, N2, Load); 545 break; 546 } 547 case ISD::SUB: 548 case ISD::SHL: 549 case ISD::SRA: 550 case ISD::SRL: 551 case ISD::ROTL: 552 case ISD::ROTR: 553 
case ISD::SUBC: 554 case ISD::SUBE: 555 case X86ISD::SHLD: 556 case X86ISD::SHRD: { 557 SDValue N10 = N1.getOperand(0); 558 RModW = isRMWLoad(N10, Chain, N2, Load); 559 break; 560 } 561 } 562 563 if (RModW) { 564 MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain); 565 ++NumLoadMoved; 566 } 567 } 568} 569 570 571/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend 572/// nodes that target the FP stack to be store and load to the stack. This is a 573/// gross hack. We would like to simply mark these as being illegal, but when 574/// we do that, legalize produces these when it expands calls, then expands 575/// these in the same legalize pass. We would like dag combine to be able to 576/// hack on these between the call expansion and the node legalization. As such 577/// this pass basically does "really late" legalization of these inline with the 578/// X86 isel pass. 579void X86DAGToDAGISel::PreprocessForFPConvert() { 580 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 581 E = CurDAG->allnodes_end(); I != E; ) { 582 SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. 583 if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND) 584 continue; 585 586 // If the source and destination are SSE registers, then this is a legal 587 // conversion that should not be lowered. 588 MVT SrcVT = N->getOperand(0).getValueType(); 589 MVT DstVT = N->getValueType(0); 590 bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT); 591 bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT); 592 if (SrcIsSSE && DstIsSSE) 593 continue; 594 595 if (!SrcIsSSE && !DstIsSSE) { 596 // If this is an FPStack extension, it is a noop. 597 if (N->getOpcode() == ISD::FP_EXTEND) 598 continue; 599 // If this is a value-preserving FPStack truncation, it is a noop. 600 if (N->getConstantOperandVal(1)) 601 continue; 602 } 603 604 // Here we could have an FP stack truncation or an FPStack <-> SSE convert. 
605 // FPStack has extload and truncstore. SSE can fold direct loads into other 606 // operations. Based on this, decide what we want to do. 607 MVT MemVT; 608 if (N->getOpcode() == ISD::FP_ROUND) 609 MemVT = DstVT; // FP_ROUND must use DstVT, we can't do a 'trunc load'. 610 else 611 MemVT = SrcIsSSE ? SrcVT : DstVT; 612 613 SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); 614 DebugLoc dl = N->getDebugLoc(); 615 616 // FIXME: optimize the case where the src/dest is a load or store? 617 SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, 618 N->getOperand(0), 619 MemTmp, NULL, 0, MemVT); 620 SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, 621 NULL, 0, MemVT); 622 623 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the 624 // extload we created. This will cause general havok on the dag because 625 // anything below the conversion could be folded into other existing nodes. 626 // To avoid invalidating 'I', back it up to the convert node. 627 --I; 628 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 629 630 // Now that we did that, the node is dead. Increment the iterator to the 631 // next node to process, then delete N. 632 ++I; 633 CurDAG->DeleteNode(N); 634 } 635} 636 637/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 638/// when it has created a SelectionDAG for us to codegen. 639void X86DAGToDAGISel::InstructionSelect() { 640 CurBB = BB; // BB can change as result of isel. 641 const Function *F = CurDAG->getMachineFunction().getFunction(); 642 OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); 643 644 DEBUG(BB->dump()); 645 if (OptLevel != CodeGenOpt::None) 646 PreprocessForRMW(); 647 648 // FIXME: This should only happen when not compiled with -O0. 649 PreprocessForFPConvert(); 650 651 // Codegen the basic block. 
652#ifndef NDEBUG 653 DOUT << "===== Instruction selection begins:\n"; 654 Indent = 0; 655#endif 656 SelectRoot(*CurDAG); 657#ifndef NDEBUG 658 DOUT << "===== Instruction selection ends:\n"; 659#endif 660 661 CurDAG->RemoveDeadNodes(); 662} 663 664/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 665/// the main function. 666void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 667 MachineFrameInfo *MFI) { 668 const TargetInstrInfo *TII = TM.getInstrInfo(); 669 if (Subtarget->isTargetCygMing()) 670 BuildMI(BB, DebugLoc::getUnknownLoc(), 671 TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); 672} 673 674void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 675 // If this is main, emit special code for main. 676 MachineBasicBlock *BB = MF.begin(); 677 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 678 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 679} 680 681 682bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N, 683 X86ISelAddressMode &AM) { 684 assert(N.getOpcode() == X86ISD::SegmentBaseAddress); 685 SDValue Segment = N.getOperand(0); 686 687 if (AM.Segment.getNode() == 0) { 688 AM.Segment = Segment; 689 return false; 690 } 691 692 return true; 693} 694 695bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) { 696 // This optimization is valid because the GNU TLS model defines that 697 // gs:0 (or fs:0 on X86-64) contains its own address. 698 // For more information see http://people.redhat.com/drepper/tls.pdf 699 700 SDValue Address = N.getOperand(1); 701 if (Address.getOpcode() == X86ISD::SegmentBaseAddress && 702 !MatchSegmentBaseAddress (Address, AM)) 703 return false; 704 705 return true; 706} 707 708/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes 709/// into an addressing mode. These wrap things that will resolve down into a 710/// symbol reference. If no match is possible, this returns true, otherwise it 711/// returns false. 
712bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { 713 // If the addressing mode already has a symbol as the displacement, we can 714 // never match another symbol. 715 if (AM.hasSymbolicDisplacement()) 716 return true; 717 718 SDValue N0 = N.getOperand(0); 719 720 // Handle X86-64 rip-relative addresses. We check this before checking direct 721 // folding because RIP is preferable to non-RIP accesses. 722 if (Subtarget->is64Bit() && 723 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 724 // they cannot be folded into immediate fields. 725 // FIXME: This can be improved for kernel and other models? 726 TM.getCodeModel() == CodeModel::Small && 727 728 // Base and index reg must be 0 in order to use %rip as base and lowering 729 // must allow RIP. 730 !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) { 731 732 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 733 int64_t Offset = AM.Disp + G->getOffset(); 734 if (!isInt32(Offset)) return true; 735 AM.GV = G->getGlobal(); 736 AM.Disp = Offset; 737 AM.SymbolFlags = G->getTargetFlags(); 738 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 739 int64_t Offset = AM.Disp + CP->getOffset(); 740 if (!isInt32(Offset)) return true; 741 AM.CP = CP->getConstVal(); 742 AM.Align = CP->getAlignment(); 743 AM.Disp = Offset; 744 AM.SymbolFlags = CP->getTargetFlags(); 745 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 746 AM.ES = S->getSymbol(); 747 AM.SymbolFlags = S->getTargetFlags(); 748 } else { 749 JumpTableSDNode *J = cast<JumpTableSDNode>(N0); 750 AM.JT = J->getIndex(); 751 AM.SymbolFlags = J->getTargetFlags(); 752 } 753 754 if (N.getOpcode() == X86ISD::WrapperRIP) 755 AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); 756 return false; 757 } 758 759 // Handle the case when globals fit in our immediate field: This is true for 760 // X86-32 always and X86-64 when in -static -mcmodel=small mode. 
In 64-bit 761 // mode, this results in a non-RIP-relative computation. 762 if (!Subtarget->is64Bit() || 763 (TM.getCodeModel() == CodeModel::Small && 764 TM.getRelocationModel() == Reloc::Static)) { 765 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { 766 AM.GV = G->getGlobal(); 767 AM.Disp += G->getOffset(); 768 AM.SymbolFlags = G->getTargetFlags(); 769 } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { 770 AM.CP = CP->getConstVal(); 771 AM.Align = CP->getAlignment(); 772 AM.Disp += CP->getOffset(); 773 AM.SymbolFlags = CP->getTargetFlags(); 774 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { 775 AM.ES = S->getSymbol(); 776 AM.SymbolFlags = S->getTargetFlags(); 777 } else { 778 JumpTableSDNode *J = cast<JumpTableSDNode>(N0); 779 AM.JT = J->getIndex(); 780 AM.SymbolFlags = J->getTargetFlags(); 781 } 782 return false; 783 } 784 785 return true; 786} 787 788/// MatchAddress - Add the specified node to the specified addressing mode, 789/// returning true if it cannot be done. This just pattern matches for the 790/// addressing mode. 791bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM, 792 unsigned Depth) { 793 bool is64Bit = Subtarget->is64Bit(); 794 DebugLoc dl = N.getDebugLoc(); 795 DOUT << "MatchAddress: "; DEBUG(AM.dump()); 796 // Limit recursion. 797 if (Depth > 5) 798 return MatchAddressBase(N, AM); 799 800 // If this is already a %rip relative address, we can only merge immediates 801 // into it. Instead of handling this in every case, we handle it here. 802 // RIP relative addressing: %rip + 32-bit displacement! 803 if (AM.isRIPRelative()) { 804 // FIXME: JumpTable and ExternalSymbol address currently don't like 805 // displacements. It isn't very important, but this should be fixed for 806 // consistency. 
807 if (!AM.ES && AM.JT != -1) return true; 808 809 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { 810 int64_t Val = AM.Disp + Cst->getSExtValue(); 811 if (isInt32(Val)) { 812 AM.Disp = Val; 813 return false; 814 } 815 } 816 return true; 817 } 818 819 switch (N.getOpcode()) { 820 default: break; 821 case ISD::Constant: { 822 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 823 if (!is64Bit || isInt32(AM.Disp + Val)) { 824 AM.Disp += Val; 825 return false; 826 } 827 break; 828 } 829 830 case X86ISD::SegmentBaseAddress: 831 if (!MatchSegmentBaseAddress(N, AM)) 832 return false; 833 break; 834 835 case X86ISD::Wrapper: 836 case X86ISD::WrapperRIP: 837 if (!MatchWrapper(N, AM)) 838 return false; 839 break; 840 841 case ISD::LOAD: 842 if (!MatchLoad(N, AM)) 843 return false; 844 break; 845 846 case ISD::FrameIndex: 847 if (AM.BaseType == X86ISelAddressMode::RegBase 848 && AM.Base.Reg.getNode() == 0) { 849 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 850 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 851 return false; 852 } 853 break; 854 855 case ISD::SHL: 856 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) 857 break; 858 859 if (ConstantSDNode 860 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { 861 unsigned Val = CN->getZExtValue(); 862 if (Val == 1 || Val == 2 || Val == 3) { 863 AM.Scale = 1 << Val; 864 SDValue ShVal = N.getNode()->getOperand(0); 865 866 // Okay, we know that we have a scale by now. However, if the scaled 867 // value is an add of something and a constant, we can fold the 868 // constant into the disp field here. 
869 if (ShVal.getNode()->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 870 isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { 871 AM.IndexReg = ShVal.getNode()->getOperand(0); 872 ConstantSDNode *AddVal = 873 cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); 874 uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); 875 if (!is64Bit || isInt32(Disp)) 876 AM.Disp = Disp; 877 else 878 AM.IndexReg = ShVal; 879 } else { 880 AM.IndexReg = ShVal; 881 } 882 return false; 883 } 884 break; 885 } 886 887 case ISD::SMUL_LOHI: 888 case ISD::UMUL_LOHI: 889 // A mul_lohi where we need the low part can be folded as a plain multiply. 890 if (N.getResNo() != 0) break; 891 // FALL THROUGH 892 case ISD::MUL: 893 case X86ISD::MUL_IMM: 894 // X*[3,5,9] -> X+X*[2,4,8] 895 if (AM.BaseType == X86ISelAddressMode::RegBase && 896 AM.Base.Reg.getNode() == 0 && 897 AM.IndexReg.getNode() == 0) { 898 if (ConstantSDNode 899 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) 900 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || 901 CN->getZExtValue() == 9) { 902 AM.Scale = unsigned(CN->getZExtValue())-1; 903 904 SDValue MulVal = N.getNode()->getOperand(0); 905 SDValue Reg; 906 907 // Okay, we know that we have a scale by now. However, if the scaled 908 // value is an add of something and a constant, we can fold the 909 // constant into the disp field here. 
910 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 911 isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) { 912 Reg = MulVal.getNode()->getOperand(0); 913 ConstantSDNode *AddVal = 914 cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); 915 uint64_t Disp = AM.Disp + AddVal->getSExtValue() * 916 CN->getZExtValue(); 917 if (!is64Bit || isInt32(Disp)) 918 AM.Disp = Disp; 919 else 920 Reg = N.getNode()->getOperand(0); 921 } else { 922 Reg = N.getNode()->getOperand(0); 923 } 924 925 AM.IndexReg = AM.Base.Reg = Reg; 926 return false; 927 } 928 } 929 break; 930 931 case ISD::SUB: { 932 // Given A-B, if A can be completely folded into the address and 933 // the index field with the index field unused, use -B as the index. 934 // This is a win if a has multiple parts that can be folded into 935 // the address. Also, this saves a mov if the base register has 936 // other uses, since it avoids a two-address sub instruction, however 937 // it costs an additional mov if the index register has other uses. 938 939 // Test if the LHS of the sub can be folded. 940 X86ISelAddressMode Backup = AM; 941 if (MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) { 942 AM = Backup; 943 break; 944 } 945 // Test if the index field is free for use. 946 if (AM.IndexReg.getNode() || AM.isRIPRelative()) { 947 AM = Backup; 948 break; 949 } 950 int Cost = 0; 951 SDValue RHS = N.getNode()->getOperand(1); 952 // If the RHS involves a register with multiple uses, this 953 // transformation incurs an extra mov, due to the neg instruction 954 // clobbering its operand. 
955 if (!RHS.getNode()->hasOneUse() || 956 RHS.getNode()->getOpcode() == ISD::CopyFromReg || 957 RHS.getNode()->getOpcode() == ISD::TRUNCATE || 958 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || 959 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && 960 RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) 961 ++Cost; 962 // If the base is a register with multiple uses, this 963 // transformation may save a mov. 964 if ((AM.BaseType == X86ISelAddressMode::RegBase && 965 AM.Base.Reg.getNode() && 966 !AM.Base.Reg.getNode()->hasOneUse()) || 967 AM.BaseType == X86ISelAddressMode::FrameIndexBase) 968 --Cost; 969 // If the folded LHS was interesting, this transformation saves 970 // address arithmetic. 971 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + 972 ((AM.Disp != 0) && (Backup.Disp == 0)) + 973 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) 974 --Cost; 975 // If it doesn't look like it may be an overall win, don't do it. 976 if (Cost >= 0) { 977 AM = Backup; 978 break; 979 } 980 981 // Ok, the transformation is legal and appears profitable. Go for it. 982 SDValue Zero = CurDAG->getConstant(0, N.getValueType()); 983 SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); 984 AM.IndexReg = Neg; 985 AM.Scale = 1; 986 987 // Insert the new nodes into the topological ordering. 
988 if (Zero.getNode()->getNodeId() == -1 || 989 Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { 990 CurDAG->RepositionNode(N.getNode(), Zero.getNode()); 991 Zero.getNode()->setNodeId(N.getNode()->getNodeId()); 992 } 993 if (Neg.getNode()->getNodeId() == -1 || 994 Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { 995 CurDAG->RepositionNode(N.getNode(), Neg.getNode()); 996 Neg.getNode()->setNodeId(N.getNode()->getNodeId()); 997 } 998 return false; 999 } 1000 1001 case ISD::ADD: { 1002 X86ISelAddressMode Backup = AM; 1003 if (!MatchAddress(N.getNode()->getOperand(0), AM, Depth+1) && 1004 !MatchAddress(N.getNode()->getOperand(1), AM, Depth+1)) 1005 return false; 1006 AM = Backup; 1007 if (!MatchAddress(N.getNode()->getOperand(1), AM, Depth+1) && 1008 !MatchAddress(N.getNode()->getOperand(0), AM, Depth+1)) 1009 return false; 1010 AM = Backup; 1011 1012 // If we couldn't fold both operands into the address at the same time, 1013 // see if we can just put each operand into a register and fold at least 1014 // the add. 1015 if (AM.BaseType == X86ISelAddressMode::RegBase && 1016 !AM.Base.Reg.getNode() && 1017 !AM.IndexReg.getNode()) { 1018 AM.Base.Reg = N.getNode()->getOperand(0); 1019 AM.IndexReg = N.getNode()->getOperand(1); 1020 AM.Scale = 1; 1021 return false; 1022 } 1023 break; 1024 } 1025 1026 case ISD::OR: 1027 // Handle "X | C" as "X + C" iff X is known to have C bits clear. 1028 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1029 X86ISelAddressMode Backup = AM; 1030 uint64_t Offset = CN->getSExtValue(); 1031 // Start with the LHS as an addr mode. 1032 if (!MatchAddress(N.getOperand(0), AM, Depth+1) && 1033 // Address could not have picked a GV address for the displacement. 1034 AM.GV == NULL && 1035 // On x86-64, the resultant disp must fit in 32-bits. 1036 (!is64Bit || isInt32(AM.Disp + Offset)) && 1037 // Check to see if the LHS & C is zero. 
1038 CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { 1039 AM.Disp += Offset; 1040 return false; 1041 } 1042 AM = Backup; 1043 } 1044 break; 1045 1046 case ISD::AND: { 1047 // Perform some heroic transforms on an and of a constant-count shift 1048 // with a constant to enable use of the scaled offset field. 1049 1050 SDValue Shift = N.getOperand(0); 1051 if (Shift.getNumOperands() != 2) break; 1052 1053 // Scale must not be used already. 1054 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; 1055 1056 SDValue X = Shift.getOperand(0); 1057 ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1058 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 1059 if (!C1 || !C2) break; 1060 1061 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This 1062 // allows us to convert the shift and and into an h-register extract and 1063 // a scaled index. 1064 if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { 1065 unsigned ScaleLog = 8 - C1->getZExtValue(); 1066 if (ScaleLog > 0 && ScaleLog < 4 && 1067 C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { 1068 SDValue Eight = CurDAG->getConstant(8, MVT::i8); 1069 SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); 1070 SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1071 X, Eight); 1072 SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), 1073 Srl, Mask); 1074 SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); 1075 SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1076 And, ShlCount); 1077 1078 // Insert the new nodes into the topological ordering. 
1079 if (Eight.getNode()->getNodeId() == -1 || 1080 Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1081 CurDAG->RepositionNode(X.getNode(), Eight.getNode()); 1082 Eight.getNode()->setNodeId(X.getNode()->getNodeId()); 1083 } 1084 if (Mask.getNode()->getNodeId() == -1 || 1085 Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1086 CurDAG->RepositionNode(X.getNode(), Mask.getNode()); 1087 Mask.getNode()->setNodeId(X.getNode()->getNodeId()); 1088 } 1089 if (Srl.getNode()->getNodeId() == -1 || 1090 Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1091 CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); 1092 Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1093 } 1094 if (And.getNode()->getNodeId() == -1 || 1095 And.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1096 CurDAG->RepositionNode(N.getNode(), And.getNode()); 1097 And.getNode()->setNodeId(N.getNode()->getNodeId()); 1098 } 1099 if (ShlCount.getNode()->getNodeId() == -1 || 1100 ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1101 CurDAG->RepositionNode(X.getNode(), ShlCount.getNode()); 1102 ShlCount.getNode()->setNodeId(N.getNode()->getNodeId()); 1103 } 1104 if (Shl.getNode()->getNodeId() == -1 || 1105 Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1106 CurDAG->RepositionNode(N.getNode(), Shl.getNode()); 1107 Shl.getNode()->setNodeId(N.getNode()->getNodeId()); 1108 } 1109 CurDAG->ReplaceAllUsesWith(N, Shl); 1110 AM.IndexReg = And; 1111 AM.Scale = (1 << ScaleLog); 1112 return false; 1113 } 1114 } 1115 1116 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this 1117 // allows us to fold the shift into this addressing mode. 1118 if (Shift.getOpcode() != ISD::SHL) break; 1119 1120 // Not likely to be profitable if either the AND or SHIFT node has more 1121 // than one use (unless all uses are for address computation). Besides, 1122 // isel mechanism requires their node ids to be reused. 
1123 if (!N.hasOneUse() || !Shift.hasOneUse()) 1124 break; 1125 1126 // Verify that the shift amount is something we can fold. 1127 unsigned ShiftCst = C1->getZExtValue(); 1128 if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) 1129 break; 1130 1131 // Get the new AND mask, this folds to a constant. 1132 SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1133 SDValue(C2, 0), SDValue(C1, 0)); 1134 SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, 1135 NewANDMask); 1136 SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1137 NewAND, SDValue(C1, 0)); 1138 1139 // Insert the new nodes into the topological ordering. 1140 if (C1->getNodeId() > X.getNode()->getNodeId()) { 1141 CurDAG->RepositionNode(X.getNode(), C1); 1142 C1->setNodeId(X.getNode()->getNodeId()); 1143 } 1144 if (NewANDMask.getNode()->getNodeId() == -1 || 1145 NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1146 CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode()); 1147 NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId()); 1148 } 1149 if (NewAND.getNode()->getNodeId() == -1 || 1150 NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1151 CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode()); 1152 NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1153 } 1154 if (NewSHIFT.getNode()->getNodeId() == -1 || 1155 NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1156 CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode()); 1157 NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); 1158 } 1159 1160 CurDAG->ReplaceAllUsesWith(N, NewSHIFT); 1161 1162 AM.Scale = 1 << ShiftCst; 1163 AM.IndexReg = NewAND; 1164 return false; 1165 } 1166 } 1167 1168 return MatchAddressBase(N, AM); 1169} 1170 1171/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the 1172/// specified addressing mode without any further recursion. 
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.getNode() == 0) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.  As with MatchAddress, returning true
    // means "could not handle this node".
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base.Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able pattern match an addressing mode.
/// It returns the operands which make up the maximal addressing mode it can
/// match by reference.
///
/// Note the inverted conventions: MatchAddress returns true on *failure*,
/// while SelectAddr returns true on *success*.
bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;
  bool Done = false;
  if (AvoidDupAddrCompute && !N.hasOneUse()) {
    unsigned Opcode = N.getOpcode();
    // Trivially rematerializable nodes (constants, frame indices, wrapped
    // globals) are always cheap to fold, even with multiple uses.
    if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
      // If we are able to fold N into addressing mode, then we'll allow it even
      // if N has multiple uses. In general, addressing computation is used as
      // addresses by all of its uses. But watch out for CopyToReg uses, that
      // means the address computation is liveout. It will be computed by a LEA
      // so we want to avoid computing the address twice.
      for (SDNode::use_iterator UI = N.getNode()->use_begin(),
             UE = N.getNode()->use_end(); UI != UE; ++UI) {
        if (UI->getOpcode() == ISD::CopyToReg) {
          // Fall back to using N as a plain base register.
          MatchAddressBase(N, AM);
          Done = true;
          break;
        }
      }
    }
  }

  if (!Done && MatchAddress(N, AM))
    return false;

  MVT VT = N.getValueType();
  // Fill in unused base/index slots with the "no register" placeholder
  // (register 0) so getAddressOperands always sees a complete mode.
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base.Reg.getNode())
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op, SDValue Pred,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &InChain,
                                          SDValue &OutChain) {
  // Pattern 1: scalar_to_vector of a non-extending load with a single use.
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.getNode()) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        IsLegalAndProfitableToFold(N.getNode(), Pred.getNode(), Op.getNode())) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse()) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    OutChain = LD->getChain();
    InChain = SDValue(LD, 1);
    return true;
  }
  return false;
}


/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  // MatchAddress must not have picked up a segment, since we pre-seeded it.
  assert (T == AM.Segment);
  AM.Segment = Copy;

  MVT VT = N.getValueType();
  // "Complexity" estimates how much address arithmetic the matched mode
  // folds; only modes complex enough to beat simpler instructions (add,
  // shift) are worth an LEA.
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.getNode())
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  SDValue Segment;
  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp) {
  assert(Op.getOpcode() == X86ISD::TLSADDR);
  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
  AM.SymbolFlags = GA->getTargetFlags();

  if (N.getValueType() == MVT::i32) {
    // 32-bit TLS addressing goes through EBX (the GOT base register).
    AM.Scale = 1;
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
  } else {
    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
  }

  SDValue Segment;
  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}


/// TryFoldLoad - Try to fold the load node N into the use P.  On success,
/// returns true and fills in the address operands for the folded load.
bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) {
  if (ISD::isNON_EXTLoad(N.getNode()) &&
      N.hasOneUse() &&
      IsLegalAndProfitableToFold(N.getNode(), P.getNode(), P.getNode()))
    // Operand 1 of a load is its base pointer.
    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
  return false;
}

/// getGlobalBaseReg - Return an SDNode that returns the value of
/// the global base register. Output instructions required to
/// initialize the global base register, if necessary.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  MachineFunction *MF = CurBB->getParent();
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}

/// FindCallStartFromCall - Walk up the chain from a call-related node until
/// the CALLSEQ_START that began the call sequence is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).getNode());
}

/// SelectAtomic64 - Select a 64-bit atomic pseudo (on 32-bit targets the
/// value is split into lo/hi halves, operands 2 and 3).  Returns NULL if the
/// address operand cannot be matched.
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = Node->getOperand(2);
  SDValue In2H = Node->getOperand(3);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  // SelectAddr returns true on success; bail out if the address doesn't match.
  if (!SelectAddr(In1, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  SDValue LSI = Node->getOperand(4);    // MemOperand
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, LSI, Chain};
  return CurDAG->getTargetNode(Opc, Node->getDebugLoc(),
                               MVT::i32, MVT::i32, MVT::Other, Ops,
                               array_lengthof(Ops));
}

/// Select - Main instruction-selection hook.  Handles the nodes that need
/// custom selection (mul/div register pairing, 64-bit atomics, DECLARE) and
/// defers everything else to the tblgen-generated SelectCode.
SDNode *X86DAGToDAGISel::Select(SDValue N) {
  SDNode *Node = N.getNode();
  MVT NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();
  DebugLoc dl = Node->getDebugLoc();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  if (Node->isMachineOpcode()) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
    default: break;
    case X86ISD::GlobalBaseReg:
      return getGlobalBaseReg();

    case X86ISD::ATOMOR64_DAG:
      return SelectAtomic64(Node, X86::ATOMOR6432);
    case X86ISD::ATOMXOR64_DAG:
      return SelectAtomic64(Node, X86::ATOMXOR6432);
    case X86ISD::ATOMADD64_DAG:
      return SelectAtomic64(Node, X86::ATOMADD6432);
    case X86ISD::ATOMSUB64_DAG:
      return SelectAtomic64(Node, X86::ATOMSUB6432);
    case X86ISD::ATOMNAND64_DAG:
      return SelectAtomic64(Node, X86::ATOMNAND6432);
    case X86ISD::ATOMAND64_DAG:
      return SelectAtomic64(Node, X86::ATOMAND6432);
    case X86ISD::ATOMSWAP64_DAG:
      return SelectAtomic64(Node, X86::ATOMSWAP6432);

    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI: {
      // x86 widening multiply produces its result in the fixed LoReg/HiReg
      // pair; select the one-operand MUL/IMUL form and copy the halves out.
      SDValue N0 = Node->getOperand(0);
      SDValue N1 = Node->getOperand(1);

      bool isSigned = Opcode == ISD::SMUL_LOHI;
      if (!isSigned)
        switch (NVT.getSimpleVT()) {
        default: llvm_unreachable("Unsupported VT!");
        case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
        case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
        case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
        case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
        }
      else
        switch (NVT.getSimpleVT()) {
        default: llvm_unreachable("Unsupported VT!");
        case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
        case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
        case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
        case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
        }

      unsigned LoReg, HiReg;
      switch (NVT.getSimpleVT()) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
      case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
      case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
      case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
      }

      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      // Multiply is commutative: if the RHS didn't fold, try folding the LHS
      // as the memory operand instead.
      if (!foldedLoad) {
        foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
        if (foldedLoad)
          std::swap(N0, N1);
      }

      SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
                                            N0, SDValue()).getValue(1);

      if (foldedLoad) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                          InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                                array_lengthof(Ops));
        InFlag = SDValue(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
      } else {
        InFlag =
          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the low half of the result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                                LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.getNode()->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the high half of the result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDValue Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
                                                 Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
          // Then truncate it down to i8.
          SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
                                                 MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.getNode()->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      return NULL;
    }

    case ISD::SDIVREM:
    case ISD::UDIVREM: {
      // x86 DIV/IDIV take the dividend in the fixed LoReg/HiReg pair and
      // leave quotient in LoReg, remainder in HiReg.
      SDValue N0 = Node->getOperand(0);
      SDValue N1 = Node->getOperand(1);

      bool isSigned = Opcode == ISD::SDIVREM;
      if (!isSigned)
        switch (NVT.getSimpleVT()) {
        default: llvm_unreachable("Unsupported VT!");
        case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
        case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
        case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
        case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
        }
      else
        switch (NVT.getSimpleVT()) {
        default: llvm_unreachable("Unsupported VT!");
        case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
        case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
        case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
        case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
        }

      unsigned LoReg, HiReg;
      unsigned ClrOpcode, SExtOpcode;
      switch (NVT.getSimpleVT()) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:
        LoReg = X86::AL;  HiReg = X86::AH;
        ClrOpcode  = 0;            // unused: i8 never reaches the Clr path
                                   // (handled by the MOVZX special case below)
        SExtOpcode = X86::CBW;
        break;
      case MVT::i16:
        LoReg = X86::AX;  HiReg = X86::DX;
        ClrOpcode  = X86::MOV16r0;
        SExtOpcode = X86::CWD;
        break;
      case MVT::i32:
        LoReg = X86::EAX; HiReg = X86::EDX;
        ClrOpcode  = X86::MOV32r0;
        SExtOpcode = X86::CDQ;
        break;
      case MVT::i64:
        LoReg = X86::RAX; HiReg = X86::RDX;
        ClrOpcode  = X86::MOV64r0;
        SExtOpcode = X86::CQO;
        break;
      }

      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
      bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      bool signBitIsZero = CurDAG->SignBitIsZero(N0);

      SDValue InFlag;
      if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
        // Special case for div8, just use a move with zero extension to AX to
        // clear the upper 8 bits (AH).
        SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
        if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
          SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
          Move =
            SDValue(CurDAG->getTargetNode(X86::MOVZX16rm8, dl, MVT::i16,
                                          MVT::Other, Ops,
                                          array_lengthof(Ops)), 0);
          Chain = Move.getValue(1);
          ReplaceUses(N0.getValue(1), Chain);
        } else {
          Move =
            SDValue(CurDAG->getTargetNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
          Chain = CurDAG->getEntryNode();
        }
        Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
        InFlag = Chain.getValue(1);
      } else {
        InFlag =
          CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                               LoReg, N0, SDValue()).getValue(1);
        if (isSigned && !signBitIsZero) {
          // Sign extend the low part into the high part.
          InFlag =
            SDValue(CurDAG->getTargetNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
        } else {
          // Zero out the high part, effectively zero extending the input.
          SDValue ClrNode = SDValue(CurDAG->getTargetNode(ClrOpcode, dl, NVT),
                                    0);
          InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, HiReg,
                                        ClrNode, InFlag).getValue(1);
        }
      }

      if (foldedLoad) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                          InFlag };
        SDNode *CNode =
          CurDAG->getTargetNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                                array_lengthof(Ops));
        InFlag = SDValue(CNode, 1);
        // Update the chain.
        ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
      } else {
        InFlag =
          SDValue(CurDAG->getTargetNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
      }

      // Copy the division (low) result, if it is needed.
      if (!N.getValue(0).use_empty()) {
        SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                                LoReg, NVT, InFlag);
        InFlag = Result.getValue(2);
        ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.getNode()->dump(CurDAG));
        DOUT << "\n";
#endif
      }
      // Copy the remainder (high) result, if it is needed.
      if (!N.getValue(1).use_empty()) {
        SDValue Result;
        if (HiReg == X86::AH && Subtarget->is64Bit()) {
          // Prevent use of AH in a REX instruction by referencing AX instead.
          // Shift it down 8 bits.
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                          X86::AX, MVT::i16, InFlag);
          InFlag = Result.getValue(2);
          Result = SDValue(CurDAG->getTargetNode(X86::SHR16ri, dl, MVT::i16,
                                                 Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)),
                           0);
          // Then truncate it down to i8.
          SDValue SRIdx = CurDAG->getTargetConstant(X86::SUBREG_8BIT, MVT::i32);
          Result = SDValue(CurDAG->getTargetNode(X86::EXTRACT_SUBREG, dl,
                                                 MVT::i8, Result, SRIdx), 0);
        } else {
          Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                          HiReg, NVT, InFlag);
          InFlag = Result.getValue(2);
        }
        ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
        DOUT << std::string(Indent-2, ' ') << "=> ";
        DEBUG(Result.getNode()->dump(CurDAG));
        DOUT << "\n";
#endif
      }

#ifndef NDEBUG
      Indent -= 2;
#endif

      return NULL;
    }

    case ISD::DECLARE: {
      // Handle DECLARE nodes here because the second operand may have been
      // wrapped in X86ISD::Wrapper.
      SDValue Chain = Node->getOperand(0);
      SDValue N1 = Node->getOperand(1);
      SDValue N2 = Node->getOperand(2);
      FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1);

      // FIXME: We need to handle this for VLAs.
      if (!FINode) {
        ReplaceUses(N.getValue(0), Chain);
        return NULL;
      }

      // Strip a PIC base-register add, if present, to expose the wrapper.
      if (N2.getOpcode() == ISD::ADD &&
          N2.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg)
        N2 = N2.getOperand(1);

      // If N2 is not Wrapper(descriptor) then the llvm.declare is mangled
      // somehow, just ignore it.
      if (N2.getOpcode() != X86ISD::Wrapper &&
          N2.getOpcode() != X86ISD::WrapperRIP) {
        ReplaceUses(N.getValue(0), Chain);
        return NULL;
      }
      GlobalAddressSDNode *GVNode =
        dyn_cast<GlobalAddressSDNode>(N2.getOperand(0));
      if (GVNode == 0) {
        ReplaceUses(N.getValue(0), Chain);
        return NULL;
      }
      SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(),
                                                 TLI.getPointerTy());
      SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GVNode->getGlobal(),
                                                    TLI.getPointerTy());
      SDValue Ops[] = { Tmp1, Tmp2, Chain };
      return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl,
                                   MVT::Other, Ops,
                                   array_lengthof(Ops));
    }
  }

  // Everything else is handled by the tblgen-generated matcher.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.getNode())
    DEBUG(N.getNode()->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Expand an inline-asm memory constraint into
/// the five address operands.  Returns true on failure (SelectionDAGISel
/// convention for this hook).
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}