X86ISelDAGToDAG.cpp revision 17aa68055beed6faa48ca3a995c5b6fdf5092fd4
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.  An address is matched incrementally into one of these by the
  /// MatchAddress* family below, then converted to machine operands by
  /// X86DAGToDAGISel::getAddressOperands.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;        // Valid when BaseType == RegBase.
    int Base_FrameIndex;     // Valid when BaseType == FrameIndexBase.

    unsigned Scale;          // Index scale factor (1, 2, 4 or 8).
    SDValue IndexReg;
    int32_t Disp;            // Constant displacement.
    SDValue Segment;         // Segment register override, if any.
    // At most one of the following symbolic displacements may be set;
    // see hasSymbolicDisplacement().
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
        SymbolFlags(X86II::MO_NO_FLAG) {
    }

    /// hasSymbolicDisplacement - True if any of the symbolic displacement
    /// fields (GV/CP/ES/JT/BlockAddr) has been filled in.
    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
    }

    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

    /// dump - Print the address mode to dbgs() for debugging.
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode() != 0)
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode() != 0)
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    const X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    virtual void EmitFunctionEntryCode();

    virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;

    virtual void PreprocessISelDAG();

    // immSext8 predicate - True if the immediate fits in a signed 8-bit
    // field.  Used by the autogenerated isel patterns below.
    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);

    // The MatchXXX helpers accumulate into AM; by convention they return
    // true on failure and false on success (see MatchWrapper's comment).
    bool MatchSegmentBaseAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchLoad(SDValue N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Lower a matched X86ISelAddressMode into the five
    /// machine operands (Base, Scale, Index, Disp, Segment) used by X86
    /// memory instructions.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
        AM.Base_Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else if (AM.BlockAddr)
        Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
                                       true, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      // Register 0 denotes "no segment override".
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }
  };
}


/// IsProfitableToFold - Returns true if it is profitable to fold operand N
/// into its user U (with Root the root of the match).  Loads get extra
/// profitability checks: folding a load is skipped when folding the other
/// operand would give smaller code.
bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is a 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In case where the increment is 1, then
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl    %gs:0, %eax
      // leal    i@NTPOFF(%eax), %eax
      // instead of
      // movl    $i@NTPOFF, %eax
      // addl    %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  // Step 1: rebuild OrigChain's chain operand with the load's input chain
  // substituted for the load itself (either directly or inside a
  // TokenFactor).
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
    Ops.push_back(OrigChain.getOperand(i));
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
  // Step 2: chain the load off the call's input chain.
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  // Step 3: make the call's chain operand be the load's output chain.
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  // Only simple (unindexed, non-extending, non-volatile) loads qualify.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  // The load must feed the chain either directly or through a TokenFactor
  // that the load's chain result has no other users in.
  if (!Chain.getNumOperands())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

/// PreprocessISelDAG - Target hook run on the whole DAG before instruction
/// selection.  Moves foldable call-address loads next to their calls, and
/// "really late"-legalizes FP_ROUND/FP_EXTEND that must go through the x87
/// FP stack.
void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        (N->getOpcode() == X86ISD::CALL ||
         N->getOpcode() == X86ISD::TC_RETURN)) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack.  This is a gross hack.  We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass.  We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    EVT SrcVT = N->getOperand(0).getValueType();
    EVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    EVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, NULL, 0, MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
                                        NULL, 0, MemVT, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}


/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  // Cygwin/MinGW require a call to __main before user code runs.
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, DebugLoc(),
            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}


/// MatchSegmentBaseAddress - Record N's segment register in AM.  Returns
/// false on success, true if a (different) segment was already recorded.
bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N,
                                              X86ISelAddressMode &AM) {
  assert(N.getOpcode() == X86ISD::SegmentBaseAddress);
  SDValue Segment = N.getOperand(0);

  if (AM.Segment.getNode() == 0) {
    AM.Segment = Segment;
    return false;
  }

  return true;
}

/// MatchLoad - Try to fold a load of a segment base address (gs:0 / fs:0)
/// into AM.  Returns false on success, true otherwise.
bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf

  SDValue Address = N.getOperand(1);
  if (Address.getOpcode() == X86ISD::SegmentBaseAddress &&
      !MatchSegmentBaseAddress (Address, AM))
    return false;

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel) &&
      // Base and index reg must be 0 in order to use %rip as base and lowering
      // must allow RIP.
      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      int64_t Offset = AM.Disp + G->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.GV = G->getGlobal();
      AM.Disp = Offset;
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      int64_t Offset = AM.Disp + CP->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp = Offset;
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
      AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
  // mode, this results in a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      ((M == CodeModel::Small || M == CodeModel::Kernel) &&
       TM.getRelocationModel() == Reloc::Static)) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
      AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == 0) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == 0 &&
      AM.IndexReg.getNode() == 0 &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

/// isLogicallyAddWithConstant - Return true if this node is semantically an
/// add of a value with a constantint.
static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
  // Check for (add x, Cst)
  if (V->getOpcode() == ISD::ADD)
    return isa<ConstantSDNode>(V->getOperand(1));

  // Check for (or x, Cst), where Cst & x == 0.
  if (V->getOpcode() != ISD::OR ||
      !isa<ConstantSDNode>(V->getOperand(1)))
    return false;

  // Handle "X | C" as "X + C" iff X is known to have C bits clear.
  ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));

  // Check to see if the LHS & C is zero.
  return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
}

bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  bool is64Bit = Subtarget->is64Bit();
  DebugLoc dl = N.getDebugLoc();
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  CodeModel::Model M = TM.getCodeModel();

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
727 if (!AM.ES && AM.JT != -1) return true; 728 729 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) { 730 int64_t Val = AM.Disp + Cst->getSExtValue(); 731 if (X86::isOffsetSuitableForCodeModel(Val, M, 732 AM.hasSymbolicDisplacement())) { 733 AM.Disp = Val; 734 return false; 735 } 736 } 737 return true; 738 } 739 740 switch (N.getOpcode()) { 741 default: break; 742 case ISD::Constant: { 743 uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); 744 if (!is64Bit || 745 X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M, 746 AM.hasSymbolicDisplacement())) { 747 AM.Disp += Val; 748 return false; 749 } 750 break; 751 } 752 753 case X86ISD::SegmentBaseAddress: 754 if (!MatchSegmentBaseAddress(N, AM)) 755 return false; 756 break; 757 758 case X86ISD::Wrapper: 759 case X86ISD::WrapperRIP: 760 if (!MatchWrapper(N, AM)) 761 return false; 762 break; 763 764 case ISD::LOAD: 765 if (!MatchLoad(N, AM)) 766 return false; 767 break; 768 769 case ISD::FrameIndex: 770 if (AM.BaseType == X86ISelAddressMode::RegBase 771 && AM.Base_Reg.getNode() == 0) { 772 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 773 AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 774 return false; 775 } 776 break; 777 778 case ISD::SHL: 779 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) 780 break; 781 782 if (ConstantSDNode 783 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) { 784 unsigned Val = CN->getZExtValue(); 785 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so 786 // that the base operand remains free for further matching. If 787 // the base doesn't end up getting used, a post-processing step 788 // in MatchAddress turns (,x,2) into (x,x), which is cheaper. 789 if (Val == 1 || Val == 2 || Val == 3) { 790 AM.Scale = 1 << Val; 791 SDValue ShVal = N.getNode()->getOperand(0); 792 793 // Okay, we know that we have a scale by now. 
However, if the scaled 794 // value is an add of something and a constant, we can fold the 795 // constant into the disp field here. 796 if (isLogicallyAddWithConstant(ShVal, CurDAG)) { 797 AM.IndexReg = ShVal.getNode()->getOperand(0); 798 ConstantSDNode *AddVal = 799 cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); 800 uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val); 801 if (!is64Bit || 802 X86::isOffsetSuitableForCodeModel(Disp, M, 803 AM.hasSymbolicDisplacement())) 804 AM.Disp = Disp; 805 else 806 AM.IndexReg = ShVal; 807 } else { 808 AM.IndexReg = ShVal; 809 } 810 return false; 811 } 812 break; 813 } 814 815 case ISD::SMUL_LOHI: 816 case ISD::UMUL_LOHI: 817 // A mul_lohi where we need the low part can be folded as a plain multiply. 818 if (N.getResNo() != 0) break; 819 // FALL THROUGH 820 case ISD::MUL: 821 case X86ISD::MUL_IMM: 822 // X*[3,5,9] -> X+X*[2,4,8] 823 if (AM.BaseType == X86ISelAddressMode::RegBase && 824 AM.Base_Reg.getNode() == 0 && 825 AM.IndexReg.getNode() == 0) { 826 if (ConstantSDNode 827 *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) 828 if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || 829 CN->getZExtValue() == 9) { 830 AM.Scale = unsigned(CN->getZExtValue())-1; 831 832 SDValue MulVal = N.getNode()->getOperand(0); 833 SDValue Reg; 834 835 // Okay, we know that we have a scale by now. However, if the scaled 836 // value is an add of something and a constant, we can fold the 837 // constant into the disp field here. 
838 if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 839 isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) { 840 Reg = MulVal.getNode()->getOperand(0); 841 ConstantSDNode *AddVal = 842 cast<ConstantSDNode>(MulVal.getNode()->getOperand(1)); 843 uint64_t Disp = AM.Disp + AddVal->getSExtValue() * 844 CN->getZExtValue(); 845 if (!is64Bit || 846 X86::isOffsetSuitableForCodeModel(Disp, M, 847 AM.hasSymbolicDisplacement())) 848 AM.Disp = Disp; 849 else 850 Reg = N.getNode()->getOperand(0); 851 } else { 852 Reg = N.getNode()->getOperand(0); 853 } 854 855 AM.IndexReg = AM.Base_Reg = Reg; 856 return false; 857 } 858 } 859 break; 860 861 case ISD::SUB: { 862 // Given A-B, if A can be completely folded into the address and 863 // the index field with the index field unused, use -B as the index. 864 // This is a win if a has multiple parts that can be folded into 865 // the address. Also, this saves a mov if the base register has 866 // other uses, since it avoids a two-address sub instruction, however 867 // it costs an additional mov if the index register has other uses. 868 869 // Add an artificial use to this node so that we can keep track of 870 // it if it gets CSE'd with a different node. 871 HandleSDNode Handle(N); 872 873 // Test if the LHS of the sub can be folded. 874 X86ISelAddressMode Backup = AM; 875 if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { 876 AM = Backup; 877 break; 878 } 879 // Test if the index field is free for use. 880 if (AM.IndexReg.getNode() || AM.isRIPRelative()) { 881 AM = Backup; 882 break; 883 } 884 885 int Cost = 0; 886 SDValue RHS = Handle.getValue().getNode()->getOperand(1); 887 // If the RHS involves a register with multiple uses, this 888 // transformation incurs an extra mov, due to the neg instruction 889 // clobbering its operand. 
890 if (!RHS.getNode()->hasOneUse() || 891 RHS.getNode()->getOpcode() == ISD::CopyFromReg || 892 RHS.getNode()->getOpcode() == ISD::TRUNCATE || 893 RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || 894 (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && 895 RHS.getNode()->getOperand(0).getValueType() == MVT::i32)) 896 ++Cost; 897 // If the base is a register with multiple uses, this 898 // transformation may save a mov. 899 if ((AM.BaseType == X86ISelAddressMode::RegBase && 900 AM.Base_Reg.getNode() && 901 !AM.Base_Reg.getNode()->hasOneUse()) || 902 AM.BaseType == X86ISelAddressMode::FrameIndexBase) 903 --Cost; 904 // If the folded LHS was interesting, this transformation saves 905 // address arithmetic. 906 if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + 907 ((AM.Disp != 0) && (Backup.Disp == 0)) + 908 (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) 909 --Cost; 910 // If it doesn't look like it may be an overall win, don't do it. 911 if (Cost >= 0) { 912 AM = Backup; 913 break; 914 } 915 916 // Ok, the transformation is legal and appears profitable. Go for it. 917 SDValue Zero = CurDAG->getConstant(0, N.getValueType()); 918 SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); 919 AM.IndexReg = Neg; 920 AM.Scale = 1; 921 922 // Insert the new nodes into the topological ordering. 923 if (Zero.getNode()->getNodeId() == -1 || 924 Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { 925 CurDAG->RepositionNode(N.getNode(), Zero.getNode()); 926 Zero.getNode()->setNodeId(N.getNode()->getNodeId()); 927 } 928 if (Neg.getNode()->getNodeId() == -1 || 929 Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { 930 CurDAG->RepositionNode(N.getNode(), Neg.getNode()); 931 Neg.getNode()->setNodeId(N.getNode()->getNodeId()); 932 } 933 return false; 934 } 935 936 case ISD::ADD: { 937 // Add an artificial use to this node so that we can keep track of 938 // it if it gets CSE'd with a different node. 
939 HandleSDNode Handle(N); 940 SDValue LHS = Handle.getValue().getNode()->getOperand(0); 941 SDValue RHS = Handle.getValue().getNode()->getOperand(1); 942 943 X86ISelAddressMode Backup = AM; 944 if (!MatchAddressRecursively(LHS, AM, Depth+1) && 945 !MatchAddressRecursively(RHS, AM, Depth+1)) 946 return false; 947 AM = Backup; 948 LHS = Handle.getValue().getNode()->getOperand(0); 949 RHS = Handle.getValue().getNode()->getOperand(1); 950 951 // Try again after commuting the operands. 952 if (!MatchAddressRecursively(RHS, AM, Depth+1) && 953 !MatchAddressRecursively(LHS, AM, Depth+1)) 954 return false; 955 AM = Backup; 956 LHS = Handle.getValue().getNode()->getOperand(0); 957 RHS = Handle.getValue().getNode()->getOperand(1); 958 959 // If we couldn't fold both operands into the address at the same time, 960 // see if we can just put each operand into a register and fold at least 961 // the add. 962 if (AM.BaseType == X86ISelAddressMode::RegBase && 963 !AM.Base_Reg.getNode() && 964 !AM.IndexReg.getNode()) { 965 AM.Base_Reg = LHS; 966 AM.IndexReg = RHS; 967 AM.Scale = 1; 968 return false; 969 } 970 break; 971 } 972 973 case ISD::OR: 974 // Handle "X | C" as "X + C" iff X is known to have C bits clear. 975 if (isLogicallyAddWithConstant(N, CurDAG)) { 976 X86ISelAddressMode Backup = AM; 977 ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); 978 uint64_t Offset = CN->getSExtValue(); 979 980 // Start with the LHS as an addr mode. 981 if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && 982 // Address could not have picked a GV address for the displacement. 983 AM.GV == NULL && 984 // On x86-64, the resultant disp must fit in 32-bits. 
985 (!is64Bit || 986 X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, 987 AM.hasSymbolicDisplacement()))) { 988 AM.Disp += Offset; 989 return false; 990 } 991 AM = Backup; 992 } 993 break; 994 995 case ISD::AND: { 996 // Perform some heroic transforms on an and of a constant-count shift 997 // with a constant to enable use of the scaled offset field. 998 999 SDValue Shift = N.getOperand(0); 1000 if (Shift.getNumOperands() != 2) break; 1001 1002 // Scale must not be used already. 1003 if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; 1004 1005 SDValue X = Shift.getOperand(0); 1006 ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1007 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); 1008 if (!C1 || !C2) break; 1009 1010 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This 1011 // allows us to convert the shift and and into an h-register extract and 1012 // a scaled index. 1013 if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) { 1014 unsigned ScaleLog = 8 - C1->getZExtValue(); 1015 if (ScaleLog > 0 && ScaleLog < 4 && 1016 C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) { 1017 SDValue Eight = CurDAG->getConstant(8, MVT::i8); 1018 SDValue Mask = CurDAG->getConstant(0xff, N.getValueType()); 1019 SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1020 X, Eight); 1021 SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(), 1022 Srl, Mask); 1023 SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); 1024 SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1025 And, ShlCount); 1026 1027 // Insert the new nodes into the topological ordering. 
1028 if (Eight.getNode()->getNodeId() == -1 || 1029 Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1030 CurDAG->RepositionNode(X.getNode(), Eight.getNode()); 1031 Eight.getNode()->setNodeId(X.getNode()->getNodeId()); 1032 } 1033 if (Mask.getNode()->getNodeId() == -1 || 1034 Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1035 CurDAG->RepositionNode(X.getNode(), Mask.getNode()); 1036 Mask.getNode()->setNodeId(X.getNode()->getNodeId()); 1037 } 1038 if (Srl.getNode()->getNodeId() == -1 || 1039 Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1040 CurDAG->RepositionNode(Shift.getNode(), Srl.getNode()); 1041 Srl.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1042 } 1043 if (And.getNode()->getNodeId() == -1 || 1044 And.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1045 CurDAG->RepositionNode(N.getNode(), And.getNode()); 1046 And.getNode()->setNodeId(N.getNode()->getNodeId()); 1047 } 1048 if (ShlCount.getNode()->getNodeId() == -1 || 1049 ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1050 CurDAG->RepositionNode(X.getNode(), ShlCount.getNode()); 1051 ShlCount.getNode()->setNodeId(N.getNode()->getNodeId()); 1052 } 1053 if (Shl.getNode()->getNodeId() == -1 || 1054 Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1055 CurDAG->RepositionNode(N.getNode(), Shl.getNode()); 1056 Shl.getNode()->setNodeId(N.getNode()->getNodeId()); 1057 } 1058 CurDAG->ReplaceAllUsesWith(N, Shl); 1059 AM.IndexReg = And; 1060 AM.Scale = (1 << ScaleLog); 1061 return false; 1062 } 1063 } 1064 1065 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this 1066 // allows us to fold the shift into this addressing mode. 1067 if (Shift.getOpcode() != ISD::SHL) break; 1068 1069 // Not likely to be profitable if either the AND or SHIFT node has more 1070 // than one use (unless all uses are for address computation). Besides, 1071 // isel mechanism requires their node ids to be reused. 
1072 if (!N.hasOneUse() || !Shift.hasOneUse()) 1073 break; 1074 1075 // Verify that the shift amount is something we can fold. 1076 unsigned ShiftCst = C1->getZExtValue(); 1077 if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3) 1078 break; 1079 1080 // Get the new AND mask, this folds to a constant. 1081 SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(), 1082 SDValue(C2, 0), SDValue(C1, 0)); 1083 SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, 1084 NewANDMask); 1085 SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), 1086 NewAND, SDValue(C1, 0)); 1087 1088 // Insert the new nodes into the topological ordering. 1089 if (C1->getNodeId() > X.getNode()->getNodeId()) { 1090 CurDAG->RepositionNode(X.getNode(), C1); 1091 C1->setNodeId(X.getNode()->getNodeId()); 1092 } 1093 if (NewANDMask.getNode()->getNodeId() == -1 || 1094 NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) { 1095 CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode()); 1096 NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId()); 1097 } 1098 if (NewAND.getNode()->getNodeId() == -1 || 1099 NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) { 1100 CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode()); 1101 NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId()); 1102 } 1103 if (NewSHIFT.getNode()->getNodeId() == -1 || 1104 NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) { 1105 CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode()); 1106 NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); 1107 } 1108 1109 CurDAG->ReplaceAllUsesWith(N, NewSHIFT); 1110 1111 AM.Scale = 1 << ShiftCst; 1112 AM.IndexReg = NewAND; 1113 return false; 1114 } 1115 } 1116 1117 return MatchAddressBase(N, AM); 1118} 1119 1120/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the 1121/// specified addressing mode without any further recursion. 
1122bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { 1123 // Is the base register already occupied? 1124 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { 1125 // If so, check to see if the scale index register is set. 1126 if (AM.IndexReg.getNode() == 0) { 1127 AM.IndexReg = N; 1128 AM.Scale = 1; 1129 return false; 1130 } 1131 1132 // Otherwise, we cannot select it. 1133 return true; 1134 } 1135 1136 // Default, generate it as a register. 1137 AM.BaseType = X86ISelAddressMode::RegBase; 1138 AM.Base_Reg = N; 1139 return false; 1140} 1141 1142/// SelectAddr - returns true if it is able pattern match an addressing mode. 1143/// It returns the operands which make up the maximal addressing mode it can 1144/// match by reference. 1145bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, 1146 SDValue &Scale, SDValue &Index, 1147 SDValue &Disp, SDValue &Segment) { 1148 X86ISelAddressMode AM; 1149 if (MatchAddress(N, AM)) 1150 return false; 1151 1152 EVT VT = N.getValueType(); 1153 if (AM.BaseType == X86ISelAddressMode::RegBase) { 1154 if (!AM.Base_Reg.getNode()) 1155 AM.Base_Reg = CurDAG->getRegister(0, VT); 1156 } 1157 1158 if (!AM.IndexReg.getNode()) 1159 AM.IndexReg = CurDAG->getRegister(0, VT); 1160 1161 getAddressOperands(AM, Base, Scale, Index, Disp, Segment); 1162 return true; 1163} 1164 1165/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 1166/// match a load whose top elements are either undef or zeros. The load flavor 1167/// is derived from the type of N, which is either v4f32 or v2f64. 1168/// 1169/// We also return: 1170/// PatternChainNode: this is the matched node that has a chain input and 1171/// output. 
1172bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, 1173 SDValue N, SDValue &Base, 1174 SDValue &Scale, SDValue &Index, 1175 SDValue &Disp, SDValue &Segment, 1176 SDValue &PatternNodeWithChain) { 1177 if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) { 1178 PatternNodeWithChain = N.getOperand(0); 1179 if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && 1180 PatternNodeWithChain.hasOneUse() && 1181 IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && 1182 IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { 1183 LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); 1184 if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) 1185 return false; 1186 return true; 1187 } 1188 } 1189 1190 // Also handle the case where we explicitly require zeros in the top 1191 // elements. This is a vector shuffle from the zero vector. 1192 if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() && 1193 // Check to see if the top elements are all zeros (or bitcast of zeros). 1194 N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && 1195 N.getOperand(0).getNode()->hasOneUse() && 1196 ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && 1197 N.getOperand(0).getOperand(0).hasOneUse() && 1198 IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && 1199 IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { 1200 // Okay, this is a zero extending load. Fold it. 1201 LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); 1202 if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) 1203 return false; 1204 PatternNodeWithChain = SDValue(LD, 0); 1205 return true; 1206 } 1207 return false; 1208} 1209 1210 1211/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 1212/// mode it matches can be cost effectively emitted as an LEA instruction. 
bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  // MatchAddress must not have replaced the dummy segment register.
  assert (T == AM.Segment);
  AM.Segment = Copy;

  // Score how "interesting" the matched mode is.  LEA only pays off when the
  // address combines enough parts (base + index, scale, displacement) that a
  // single three-address LEA beats simpler ALU sequences.
  EVT VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base_Reg.getNode())
      Complexity = 1;
    else
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  // Note: this else binds to the outer BaseType test above.
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  // A displacement on top of a register operand adds one more folded part.
  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) {
  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

  // Build the address mode directly from the TLS global; no matching needed.
  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
  AM.SymbolFlags = GA->getTargetFlags();

  if (N.getValueType() == MVT::i32) {
    // 32-bit mode pins EBX as the index register -- presumably the GOT base
    // expected by the tls_addr lowering; confirm against X86InstrInfo.td.
    AM.Scale = 1;
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
  } else {
    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
  }

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}


/// TryFoldLoad - Return true if the non-extending load N can be folded as a
/// memory operand of P; on success the address operands are filled in.
bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) {
  if (!ISD::isNON_EXTLoad(N.getNode()) ||
      !IsProfitableToFold(N, P, P) ||
      !IsLegalToFold(N, P, P, OptLevel))
    return false;

  // Operand 1 of a load is the base pointer (operand 0 is the chain).
  return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
}

/// getGlobalBaseReg - Return an SDNode that returns the value of
/// the global base register. Output instructions required to
/// initialize the global base register, if necessary.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}

/// SelectAtomic64 - Lower a 64-bit atomic pseudo node to the given target
/// opcode.  The 64-bit operand arrives as two halves (In2L/In2H -- the
/// machine node produces i32, i32, Other).  Returns NULL if the address
/// cannot be matched.
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);    // Address of the atomic location.
  SDValue In2L = Node->getOperand(2);   // Low half of the operand value.
  SDValue In2H = Node->getOperand(3);   // High half of the operand value.
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(In1.getNode(), In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  // Transfer the memory operand so the machine instruction keeps alias info.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
                                           MVT::i32, MVT::i32, MVT::Other, Ops,
                                           array_lengthof(Ops));
  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
  return ResNode;
}

/// SelectAtomicLoadAdd - Try to select ATOMIC_LOAD_ADD as a "lock"
/// add/sub/inc/dec to memory.  Only legal when the fetched result (value 0)
/// is unused; returns 0 to fall back to normal selection otherwise.
SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
  // If anyone consumes the loaded value we cannot use the result-less
  // locked RMW forms.
  if (Node->hasAnyUseOfValue(0))
    return 0;

  // Optimize common patterns for __sync_add_and_fetch and
  // __sync_sub_and_fetch where the result is not used. This allows us
  // to use "lock" version of add, sub, inc, dec instructions.
  // FIXME: Do not use special instructions but instead add the "lock"
  // prefix to the target node somehow. The extra information will then be
  // transferred to machine instruction and it denotes the prefix.
  SDValue Chain = Node->getOperand(0);
  SDValue Ptr = Node->getOperand(1);
  SDValue Val = Node->getOperand(2);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Ptr.getNode(), Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return 0;

  // Classify the addend: +1 -> inc, -1 -> dec, negative constant -> sub of
  // the negated value, (0 - x) -> sub of x, anything else -> add.
  bool isInc = false, isDec = false, isSub = false, isCN = false;
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
  if (CN) {
    isCN = true;
    int64_t CNVal = CN->getSExtValue();
    if (CNVal == 1)
      isInc = true;
    else if (CNVal == -1)
      isDec = true;
    else if (CNVal >= 0)
      Val = CurDAG->getTargetConstant(CNVal, NVT);
    else {
      isSub = true;
      Val = CurDAG->getTargetConstant(-CNVal, NVT);
    }
  } else if (Val.hasOneUse() &&
             Val.getOpcode() == ISD::SUB &&
             X86::isZeroNode(Val.getOperand(0))) {
    isSub = true;
    Val = Val.getOperand(1);
  }

  // Pick the locked opcode for the operation's width, preferring the short
  // sign-extended-imm8 encodings where the constant fits.
  unsigned Opc = 0;
  switch (NVT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:
    if (isInc)
      Opc = X86::LOCK_INC8m;
    else if (isDec)
      Opc = X86::LOCK_DEC8m;
    else if (isSub) {
      if (isCN)
        Opc = X86::LOCK_SUB8mi;
      else
        Opc = X86::LOCK_SUB8mr;
    } else {
      if (isCN)
        Opc = X86::LOCK_ADD8mi;
      else
        Opc = X86::LOCK_ADD8mr;
    }
    break;
  case MVT::i16:
    if (isInc)
      Opc = X86::LOCK_INC16m;
    else if (isDec)
      Opc = X86::LOCK_DEC16m;
    else if (isSub) {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_SUB16mi8;
        else
          Opc = X86::LOCK_SUB16mi;
      } else
        Opc = X86::LOCK_SUB16mr;
    } else {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_ADD16mi8;
        else
          Opc = X86::LOCK_ADD16mi;
      } else
        Opc = X86::LOCK_ADD16mr;
    }
    break;
  case MVT::i32:
    if (isInc)
      Opc = X86::LOCK_INC32m;
    else if (isDec)
      Opc = X86::LOCK_DEC32m;
    else if (isSub) {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_SUB32mi8;
        else
          Opc = X86::LOCK_SUB32mi;
      } else
        Opc = X86::LOCK_SUB32mr;
    } else {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_ADD32mi8;
        else
          Opc = X86::LOCK_ADD32mi;
      } else
        Opc = X86::LOCK_ADD32mr;
    }
    break;
  case MVT::i64:
    // 64-bit immediates don't encode; fall back to the register form unless
    // the constant fits in a sign-extended imm8 or imm32.
    if (isInc)
      Opc = X86::LOCK_INC64m;
    else if (isDec)
      Opc = X86::LOCK_DEC64m;
    else if (isSub) {
      Opc = X86::LOCK_SUB64mr;
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_SUB64mi8;
        else if (i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_SUB64mi32;
      }
    } else {
      Opc = X86::LOCK_ADD64mr;
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_ADD64mi8;
        else if (i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_ADD64mi32;
      }
    }
    break;
  }

  DebugLoc dl = Node->getDebugLoc();
  // Value 0 (the old memory value) is unused but still needs a defined
  // SDValue; feed it an IMPLICIT_DEF.
  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                 dl, NVT), 0);
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  if (isInc || isDec) {
    // inc/dec take no value operand.
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  } else {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  }
}

/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
  // Examine each user of the node.  Every use must be a copy into EFLAGS
  // whose flag consumers are all unsigned/equality-style tests; any other
  // shape means SF/OF might be observed and we must answer false.
  for (SDNode::use_iterator UI = N->use_begin(),
         UE = N->use_end(); UI != UE; ++UI) {
    // Only examine CopyToReg uses.
    if (UI->getOpcode() != ISD::CopyToReg)
      return false;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
        X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the opcode of the user.
      switch (FlagUI->getMachineOpcode()) {
      // These comparisons don't treat the most significant bit specially.
      // (SETcc/Jcc/CMOVcc for A/AE/B/BE/E/NE/P/NP only read CF/ZF/PF.)
      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
      case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
      case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
      case X86::CMOVA16rr: case X86::CMOVA16rm:
      case X86::CMOVA32rr: case X86::CMOVA32rm:
      case X86::CMOVA64rr: case X86::CMOVA64rm:
      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
      case X86::CMOVB16rr: case X86::CMOVB16rm:
      case X86::CMOVB32rr: case X86::CMOVB32rm:
      case X86::CMOVB64rr: case X86::CMOVB64rm:
      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
      case X86::CMOVE16rr: case X86::CMOVE16rm:
      case X86::CMOVE32rr: case X86::CMOVE32rm:
      case X86::CMOVE64rr: case X86::CMOVE64rm:
      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
      case X86::CMOVP16rr: case X86::CMOVP16rm:
      case X86::CMOVP32rr: case X86::CMOVP32rm:
      case X86::CMOVP64rr: case X86::CMOVP64rm:
        continue;
      // Anything else: assume conservatively.
      default: return false;
      }
    }
  }
  return true;
}

SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
  EVT NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();
  DebugLoc dl = Node->getDebugLoc();

  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== ";  Node->dump(CurDAG); dbgs() << '\n');
    return NULL;   // Already selected.
1561 } 1562 1563 switch (Opcode) { 1564 default: break; 1565 case X86ISD::GlobalBaseReg: 1566 return getGlobalBaseReg(); 1567 1568 case X86ISD::ATOMOR64_DAG: 1569 return SelectAtomic64(Node, X86::ATOMOR6432); 1570 case X86ISD::ATOMXOR64_DAG: 1571 return SelectAtomic64(Node, X86::ATOMXOR6432); 1572 case X86ISD::ATOMADD64_DAG: 1573 return SelectAtomic64(Node, X86::ATOMADD6432); 1574 case X86ISD::ATOMSUB64_DAG: 1575 return SelectAtomic64(Node, X86::ATOMSUB6432); 1576 case X86ISD::ATOMNAND64_DAG: 1577 return SelectAtomic64(Node, X86::ATOMNAND6432); 1578 case X86ISD::ATOMAND64_DAG: 1579 return SelectAtomic64(Node, X86::ATOMAND6432); 1580 case X86ISD::ATOMSWAP64_DAG: 1581 return SelectAtomic64(Node, X86::ATOMSWAP6432); 1582 1583 case ISD::ATOMIC_LOAD_ADD: { 1584 SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT); 1585 if (RetVal) 1586 return RetVal; 1587 break; 1588 } 1589 1590 case ISD::SMUL_LOHI: 1591 case ISD::UMUL_LOHI: { 1592 SDValue N0 = Node->getOperand(0); 1593 SDValue N1 = Node->getOperand(1); 1594 1595 bool isSigned = Opcode == ISD::SMUL_LOHI; 1596 if (!isSigned) { 1597 switch (NVT.getSimpleVT().SimpleTy) { 1598 default: llvm_unreachable("Unsupported VT!"); 1599 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 1600 case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 1601 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; 1602 case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; 1603 } 1604 } else { 1605 switch (NVT.getSimpleVT().SimpleTy) { 1606 default: llvm_unreachable("Unsupported VT!"); 1607 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 1608 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 1609 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 1610 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 1611 } 1612 } 1613 1614 unsigned LoReg, HiReg; 1615 switch (NVT.getSimpleVT().SimpleTy) { 1616 default: llvm_unreachable("Unsupported VT!"); 1617 case MVT::i8: LoReg = X86::AL; 
HiReg = X86::AH; break; 1618 case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; 1619 case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; 1620 case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; 1621 } 1622 1623 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1624 bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1625 // Multiply is commmutative. 1626 if (!foldedLoad) { 1627 foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1628 if (foldedLoad) 1629 std::swap(N0, N1); 1630 } 1631 1632 SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, 1633 N0, SDValue()).getValue(1); 1634 1635 if (foldedLoad) { 1636 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1637 InFlag }; 1638 SDNode *CNode = 1639 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1640 array_lengthof(Ops)); 1641 InFlag = SDValue(CNode, 1); 1642 // Update the chain. 1643 ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1644 } else { 1645 InFlag = 1646 SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1647 } 1648 1649 // Prevent use of AH in a REX instruction by referencing AX instead. 1650 if (HiReg == X86::AH && Subtarget->is64Bit() && 1651 !SDValue(Node, 1).use_empty()) { 1652 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1653 X86::AX, MVT::i16, InFlag); 1654 InFlag = Result.getValue(2); 1655 // Get the low part if needed. Don't use getCopyFromReg for aliasing 1656 // registers. 1657 if (!SDValue(Node, 0).use_empty()) 1658 ReplaceUses(SDValue(Node, 1), 1659 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 1660 1661 // Shift AX down 8 bits. 1662 Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 1663 Result, 1664 CurDAG->getTargetConstant(8, MVT::i8)), 0); 1665 // Then truncate it down to i8. 
1666 ReplaceUses(SDValue(Node, 1), 1667 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 1668 } 1669 // Copy the low half of the result, if it is needed. 1670 if (!SDValue(Node, 0).use_empty()) { 1671 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1672 LoReg, NVT, InFlag); 1673 InFlag = Result.getValue(2); 1674 ReplaceUses(SDValue(Node, 0), Result); 1675 DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 1676 } 1677 // Copy the high half of the result, if it is needed. 1678 if (!SDValue(Node, 1).use_empty()) { 1679 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1680 HiReg, NVT, InFlag); 1681 InFlag = Result.getValue(2); 1682 ReplaceUses(SDValue(Node, 1), Result); 1683 DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); 1684 } 1685 1686 return NULL; 1687 } 1688 1689 case ISD::SDIVREM: 1690 case ISD::UDIVREM: { 1691 SDValue N0 = Node->getOperand(0); 1692 SDValue N1 = Node->getOperand(1); 1693 1694 bool isSigned = Opcode == ISD::SDIVREM; 1695 if (!isSigned) { 1696 switch (NVT.getSimpleVT().SimpleTy) { 1697 default: llvm_unreachable("Unsupported VT!"); 1698 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 1699 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 1700 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 1701 case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; 1702 } 1703 } else { 1704 switch (NVT.getSimpleVT().SimpleTy) { 1705 default: llvm_unreachable("Unsupported VT!"); 1706 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 1707 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 1708 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 1709 case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 1710 } 1711 } 1712 1713 unsigned LoReg, HiReg, ClrReg; 1714 unsigned ClrOpcode, SExtOpcode; 1715 switch (NVT.getSimpleVT().SimpleTy) { 1716 default: llvm_unreachable("Unsupported VT!"); 
1717 case MVT::i8: 1718 LoReg = X86::AL; ClrReg = HiReg = X86::AH; 1719 ClrOpcode = 0; 1720 SExtOpcode = X86::CBW; 1721 break; 1722 case MVT::i16: 1723 LoReg = X86::AX; HiReg = X86::DX; 1724 ClrOpcode = X86::MOV16r0; ClrReg = X86::DX; 1725 SExtOpcode = X86::CWD; 1726 break; 1727 case MVT::i32: 1728 LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; 1729 ClrOpcode = X86::MOV32r0; 1730 SExtOpcode = X86::CDQ; 1731 break; 1732 case MVT::i64: 1733 LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; 1734 ClrOpcode = X86::MOV64r0; 1735 SExtOpcode = X86::CQO; 1736 break; 1737 } 1738 1739 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; 1740 bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); 1741 bool signBitIsZero = CurDAG->SignBitIsZero(N0); 1742 1743 SDValue InFlag; 1744 if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) { 1745 // Special case for div8, just use a move with zero extension to AX to 1746 // clear the upper 8 bits (AH). 1747 SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; 1748 if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { 1749 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; 1750 Move = 1751 SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16, 1752 MVT::Other, Ops, 1753 array_lengthof(Ops)), 0); 1754 Chain = Move.getValue(1); 1755 ReplaceUses(N0.getValue(1), Chain); 1756 } else { 1757 Move = 1758 SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0); 1759 Chain = CurDAG->getEntryNode(); 1760 } 1761 Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue()); 1762 InFlag = Chain.getValue(1); 1763 } else { 1764 InFlag = 1765 CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, 1766 LoReg, N0, SDValue()).getValue(1); 1767 if (isSigned && !signBitIsZero) { 1768 // Sign extend the low part into the high part. 1769 InFlag = 1770 SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0); 1771 } else { 1772 // Zero out the high part, effectively zero extending the input. 
1773 SDValue ClrNode = 1774 SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); 1775 InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, 1776 ClrNode, InFlag).getValue(1); 1777 } 1778 } 1779 1780 if (foldedLoad) { 1781 SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), 1782 InFlag }; 1783 SDNode *CNode = 1784 CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops, 1785 array_lengthof(Ops)); 1786 InFlag = SDValue(CNode, 1); 1787 // Update the chain. 1788 ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); 1789 } else { 1790 InFlag = 1791 SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); 1792 } 1793 1794 // Prevent use of AH in a REX instruction by referencing AX instead. 1795 // Shift it down 8 bits. 1796 if (HiReg == X86::AH && Subtarget->is64Bit() && 1797 !SDValue(Node, 1).use_empty()) { 1798 SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, 1799 X86::AX, MVT::i16, InFlag); 1800 InFlag = Result.getValue(2); 1801 1802 // If we also need AL (the quotient), get it by extracting a subreg from 1803 // Result. The fast register allocator does not like multiple CopyFromReg 1804 // nodes using aliasing registers. 1805 if (!SDValue(Node, 0).use_empty()) 1806 ReplaceUses(SDValue(Node, 0), 1807 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 1808 1809 // Shift AX right by 8 bits instead of using AH. 1810 Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, 1811 Result, 1812 CurDAG->getTargetConstant(8, MVT::i8)), 1813 0); 1814 ReplaceUses(SDValue(Node, 1), 1815 CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); 1816 } 1817 // Copy the division (low) result, if it is needed. 
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the remainder (high) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // All uses were replaced above, so no replacement node is returned.
    return NULL;
  }

  case X86ISD::CMP: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
        HasNoSignedComparisonUses(Node))
      // Look past the truncate if CMP is the only use of it.
      N0 = N0.getOperand(0);
    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8".
      // Narrowing is only done when the narrowed immediate's sign bit is
      // clear, or when no user performs a signed comparison.
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          // Constrain the value to an ABCD register class so the 8-bit
          // subregister extraction below is legal.
          TargetRegisterClass *TRC = 0;
          switch (N0.getValueType().getSimpleVT().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.  Only those registers have an
        // h-subregister to test against.
        TargetRegisterClass *TRC = 0;
        switch (N0.getValueType().getSimpleVT().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                        MVT::i8, Reg);

        // Emit a testb. No special NOREX tricks are needed since there's
        // only one GPR operand!
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                      Subreg, ShiftedImm);
      }

      // For example, "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
      }
    }
    break;
  }
  }

  // No manual selection applied; fall back to the table-generated matcher.
  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << '\n');

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Select address operands for an inline-asm
/// memory constraint.  Returns true on failure; on success the five address
/// operands produced by SelectAddr are appended to OutOps.
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o': // offsetable        ??
  case 'v': // not offsetable    ??
  default: return true;   // Unhandled constraint letter: report failure.
  case 'm': // memory
    // Match the operand as a full x86 address; fail if no match.
    if (!SelectAddr(Op.getNode(), Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  // Hand all five matched address operands back to the inline-asm lowering.
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}