X86ISelDAGToDAG.cpp revision 83ec4b6711980242ef3c55a4fa36b2d7a39c1bfb
//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include <queue>
#include <set>
using namespace llvm;

STATISTIC(NumFPKill   , "Number of FP_REG_KILL instructions added");
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDOperand's instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    struct {            // This is really a union, discriminated by BaseType!
      SDOperand Reg;
      int FrameIndex;
    } Base;

    bool isRIPRel;      // RIP as base?
    unsigned Scale;     // Index scale factor: 1, 2, 4 or 8 (set by MatchAddress).
    SDOperand IndexReg;
    unsigned Disp;      // Constant displacement; MatchAddress keeps it within
                        // 32 bits via the isInt32 checks.
    // At most one of GV/CP/ES/JT is set at a time; getAddressOperands consumes
    // them in that priority order when building the disp operand.
    GlobalValue *GV;    // Global address, or null.
    Constant *CP;       // Constant pool entry, or null.
    const char *ES;     // External symbol name, or null.
    int JT;             // Jump table index, or -1 if none.
    unsigned Align;     // CP alignment.

    X86ISelAddressMode()
      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
        GV(0), CP(0), ES(0), JT(-1), Align(0) {
    }
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel {
    /// ContainsFPCode - Every instruction we select that uses or defines a FP
    /// register should set this to true.
    bool ContainsFPCode;

    /// FastISel - Enable fast(er) instruction selection.
    ///
    bool FastISel;

    /// TM - Keep a reference to X86TargetMachine.
    ///
    X86TargetMachine &TM;

    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    X86TargetLowering X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// GlobalBaseReg - keeps track of the virtual register mapped onto global
    /// base register.
    unsigned GlobalBaseReg;

  public:
    // NOTE(review): X86Lowering is handed to the SelectionDAGISel base class
    // before it is itself constructed (members initialize in declaration
    // order).  This is only safe if the base class merely stores the
    // reference -- confirm before reordering members.
    X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
      : SelectionDAGISel(X86Lowering),
        ContainsFPCode(false), FastISel(fast), TM(tm),
        X86Lowering(*TM.getTargetLowering()),
        Subtarget(&TM.getSubtarget<X86Subtarget>()) {}

    virtual bool runOnFunction(Function &Fn) {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      return SelectionDAGISel::runOnFunction(Fn);
    }

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    /// InstructionSelectBasicBlock - This callback is invoked by
    /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
    virtual void InstructionSelectBasicBlock(SelectionDAG &DAG);

    virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);

    virtual bool CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const;

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDOperand N);

    bool MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                      bool isRoot = true, unsigned Depth = 0);
    bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                          bool isRoot, unsigned Depth);
    bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                    SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base,
                       SDOperand &Scale, SDOperand &Index, SDOperand &Disp);
    bool SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                             SDOperand N, SDOperand &Base, SDOperand &Scale,
                             SDOperand &Index, SDOperand &Disp,
                             SDOperand &InChain, SDOperand &OutChain);
    bool TryFoldLoad(SDOperand P, SDOperand N,
                     SDOperand &Base, SDOperand &Scale,
                     SDOperand &Index, SDOperand &Disp);
    void PreprocessForRMW(SelectionDAG &DAG);
    void PreprocessForFPConvert(SelectionDAG &DAG);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op,
                                              char ConstraintCode,
                                              std::vector<SDOperand> &OutOps,
                                              SelectionDAG &DAG);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    /// getAddressOperands - Translate a matched X86ISelAddressMode into the
    /// four explicit address operands (base, scale, index, disp) used by X86
    /// memory instructions.
    inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base,
                                   SDOperand &Scale, SDOperand &Index,
                                   SDOperand &Disp) {
      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
        AM.Base.Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
      else
        Disp = getI32Imm(AM.Disp);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDOperand getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDOperand getI16Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDOperand getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register.  Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    /// getTruncate - return an SDNode that implements a subreg based truncate
    /// of the specified operand to the specified value type.
    SDNode *getTruncate(SDOperand N0, MVT VT);

#ifndef NDEBUG
    unsigned Indent;    // Current indentation level for isel debug output.
#endif
  };
}

/// findFlagUse - Return use of MVT::Flag value produced by the specified SDNode.
///
static SDNode *findFlagUse(SDNode *N) {
  // The flag result, if present, is always the last value produced by a node.
  unsigned FlagResNo = N->getNumValues()-1;
  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
    SDNode *User = I->getUser();
    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
      SDOperand Op = User->getOperand(i);
      if (Op.Val == N && Op.ResNo == FlagResNo)
        return User;
    }
  }
  return NULL;
}

/// findNonImmUse - Return true by reference in "found" if "Use" is an
/// non-immediate use of "Def". This function recursively traversing
/// up the operand chain ignoring certain nodes.
static void findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
                          SDNode *Root, SDNode *Skip, bool &found,
                          SmallPtrSet<SDNode*, 16> &Visited) {
  // NOTE(review): the node-id comparison below prunes the walk; it relies on
  // an id-ordering invariant maintained by the selection process -- confirm
  // before reusing this helper elsewhere.
  if (found ||
      Use->getNodeId() > Def->getNodeId() ||
      !Visited.insert(Use))
    return;

  for (unsigned i = 0, e = Use->getNumOperands(); !found && i != e; ++i) {
    SDNode *N = Use->getOperand(i).Val;
    if (N == Skip)
      continue;
    if (N == Def) {
      if (Use == ImmedUse)
        continue;  // We are not looking for immediate use.
      if (Use == Root) {
        // Must be a chain reading node where it is possible to reach its own
        // chain operand through a path started from another operand.
        assert(Use->getOpcode() == ISD::STORE ||
               Use->getOpcode() == X86ISD::CMP ||
               Use->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
               Use->getOpcode() == ISD::INTRINSIC_VOID);
        continue;
      }
      found = true;
      break;
    }

    // Traverse up the operand chain.
    findNonImmUse(N, Def, ImmedUse, Root, Skip, found, Visited);
  }
}

/// isNonImmUse - Start searching from Root up the DAG to check is Def can
/// be reached. Return true if that's the case. However, ignore direct uses
/// by ImmedUse (which would be U in the example illustrated in
/// CanBeFoldedBy) and by Root (which can happen in the store case).
/// FIXME: to be really generic, we should allow direct use by any node
/// that is being folded. But realisticly since we only fold loads which
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
                               SDNode *Skip = NULL) {
  SmallPtrSet<SDNode*, 16> Visited;
  bool found = false;
  findNonImmUse(Root, Def, ImmedUse, Root, Skip, found, Visited);
  return found;
}


bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const {
  if (FastISel) return false;

  // If U use can somehow reach N through another path then U can't fold N or
  // it will create a cycle. e.g. In the following diagram, U can reach N
  // through X. If N is folded into into U, then X is both a predecessor and
  // a successor of U.
  //
  //         [ N ]
  //         ^  ^
  //         |  |
  //        /   \---
  //      /        [X]
  //      |         ^
  //     [U]--------|

  if (isNonImmUse(Root, N, U))
    return false;

  // If U produces a flag, then it gets (even more) interesting. Since it
  // would have been "glued" together with its flag use, we need to check if
  // it might reach N:
  //
  //       [ N ]
  //        ^ ^
  //        | |
  //       [U] \--
  //        ^   [TF]
  //        |    ^
  //        |    |
  //         \  /
  //          [FU]
  //
  // If FU (flag use) indirectly reach N (the load), and U fold N (call it
  // NU), then TF is a predecessor of FU and a successor of NU. But since
  // NU and FU are flagged together, this effectively creates a cycle.
  bool HasFlagUse = false;
  MVT VT = Root->getValueType(Root->getNumValues()-1);
  while ((VT == MVT::Flag && !Root->use_empty())) {
    // Walk out to the last node in the chain of glued (flag-linked) users.
    SDNode *FU = findFlagUse(Root);
    if (FU == NULL)
      break;
    else {
      Root = FU;
      HasFlagUse = true;
    }
    VT = Root->getValueType(Root->getNumValues()-1);
  }

  if (HasFlagUse)
    return !isNonImmUse(Root, N, Root, U);
  return true;
}

/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
/// and move load below the TokenFactor. Replace store's chain operand with
/// load's chain result.
static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load,
                                 SDOperand Store, SDOperand TF) {
  // In the TokenFactor's operand list, substitute the load's incoming chain
  // for the load itself.
  std::vector<SDOperand> Ops;
  for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i)
    if (Load.Val == TF.Val->getOperand(i).Val)
      Ops.push_back(Load.Val->getOperand(0));
    else
      Ops.push_back(TF.Val->getOperand(i));
  DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size());
  // Re-chain so the order becomes TokenFactor -> Load -> Store.
  DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2));
  DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1),
                         Store.getOperand(2), Store.getOperand(3));
}

/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
///
static bool isRMWLoad(SDOperand N, SDOperand Chain, SDOperand Address,
                      SDOperand &Load) {
  // Look through a bitconvert to the underlying load.
  if (N.getOpcode() == ISD::BIT_CONVERT)
    N = N.getOperand(0);

  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD || LD->isVolatile())
    return false;
  if (LD->getAddressingMode() != ISD::UNINDEXED)
    return false;

  // Only plain and any-extending loads are candidates for RMW folding.
  ISD::LoadExtType ExtType = LD->getExtensionType();
  if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
    return false;

  // The load must have a single user, load from the store's own address, and
  // be chained directly into the TokenFactor.
  if (N.hasOneUse() &&
      N.getOperand(1) == Address &&
      N.Val->isOperandOf(Chain.Val)) {
    Load = N;
    return true;
  }
  return false;
}

/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
/// This is only run if not in -fast mode (aka -O0).
/// This allows the instruction selector to pick more read-modify-write
/// instructions. This is a common case:
///
///     [Load chain]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///      /      \-
///     /         |
/// [TokenFactor] [Op]
///     ^          ^
///     |          |
///      \        /
///       \      /
///       [Store]
///
/// The fact the store's chain operand != load's chain will prevent the
/// (store (op (load))) instruction from being selected. We can transform it to:
///
///     [Load chain]
///         ^
///         |
///    [TokenFactor]
///         ^
///         |
///       [Load]
///       ^    ^
///       |    |
///       |     \-
///       |       |
///       |     [Op]
///       |       ^
///       |       |
///       \      /
///        \    /
///       [Store]
void X86DAGToDAGISel::PreprocessForRMW(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
         E = DAG.allnodes_end(); I != E; ++I) {
    if (!ISD::isNON_TRUNCStore(I))
      continue;
    SDOperand Chain = I->getOperand(0);
    if (Chain.Val->getOpcode() != ISD::TokenFactor)
      continue;

    SDOperand N1 = I->getOperand(1);   // Value being stored.
    SDOperand N2 = I->getOperand(2);   // Store address.
    // Skip scalar FP stored values, and values with more than one user.
    if ((N1.getValueType().isFloatingPoint() &&
         !N1.getValueType().isVector()) ||
        !N1.hasOneUse())
      continue;

    bool RModW = false;
    SDOperand Load;
    unsigned Opcode = N1.Val->getOpcode();
    switch (Opcode) {
      case ISD::ADD:
      case ISD::MUL:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::ADDC:
      case ISD::ADDE:
      case ISD::VECTOR_SHUFFLE: {
        // These operations accept the load in either operand position.
        SDOperand N10 = N1.getOperand(0);
        SDOperand N11 = N1.getOperand(1);
        RModW = isRMWLoad(N10, Chain, N2, Load);
        if (!RModW)
          RModW = isRMWLoad(N11, Chain, N2, Load);
        break;
      }
      case ISD::SUB:
      case ISD::SHL:
      case ISD::SRA:
      case ISD::SRL:
      case ISD::ROTL:
      case ISD::ROTR:
      case ISD::SUBC:
      case ISD::SUBE:
      case X86ISD::SHLD:
      case X86ISD::SHRD: {
        // Non-commutative operations: only the first operand may be the load.
        SDOperand N10 = N1.getOperand(0);
        RModW = isRMWLoad(N10, Chain, N2, Load);
        break;
      }
    }

    if (RModW) {
      MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain);
      ++NumLoadMoved;
    }
  }
}


/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
/// nodes that target the FP stack to be store and load to the stack.  This is a
/// gross hack.
/// We would like to simply mark these as being illegal, but when
/// we do that, legalize produces these when it expands calls, then expands
/// these in the same legalize pass.  We would like dag combine to be able to
/// hack on these between the call expansion and the node legalization.  As such
/// this pass basically does "really late" legalization of these inline with the
/// X86 isel pass.
void X86DAGToDAGISel::PreprocessForFPConvert(SelectionDAG &DAG) {
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    MVT SrcVT = N->getOperand(0).getValueType();
    MVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDOperand MemTmp = DAG.CreateStackTemporary(MemVT);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDOperand Store = DAG.getTruncStore(DAG.getEntryNode(), N->getOperand(0),
                                        MemTmp, NULL, 0, MemVT);
    SDOperand Result = DAG.getExtLoad(ISD::EXTLOAD, DstVT, Store, MemTmp,
                                      NULL, 0, MemVT);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    DAG.ReplaceAllUsesOfValueWith(SDOperand(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    DAG.DeleteNode(N);
  }
}

/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
/// when it has created a SelectionDAG for us to codegen.
void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) {
  DEBUG(BB->dump());
  MachineFunction::iterator FirstMBB = BB;

  // Expose read-modify-write opportunities; only done when optimizing.
  if (!FastISel)
    PreprocessForRMW(DAG);

  // FIXME: This should only happen when not -fast.
  PreprocessForFPConvert(DAG);

  // Codegen the basic block.
#ifndef NDEBUG
  DOUT << "===== Instruction selection begins:\n";
  Indent = 0;
#endif
  DAG.setRoot(SelectRoot(DAG.getRoot()));
#ifndef NDEBUG
  DOUT << "===== Instruction selection ends:\n";
#endif

  DAG.RemoveDeadNodes();

  // Emit machine code to BB.  This can change 'BB' to the last block being
  // inserted into.
  ScheduleAndEmitDAG(DAG);

  // If we are emitting FP stack code, scan the basic block to determine if this
  // block defines any FP values.  If so, put an FP_REG_KILL instruction before
  // the terminator of the block.

  // Note that FP stack instructions are used in all modes for long double,
  // so we always need to do this check.
  // Also note that it's possible for an FP stack register to be live across
  // an instruction that produces multiple basic blocks (SSE CMOV) so we
  // must check all the generated basic blocks.

  // Scan all of the machine instructions in these MBBs, checking for FP
  // stores.  (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
  MachineFunction::iterator MBBI = FirstMBB;
  MachineFunction::iterator EndMBB = BB; ++EndMBB;
  for (; MBBI != EndMBB; ++MBBI) {
    MachineBasicBlock *MBB = MBBI;

    // If this block returns, ignore it.  We don't want to insert an FP_REG_KILL
    // before the return.
    if (!MBB->empty()) {
      MachineBasicBlock::iterator EndI = MBB->end();
      --EndI;
      if (EndI->getDesc().isReturn())
        continue;
    }

    bool ContainsFPCode = false;
    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
         !ContainsFPCode && I != E; ++I) {
      if (I->getNumOperands() != 0 && I->getOperand(0).isRegister()) {
        const TargetRegisterClass *clas;
        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
          // NOTE(review): the register class below is queried on operand 0
          // even though the loop tests operand 'op' -- looks like a bug;
          // verify whether getOperand(op) was intended.
          if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() &&
              TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
              ((clas = RegInfo->getRegClass(I->getOperand(0).getReg())) ==
                 X86::RFP32RegisterClass ||
               clas == X86::RFP64RegisterClass ||
               clas == X86::RFP80RegisterClass)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
    // Check PHI nodes in successor blocks.  These PHI's will be lowered to have
    // a copy of the input value in this block.  In SSE mode, we only care about
    // 80-bit values.
    if (!ContainsFPCode) {
      // Final check, check LLVM BB's that are successors to the LLVM BB
      // corresponding to BB for FP PHI nodes.
      const BasicBlock *LLVMBB = BB->getBasicBlock();
      const PHINode *PN;
      for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
           !ContainsFPCode && SI != E; ++SI) {
        for (BasicBlock::const_iterator II = SI->begin();
             (PN = dyn_cast<PHINode>(II)); ++II) {
          if (PN->getType()==Type::X86_FP80Ty ||
              (!Subtarget->hasSSE1() && PN->getType()->isFloatingPoint()) ||
              (!Subtarget->hasSSE2() && PN->getType()==Type::DoubleTy)) {
            ContainsFPCode = true;
            break;
          }
        }
      }
    }
    // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
    if (ContainsFPCode) {
      BuildMI(*MBB, MBBI->getFirstTerminator(),
              TM.getInstrInfo()->get(X86::FP_REG_KILL));
      ++NumFPKill;
    }
  }
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  // On Cygwin/MinGW targets, emit a call to __main at the top of main.
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) {
  // If this is main, emit special code for main.
  MachineBasicBlock *BB = MF.begin();
  if (Fn.hasExternalLinkage() && Fn.getName() == "main")
    EmitSpecialCodeForMain(BB, MF.getFrameInfo());
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM,
                                   bool isRoot, unsigned Depth) {
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM, isRoot, Depth);

  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRel) {
    // NOTE(review): this guard tests AM.JT != -1, i.e. a constant
    // displacement is only folded when a jump table base is already present;
    // confirm the asymmetry with the ES test is intentional.
    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
      int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
      if (isInt32(AM.Disp + Val)) {
        AM.Disp += Val;
        return false;
      }
    }
    return true;
  }

  int id = N.Val->getNodeId();
  bool AlreadySelected = isSelected(id); // Already selected, not yet replaced.

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    // Fold a constant into the displacement if the sum still fits in 32 bits.
    int64_t Val = cast<ConstantSDNode>(N)->getSignExtended();
    if (isInt32(AM.Disp + Val)) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::Wrapper: {
    bool is64Bit = Subtarget->is64Bit();
    // Under X86-64 non-small code model, GV (and friends) are 64-bits.
    // Also, base and index reg must be 0 in order to use rip as base.
    if (is64Bit && (TM.getCodeModel() != CodeModel::Small ||
                    AM.Base.Reg.Val || AM.IndexReg.Val))
      break;
    if (AM.GV != 0 || AM.CP != 0 || AM.ES != 0 || AM.JT != -1)
      break;
    // If value is available in a register both base and index components have
    // been picked, we can't fit the result available in the register in the
    // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
    if (!AlreadySelected || (AM.Base.Reg.Val && AM.IndexReg.Val)) {
      SDOperand N0 = N.getOperand(0);
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
        GlobalValue *GV = G->getGlobal();
        AM.GV = GV;
        AM.Disp += G->getOffset();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
        AM.CP = CP->getConstVal();
        AM.Align = CP->getAlignment();
        AM.Disp += CP->getOffset();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
        AM.ES = S->getSymbol();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
        AM.JT = J->getIndex();
        AM.isRIPRel = TM.getRelocationModel() != Reloc::Static &&
          Subtarget->isPICStyleRIPRel();
        return false;
      }
    }
    break;
  }

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AlreadySelected || AM.IndexReg.Val != 0 || AM.Scale != 1 || AM.isRIPRel)
      break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) {
      unsigned Val = CN->getValue();
      // Shifts by 1, 2 or 3 map onto SIB scales 2, 4 and 8.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDOperand ShVal = N.Val->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() &&
            isa<ConstantSDNode>(ShVal.Val->getOperand(1))) {
          AM.IndexReg = ShVal.Val->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.Val->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getValue() << Val);
          if (isInt32(Disp))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
      break;
    }
    // NOTE(review): when the shift amount is not a constant, control falls
    // through into the SMUL_LOHI/MUL handling below -- there is no break
    // here; verify this fallthrough is intentional.

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.ResNo != 0) break;
    // FALL THROUGH
  case ISD::MUL:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (!AlreadySelected &&
        AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base.Reg.Val == 0 &&
        AM.IndexReg.Val == 0 &&
        !AM.isRIPRel) {
      if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)))
        if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) {
          AM.Scale = unsigned(CN->getValue())-1;

          SDOperand MulVal = N.Val->getOperand(0);
          SDOperand Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.Val->getOperand(1))) {
            Reg = MulVal.Val->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.Val->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue();
            if (isInt32(Disp))
              AM.Disp = Disp;
            else
              Reg = N.Val->getOperand(0);
          } else {
            Reg = N.Val->getOperand(0);
          }

          // Use the same register as both base and scaled index.
          AM.IndexReg = AM.Base.Reg = Reg;
          return false;
        }
    }
    break;

  case ISD::ADD:
    if (!AlreadySelected) {
      // Try (lhs then rhs); if that fails, restore AM and try the operands in
      // the opposite order.
      X86ISelAddressMode Backup = AM;
      if (!MatchAddress(N.Val->getOperand(0), AM, false, Depth+1) &&
          !MatchAddress(N.Val->getOperand(1), AM, false, Depth+1))
        return false;
      AM = Backup;
      if (!MatchAddress(N.Val->getOperand(1), AM, false, Depth+1) &&
          !MatchAddress(N.Val->getOperand(0), AM, false, Depth+1))
        return false;
      AM = Backup;
    }
    break;

  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (AlreadySelected) break;

    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      X86ISelAddressMode Backup = AM;
      // Start with the LHS as an addr mode.
      // NOTE(review): Depth is not forwarded to this recursive call, so it
      // restarts at the default 0 -- confirm the recursion bound still holds.
      if (!MatchAddress(N.getOperand(0), AM, false) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          isInt32(AM.Disp + CN->getSignExtended()) &&
          // Check to see if the LHS & C is zero.
          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
        AM.Disp += CN->getValue();
        return false;
      }
      AM = Backup;
    }
    break;

  case ISD::AND: {
    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (AlreadySelected) break;
    SDOperand Shift = N.getOperand(0);
    if (Shift.getOpcode() != ISD::SHL) break;

    // Scale must not be used already.
    if (AM.IndexReg.Val != 0 || AM.Scale != 1) break;

    // Not when RIP is used as the base.
    if (AM.isRIPRel) break;

    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDOperand NewANDMask = CurDAG->getNode(ISD::SRL, N.getValueType(),
                                           SDOperand(C2, 0), SDOperand(C1, 0));
    SDOperand NewAND = CurDAG->getNode(ISD::AND, N.getValueType(),
                                       Shift.getOperand(0), NewANDMask);
    // Reuse the original node ids so the isel machinery treats the
    // replacement nodes like the ones they replace.
    NewANDMask.Val->setNodeId(Shift.Val->getNodeId());
    NewAND.Val->setNodeId(N.Val->getNodeId());

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  return MatchAddressBase(N, AM, isRoot, Depth);
}

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM,
                                       bool isRoot, unsigned Depth) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) {
    // If so, check to see if the scale index register is set.
919 if (AM.IndexReg.Val == 0 && !AM.isRIPRel) { 920 AM.IndexReg = N; 921 AM.Scale = 1; 922 return false; 923 } 924 925 // Otherwise, we cannot select it. 926 return true; 927 } 928 929 // Default, generate it as a register. 930 AM.BaseType = X86ISelAddressMode::RegBase; 931 AM.Base.Reg = N; 932 return false; 933} 934 935/// SelectAddr - returns true if it is able pattern match an addressing mode. 936/// It returns the operands which make up the maximal addressing mode it can 937/// match by reference. 938bool X86DAGToDAGISel::SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base, 939 SDOperand &Scale, SDOperand &Index, 940 SDOperand &Disp) { 941 X86ISelAddressMode AM; 942 if (MatchAddress(N, AM)) 943 return false; 944 945 MVT VT = N.getValueType(); 946 if (AM.BaseType == X86ISelAddressMode::RegBase) { 947 if (!AM.Base.Reg.Val) 948 AM.Base.Reg = CurDAG->getRegister(0, VT); 949 } 950 951 if (!AM.IndexReg.Val) 952 AM.IndexReg = CurDAG->getRegister(0, VT); 953 954 getAddressOperands(AM, Base, Scale, Index, Disp); 955 return true; 956} 957 958/// isZeroNode - Returns true if Elt is a constant zero or a floating point 959/// constant +0.0. 960static inline bool isZeroNode(SDOperand Elt) { 961 return ((isa<ConstantSDNode>(Elt) && 962 cast<ConstantSDNode>(Elt)->getValue() == 0) || 963 (isa<ConstantFPSDNode>(Elt) && 964 cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); 965} 966 967 968/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to 969/// match a load whose top elements are either undef or zeros. The load flavor 970/// is derived from the type of N, which is either v4f32 or v2f64. 
/// SelectScalarSSELoad — returns true (and fills Base/Scale/Index/Disp plus
/// the In/Out chain operands) when N is a scalar load that may be folded
/// into an SSE instruction: either a plain scalar_to_vector(load) whose upper
/// elements are undef, or a VZEXT_MOVL of one, whose upper elements are zero.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand Op, SDOperand Pred,
                                          SDOperand N, SDOperand &Base,
                                          SDOperand &Scale, SDOperand &Index,
                                          SDOperand &Disp, SDOperand &InChain,
                                          SDOperand &OutChain) {
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // The chain result of the load being wrapped by scalar_to_vector.
    InChain = N.getOperand(0).getValue(1);
    if (ISD::isNON_EXTLoad(InChain.Val) &&
        InChain.getValue(0).hasOneUse() &&
        N.hasOneUse() &&
        // Folding must not create a cycle through Pred/Op in the DAG.
        CanBeFoldedBy(N.Val, Pred.Val, Op.Val)) {
      LoadSDNode *LD = cast<LoadSDNode>(InChain);
      // SelectAddr returns true on success (opposite of MatchAddress).
      if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
        return false;
      OutChain = LD->getChain();
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.Val->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).Val->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).Val) &&
      N.getOperand(0).getOperand(0).hasOneUse()) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp))
      return false;
    OutChain = LD->getChain();
    InChain = SDOperand(LD, 1);
    return true;
  }
  return false;
}


/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand Op, SDOperand N,
                                    SDOperand &Base, SDOperand &Scale,
                                    SDOperand &Index, SDOperand &Disp) {
  X86ISelAddressMode AM;
  // MatchAddress returns true on failure.
  if (MatchAddress(N, AM))
    return false;

  // Score the matched mode; only emit LEA when the mode is "complex enough"
  // (Complexity > 2) that a plain add/shift would not be cheaper.
  MVT VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base.Reg.Val)
      Complexity = 1;
    else
      AM.Base.Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.Val)
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.GV || AM.CP || AM.ES || AM.JT != -1) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val))
    Complexity++;

  if (Complexity > 2) {
    getAddressOperands(AM, Base, Scale, Index, Disp);
    return true;
  }
  return false;
}

/// TryFoldLoad - returns true (and fills the address operands) if N is a
/// single-use, non-extending load that can be folded into the instruction
/// selected for P without creating a cycle.
bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N,
                                  SDOperand &Base, SDOperand &Scale,
                                  SDOperand &Index, SDOperand &Disp) {
  if (ISD::isNON_EXTLoad(N.Val) &&
      N.hasOneUse() &&
      CanBeFoldedBy(N.Val, P.Val, P.Val))
    // Operand 1 of a load node is its base pointer.
    return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp);
  return false;
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
/// The register is created lazily (once per function) and the MOVPC32r
/// (and for GOT-style PIC, the ADD32ri of _GLOBAL_OFFSET_TABLE_) is
/// inserted at the top of the function's first basic block.
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing");
  if (!GlobalBaseReg) {
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineFunction *MF = BB->getParent();
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    MachineRegisterInfo &RegInfo = MF->getRegInfo();
    unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

    const TargetInstrInfo *TII = TM.getInstrInfo();
    // Operand of MovePCtoStack is completely ignored by asm printer. It's
    // only used in JIT code emission as displacement to pc.
    BuildMI(FirstMBB, MBBI, TII->get(X86::MOVPC32r), PC).addImm(0);

    // If we're using vanilla 'GOT' PIC style, we should use relative addressing
    // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
    if (TM.getRelocationModel() == Reloc::PIC_ &&
        Subtarget->isPICStyleGOT()) {
      GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
      BuildMI(FirstMBB, MBBI, TII->get(X86::ADD32ri), GlobalBaseReg)
        .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
    } else {
      GlobalBaseReg = PC;
    }

  }
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val;
}

/// FindCallStartFromCall - Walk up the token-chain operands of Node until the
/// CALLSEQ_START node that began this call sequence is found.
static SDNode *FindCallStartFromCall(SDNode *Node) {
  if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
  assert(Node->getOperand(0).getValueType() == MVT::Other &&
         "Node doesn't have a token chain argument!");
  return FindCallStartFromCall(Node->getOperand(0).Val);
}

/// getTruncate - Emit an EXTRACT_SUBREG that truncates N0 down to VT
/// (i8/i16/i32).  On 32-bit targets an i8 truncate first copies the source
/// into a register class known to have an 8-bit subregister.
SDNode *X86DAGToDAGISel::getTruncate(SDOperand N0, MVT VT) {
  SDOperand SRIdx;
  switch (VT.getSimpleVT()) {
  default: assert(0 && "Unknown truncate!");
  case MVT::i8:
    SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
    // Ensure that the source register has an 8-bit subreg on 32-bit targets
    if (!Subtarget->is64Bit()) {
      unsigned Opc;
      // NOTE(review): this inner VT shadows the parameter VT, so the
      // EXTRACT_SUBREG below is typed with the *source* width (i16/i32),
      // not the requested i8 — confirm this is intentional.
      MVT VT;
      switch (N0.getValueType().getSimpleVT()) {
      default: assert(0 && "Unknown truncate!");
      case MVT::i16:
        Opc = X86::MOV16to16_;
        VT = MVT::i16;
        break;
      case MVT::i32:
        Opc = X86::MOV32to32_;
        VT = MVT::i32;
        break;
      }
      N0 = SDOperand(CurDAG->getTargetNode(Opc, VT, MVT::Flag, N0), 0);
      return CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                   VT, N0, SRIdx, N0.getValue(1));
    }
    break;
  case MVT::i16:
    SRIdx = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
    break;
  case MVT::i32:
    SRIdx = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
    break;
  }
  return CurDAG->getTargetNode(X86::EXTRACT_SUBREG, VT, N0, SRIdx);
}


/// Select - Main custom-selection entry point.  Handles the node kinds that
/// need hand-written selection (PIC base, ADD of wrapped globals, the
/// multiply/divide instructions with implicit AL/AX/EAX/RAX + AH/DX/EDX/RDX
/// operands, and the extend/truncate subregister forms); everything else
/// falls through to the tblgen-generated SelectCode.
SDNode *X86DAGToDAGISel::Select(SDOperand N) {
  SDNode *Node = N.Val;
  MVT NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();

#ifndef NDEBUG
  DOUT << std::string(Indent, ' ') << "Selecting: ";
  DEBUG(Node->dump(CurDAG));
  DOUT << "\n";
  Indent += 2;
#endif

  if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) {
#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "== ";
    DEBUG(Node->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return NULL;   // Already selected.
  }

  switch (Opcode) {
  default: break;
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();

  case ISD::ADD: {
    // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd
    // code and is matched first so to prevent it from being turned into
    // LEA32r X+c.
    // In 64-bit small code size mode, use LEA to take advantage of
    // RIP-relative addressing.
    if (TM.getCodeModel() != CodeModel::Small)
      break;
    MVT PtrVT = TLI.getPointerTy();
    SDOperand N0 = N.getOperand(0);
    SDOperand N1 = N.getOperand(1);
    if (N.Val->getValueType(0) == PtrVT &&
        N0.getOpcode() == X86ISD::Wrapper &&
        N1.getOpcode() == ISD::Constant) {
      unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue();
      SDOperand C(0, 0);
      // TODO: handle ExternalSymbolSDNode.
      if (GlobalAddressSDNode *G =
          dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) {
        // Fold the constant addend into the global address itself.
        C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT,
                                           G->getOffset() + Offset);
      } else if (ConstantPoolSDNode *CP =
                 dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) {
        C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT,
                                          CP->getAlignment(),
                                          CP->getOffset()+Offset);
      }

      if (C.Val) {
        if (Subtarget->is64Bit()) {
          // lea disp(%rip) — operands are base, scale, index, disp.
          SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1),
                              CurDAG->getRegister(0, PtrVT), C };
          return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4);
        } else
          return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C);
      }
    }

    // Other cases are handled by auto-generated code.
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned)
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    else
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }

    // The one-operand MUL/IMUL forms use fixed registers for the result.
    unsigned LoReg, HiReg;
    switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);
    // Multiply is commutative, so try folding the other operand too.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    AddToISelQueue(N0);
    SDOperand InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), LoReg,
                                            N0, SDOperand()).getValue(1);

    if (foldedLoad) {
      AddToISelQueue(N1.getOperand(0));
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      InFlag = SDOperand(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
    } else {
      AddToISelQueue(N1);
      InFlag =
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the low half of the result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the high half of the result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDOperand Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                 MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDOperand N0 = Node->getOperand(0);
    SDOperand N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned)
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    else
      switch (NVT.getSimpleVT()) {
      default: assert(0 && "Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }

    // DIV/IDIV take the dividend in Hi:Lo; ClrOpcode zeroes the high half
    // for unsigned, SExtOpcode sign-extends Lo into Hi for signed.
    unsigned LoReg, HiReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  HiReg = X86::AH;
      ClrOpcode  = 0;             // unused: i8 unsigned uses MOVZX below
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrOpcode  = X86::MOV16r0;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; HiReg = X86::EDX;
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; HiReg = X86::RDX;
      ClrOpcode  = X86::MOV64r0;
      SExtOpcode = X86::CQO;
      break;
    }

    SDOperand Tmp0, Tmp1, Tmp2, Tmp3;
    bool foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3);

    SDOperand InFlag;
    if (NVT == MVT::i8 && !isSigned) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDOperand Tmp0, Tmp1, Tmp2, Tmp3, Move, Chain;
      if (TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3)) {
        SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N0.getOperand(0) };
        AddToISelQueue(N0.getOperand(0));
        AddToISelQueue(Tmp0);
        AddToISelQueue(Tmp1);
        AddToISelQueue(Tmp2);
        AddToISelQueue(Tmp3);
        Move =
          SDOperand(CurDAG->getTargetNode(X86::MOVZX16rm8, MVT::i16, MVT::Other,
                                          Ops, 5), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        AddToISelQueue(N0);
        Move =
          SDOperand(CurDAG->getTargetNode(X86::MOVZX16rr8, MVT::i16, N0), 0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, X86::AX, Move, SDOperand());
      InFlag = Chain.getValue(1);
    } else {
      AddToISelQueue(N0);
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(),
                             LoReg, N0, SDOperand()).getValue(1);
      if (isSigned) {
        // Sign extend the low part into the high part.
        InFlag =
          SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), HiReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      AddToISelQueue(N1.getOperand(0));
      AddToISelQueue(Tmp0);
      AddToISelQueue(Tmp1);
      AddToISelQueue(Tmp2);
      AddToISelQueue(Tmp3);
      SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, N1.getOperand(0), InFlag };
      SDNode *CNode =
        CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6);
      InFlag = SDOperand(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDOperand(CNode, 0));
    } else {
      AddToISelQueue(N1);
      InFlag =
        SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0);
    }

    // Copy the division (low) result, if it is needed.
    if (!N.getValue(0).use_empty()) {
      SDOperand Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                                LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(N.getValue(0), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }
    // Copy the remainder (high) result, if it is needed.
    if (!N.getValue(1).use_empty()) {
      SDOperand Result;
      if (HiReg == X86::AH && Subtarget->is64Bit()) {
        // Prevent use of AH in a REX instruction by referencing AX instead.
        // Shift it down 8 bits.
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        X86::AX, MVT::i16, InFlag);
        InFlag = Result.getValue(2);
        Result = SDOperand(CurDAG->getTargetNode(X86::SHR16ri, MVT::i16, Result,
                                     CurDAG->getTargetConstant(8, MVT::i8)), 0);
        // Then truncate it down to i8.
        SDOperand SRIdx = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
        Result = SDOperand(CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
                                                 MVT::i8, Result, SRIdx), 0);
      } else {
        Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                        HiReg, NVT, InFlag);
        InFlag = Result.getValue(2);
      }
      ReplaceUses(N.getValue(1), Result);
#ifndef NDEBUG
      DOUT << std::string(Indent-2, ' ') << "=> ";
      DEBUG(Result.Val->dump(CurDAG));
      DOUT << "\n";
#endif
    }

#ifndef NDEBUG
    Indent -= 2;
#endif

    return NULL;
  }

  case ISD::ANY_EXTEND: {
    // Check if the type extended to supports subregs.
    if (NVT == MVT::i8)
      break;

    SDOperand N0 = Node->getOperand(0);
    // Get the subregister index for the type to extend.
    MVT N0VT = N0.getValueType();
    unsigned Idx = (N0VT == MVT::i32) ? X86::SUBREG_32BIT :
                   (N0VT == MVT::i16) ? X86::SUBREG_16BIT :
                   (Subtarget->is64Bit()) ? X86::SUBREG_8BIT : 0;

    // If we don't have a subreg Idx, let generated ISel have a try.
    if (Idx == 0)
      break;

    // If we have an index, generate an insert_subreg into undef.
    AddToISelQueue(N0);
    SDOperand Undef =
      SDOperand(CurDAG->getTargetNode(X86::IMPLICIT_DEF, NVT), 0);
    SDOperand SRIdx = CurDAG->getTargetConstant(Idx, MVT::i32);
    SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG,
                                            NVT, Undef, N0, SRIdx);

#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(ResNode->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return ResNode;
  }

  case ISD::SIGN_EXTEND_INREG: {
    SDOperand N0 = Node->getOperand(0);
    AddToISelQueue(N0);

    // Truncate down to the smaller type, then sign-extend back up with the
    // appropriate MOVSX form.
    MVT SVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
    SDOperand TruncOp = SDOperand(getTruncate(N0, SVT), 0);
    unsigned Opc = 0;
    switch (NVT.getSimpleVT()) {
    default: assert(0 && "Unknown sign_extend_inreg!");
    case MVT::i16:
      if (SVT == MVT::i8) Opc = X86::MOVSX16rr8;
      else assert(0 && "Unknown sign_extend_inreg!");
      break;
    case MVT::i32:
      switch (SVT.getSimpleVT()) {
      default: assert(0 && "Unknown sign_extend_inreg!");
      case MVT::i8:  Opc = X86::MOVSX32rr8;  break;
      case MVT::i16: Opc = X86::MOVSX32rr16; break;
      }
      break;
    case MVT::i64:
      switch (SVT.getSimpleVT()) {
      default: assert(0 && "Unknown sign_extend_inreg!");
      case MVT::i8:  Opc = X86::MOVSX64rr8;  break;
      case MVT::i16: Opc = X86::MOVSX64rr16; break;
      case MVT::i32: Opc = X86::MOVSX64rr32; break;
      }
      break;
    }

    SDNode *ResNode = CurDAG->getTargetNode(Opc, NVT, TruncOp);

#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(TruncOp.Val->dump(CurDAG));
    DOUT << "\n";
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(ResNode->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return ResNode;
    break;
  }

  case ISD::TRUNCATE: {
    SDOperand Input = Node->getOperand(0);
    AddToISelQueue(Node->getOperand(0));
    SDNode *ResNode = getTruncate(Input, NVT);

#ifndef NDEBUG
    DOUT << std::string(Indent-2, ' ') << "=> ";
    DEBUG(ResNode->dump(CurDAG));
    DOUT << "\n";
    Indent -= 2;
#endif
    return ResNode;
    break;
  }
  }

  // Fall back to the tblgen-generated matcher for everything else.
  SDNode *ResNode = SelectCode(N);

#ifndef NDEBUG
  DOUT << std::string(Indent-2, ' ') << "=> ";
  if (ResNode == NULL || ResNode == N.Val)
    DEBUG(N.Val->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DOUT << "\n";
  Indent -= 2;
#endif

  return ResNode;
}

/// SelectInlineAsmMemoryOperand - Select the address operands for an inline
/// asm memory ('m') constraint; returns true on failure (per the
/// SelectionDAGISel convention for this hook).
bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode,
                             std::vector<SDOperand> &OutOps, SelectionDAG &DAG){
  SDOperand Op0, Op1, Op2, Op3;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(Op, Op, Op0, Op1, Op2, Op3))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  AddToISelQueue(Op0);
  AddToISelQueue(Op1);
  AddToISelQueue(Op2);
  AddToISelQueue(Op3);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) {
  return new X86DAGToDAGISel(TM, Fast);
}