X86ISelDAGToDAG.cpp revision c45a2c72ccf57e84a18beb898740fe5b8fe0d2c8
1//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the Evan Cheng and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines a DAG pattern matching instruction selector for X86, 11// converting from a legalized dag to a X86 dag. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "x86-isel" 16#include "X86.h" 17#include "X86InstrBuilder.h" 18#include "X86ISelLowering.h" 19#include "X86RegisterInfo.h" 20#include "X86Subtarget.h" 21#include "X86TargetMachine.h" 22#include "llvm/GlobalValue.h" 23#include "llvm/Instructions.h" 24#include "llvm/Intrinsics.h" 25#include "llvm/Support/CFG.h" 26#include "llvm/CodeGen/MachineConstantPool.h" 27#include "llvm/CodeGen/MachineFunction.h" 28#include "llvm/CodeGen/MachineFrameInfo.h" 29#include "llvm/CodeGen/MachineInstrBuilder.h" 30#include "llvm/CodeGen/SSARegMap.h" 31#include "llvm/CodeGen/SelectionDAGISel.h" 32#include "llvm/Target/TargetMachine.h" 33#include "llvm/Support/Compiler.h" 34#include "llvm/Support/Debug.h" 35#include "llvm/Support/MathExtras.h" 36#include "llvm/ADT/Statistic.h" 37#include <deque> 38#include <iostream> 39#include <queue> 40#include <set> 41using namespace llvm; 42 43//===----------------------------------------------------------------------===// 44// Pattern Matcher Implementation 45//===----------------------------------------------------------------------===// 46 47namespace { 48 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses 49 /// SDOperand's instead of register numbers for the leaves of the matched 50 /// tree. 51 struct X86ISelAddressMode { 52 enum { 53 RegBase, 54 FrameIndexBase 55 } BaseType; 56 57 struct { // This is really a union, discriminated by BaseType! 58 SDOperand Reg; 59 int FrameIndex; 60 } Base; 61 62 bool isRIPRel; // RIP relative? 63 unsigned Scale; 64 SDOperand IndexReg; 65 unsigned Disp; 66 GlobalValue *GV; 67 Constant *CP; 68 const char *ES; 69 int JT; 70 unsigned Align; // CP alignment. 71 72 X86ISelAddressMode() 73 : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0), 74 GV(0), CP(0), ES(0), JT(-1), Align(0) { 75 } 76 }; 77} 78 79namespace { 80 Statistic<> 81 NumFPKill("x86-codegen", "Number of FP_REG_KILL instructions added"); 82 83 Statistic<> 84 NumLoadMoved("x86-codegen", "Number of loads moved below TokenFactor"); 85 86 //===--------------------------------------------------------------------===// 87 /// ISel - X86 specific code to select X86 machine instructions for 88 /// SelectionDAG operations. 89 /// 90 class VISIBILITY_HIDDEN X86DAGToDAGISel : public SelectionDAGISel { 91 /// ContainsFPCode - Every instruction we select that uses or defines a FP 92 /// register should set this to true. 93 bool ContainsFPCode; 94 95 /// FastISel - Enable fast(er) instruction selection. 96 /// 97 bool FastISel; 98 99 /// TM - Keep a reference to X86TargetMachine. 100 /// 101 X86TargetMachine &TM; 102 103 /// X86Lowering - This object fully describes how to lower LLVM code to an 104 /// X86-specific SelectionDAG. 105 X86TargetLowering X86Lowering; 106 107 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 108 /// make the right decision when generating code for different targets. 109 const X86Subtarget *Subtarget; 110 111 /// GlobalBaseReg - keeps track of the virtual register mapped onto global 112 /// base register. 113 unsigned GlobalBaseReg; 114 115 public: 116 X86DAGToDAGISel(X86TargetMachine &tm, bool fast) 117 : SelectionDAGISel(X86Lowering), 118 ContainsFPCode(false), FastISel(fast), TM(tm), 119 X86Lowering(*TM.getTargetLowering()), 120 Subtarget(&TM.getSubtarget<X86Subtarget>()) {} 121 122 virtual bool runOnFunction(Function &Fn) { 123 // Make sure we re-emit a set of the global base reg if necessary 124 GlobalBaseReg = 0; 125 return SelectionDAGISel::runOnFunction(Fn); 126 } 127 128 virtual const char *getPassName() const { 129 return "X86 DAG->DAG Instruction Selection"; 130 } 131 132 /// InstructionSelectBasicBlock - This callback is invoked by 133 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. 134 virtual void InstructionSelectBasicBlock(SelectionDAG &DAG); 135 136 virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); 137 138 virtual bool CanBeFoldedBy(SDNode *N, SDNode *U); 139 140// Include the pieces autogenerated from the target description. 141#include "X86GenDAGISel.inc" 142 143 private: 144 SDNode *Select(SDOperand N); 145 146 bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true); 147 bool SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, 148 SDOperand &Index, SDOperand &Disp); 149 bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, 150 SDOperand &Index, SDOperand &Disp); 151 bool TryFoldLoad(SDOperand P, SDOperand N, 152 SDOperand &Base, SDOperand &Scale, 153 SDOperand &Index, SDOperand &Disp); 154 void InstructionSelectPreprocess(SelectionDAG &DAG); 155 156 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 157 /// inline asm expressions. 158 virtual bool SelectInlineAsmMemoryOperand(const SDOperand &Op, 159 char ConstraintCode, 160 std::vector<SDOperand> &OutOps, 161 SelectionDAG &DAG); 162 163 void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI); 164 165 inline void getAddressOperands(X86ISelAddressMode &AM, SDOperand &Base, 166 SDOperand &Scale, SDOperand &Index, 167 SDOperand &Disp) { 168 Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? 169 CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : 170 AM.Base.Reg; 171 Scale = getI8Imm(AM.Scale); 172 Index = AM.IndexReg; 173 // These are 32-bit even in 64-bit mode since RIP relative offset 174 // is 32-bit. 175 if (AM.GV) 176 Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp); 177 else if (AM.CP) 178 Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Align, AM.Disp); 179 else if (AM.ES) 180 Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32); 181 else if (AM.JT != -1) 182 Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32); 183 else 184 Disp = getI32Imm(AM.Disp); 185 } 186 187 /// getI8Imm - Return a target constant with the specified value, of type 188 /// i8. 189 inline SDOperand getI8Imm(unsigned Imm) { 190 return CurDAG->getTargetConstant(Imm, MVT::i8); 191 } 192 193 /// getI16Imm - Return a target constant with the specified value, of type 194 /// i16. 195 inline SDOperand getI16Imm(unsigned Imm) { 196 return CurDAG->getTargetConstant(Imm, MVT::i16); 197 } 198 199 /// getI32Imm - Return a target constant with the specified value, of type 200 /// i32. 201 inline SDOperand getI32Imm(unsigned Imm) { 202 return CurDAG->getTargetConstant(Imm, MVT::i32); 203 } 204 205 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC 206 /// base register. Return the virtual register that holds this value. 207 SDNode *getGlobalBaseReg(); 208 209#ifndef NDEBUG 210 unsigned Indent; 211#endif 212 }; 213} 214 215static void findNonImmUse(SDNode* Use, SDNode* Def, bool &found, 216 std::set<SDNode *> &Visited) { 217 if (found || 218 Use->getNodeId() > Def->getNodeId() || 219 !Visited.insert(Use).second) 220 return; 221 222 for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { 223 SDNode *N = Use->getOperand(i).Val; 224 if (N != Def) { 225 findNonImmUse(N, Def, found, Visited); 226 } else { 227 found = true; 228 break; 229 } 230 } 231} 232 233static inline bool isNonImmUse(SDNode* Use, SDNode* Def) { 234 std::set<SDNode *> Visited; 235 bool found = false; 236 for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { 237 SDNode *N = Use->getOperand(i).Val; 238 if (N != Def) { 239 findNonImmUse(N, Def, found, Visited); 240 if (found) break; 241 } 242 } 243 return found; 244} 245 246 247bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U) { 248 // If U use can somehow reach N through another path then U can't fold N or 249 // it will create a cycle. e.g. In the following diagram, U can reach N 250 // through X. If N is folded into into U, then X is both a predecessor and 251 // a successor of U. 252 // 253 // [ N ] 254 // ^ ^ 255 // | | 256 // / \--- 257 // / [X] 258 // | ^ 259 // [U]--------| 260 return !FastISel && !isNonImmUse(U, N); 261} 262 263/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand 264/// and move load below the TokenFactor. Replace store's chain operand with 265/// load's chain result. 266static void MoveBelowTokenFactor(SelectionDAG &DAG, SDOperand Load, 267 SDOperand Store, SDOperand TF) { 268 std::vector<SDOperand> Ops; 269 for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i) 270 if (Load.Val == TF.Val->getOperand(i).Val) 271 Ops.push_back(Load.Val->getOperand(0)); 272 else 273 Ops.push_back(TF.Val->getOperand(i)); 274 DAG.UpdateNodeOperands(TF, &Ops[0], Ops.size()); 275 DAG.UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); 276 DAG.UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), 277 Store.getOperand(2), Store.getOperand(3)); 278} 279 280/// InstructionSelectPreprocess - Preprocess the DAG to allow the instruction 281/// selector to pick more load-modify-store instructions. This is a common 282/// case: 283/// 284/// [Load chain] 285/// ^ 286/// | 287/// [Load] 288/// ^ ^ 289/// | | 290/// / \- 291/// / | 292/// [TokenFactor] [Op] 293/// ^ ^ 294/// | | 295/// \ / 296/// \ / 297/// [Store] 298/// 299/// The fact the store's chain operand != load's chain will prevent the 300/// (store (op (load))) instruction from being selected. We can transform it to: 301/// 302/// [Load chain] 303/// ^ 304/// | 305/// [TokenFactor] 306/// ^ 307/// | 308/// [Load] 309/// ^ ^ 310/// | | 311/// | \- 312/// | | 313/// | [Op] 314/// | ^ 315/// | | 316/// \ / 317/// \ / 318/// [Store] 319void X86DAGToDAGISel::InstructionSelectPreprocess(SelectionDAG &DAG) { 320 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 321 E = DAG.allnodes_end(); I != E; ++I) { 322 if (I->getOpcode() != ISD::STORE) 323 continue; 324 SDOperand Chain = I->getOperand(0); 325 if (Chain.Val->getOpcode() != ISD::TokenFactor) 326 continue; 327 328 SDOperand N1 = I->getOperand(1); 329 SDOperand N2 = I->getOperand(2); 330 if (MVT::isFloatingPoint(N1.getValueType()) || 331 MVT::isVector(N1.getValueType()) || 332 !N1.hasOneUse()) 333 continue; 334 335 bool RModW = false; 336 SDOperand Load; 337 unsigned Opcode = N1.Val->getOpcode(); 338 switch (Opcode) { 339 case ISD::ADD: 340 case ISD::MUL: 341 case ISD::AND: 342 case ISD::OR: 343 case ISD::XOR: 344 case ISD::ADDC: 345 case ISD::ADDE: { 346 SDOperand N10 = N1.getOperand(0); 347 SDOperand N11 = N1.getOperand(1); 348 if (N10.Val->getOpcode() == ISD::LOAD) 349 RModW = true; 350 else if (N11.Val->getOpcode() == ISD::LOAD) { 351 RModW = true; 352 std::swap(N10, N11); 353 } 354 RModW = RModW && N10.Val->isOperand(Chain.Val) && N10.hasOneUse() && 355 (N10.getOperand(1) == N2) && 356 (N10.Val->getValueType(0) == N1.getValueType()); 357 if (RModW) 358 Load = N10; 359 break; 360 } 361 case ISD::SUB: 362 case ISD::SHL: 363 case ISD::SRA: 364 case ISD::SRL: 365 case ISD::ROTL: 366 case ISD::ROTR: 367 case ISD::SUBC: 368 case ISD::SUBE: 369 case X86ISD::SHLD: 370 case X86ISD::SHRD: { 371 SDOperand N10 = N1.getOperand(0); 372 if (N10.Val->getOpcode() == ISD::LOAD) 373 RModW = N10.Val->isOperand(Chain.Val) && N10.hasOneUse() && 374 (N10.getOperand(1) == N2) && 375 (N10.Val->getValueType(0) == N1.getValueType()); 376 if (RModW) 377 Load = N10; 378 break; 379 } 380 } 381 382 if (RModW) { 383 MoveBelowTokenFactor(DAG, Load, SDOperand(I, 0), Chain); 384 ++NumLoadMoved; 385 } 386 } 387} 388 389/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel 390/// when it has created a SelectionDAG for us to codegen. 391void X86DAGToDAGISel::InstructionSelectBasicBlock(SelectionDAG &DAG) { 392 DEBUG(BB->dump()); 393 MachineFunction::iterator FirstMBB = BB; 394 395 if (!FastISel) 396 InstructionSelectPreprocess(DAG); 397 398 // Codegen the basic block. 399#ifndef NDEBUG 400 DEBUG(std::cerr << "===== Instruction selection begins:\n"); 401 Indent = 0; 402#endif 403 DAG.setRoot(SelectRoot(DAG.getRoot())); 404#ifndef NDEBUG 405 DEBUG(std::cerr << "===== Instruction selection ends:\n"); 406#endif 407 408 DAG.RemoveDeadNodes(); 409 410 // Emit machine code to BB. 411 ScheduleAndEmitDAG(DAG); 412 413 // If we are emitting FP stack code, scan the basic block to determine if this 414 // block defines any FP values. If so, put an FP_REG_KILL instruction before 415 // the terminator of the block. 416 if (!Subtarget->hasSSE2()) { 417 // Note that FP stack instructions *are* used in SSE code when returning 418 // values, but these are not live out of the basic block, so we don't need 419 // an FP_REG_KILL in this case either. 420 bool ContainsFPCode = false; 421 422 // Scan all of the machine instructions in these MBBs, checking for FP 423 // stores. 424 MachineFunction::iterator MBBI = FirstMBB; 425 do { 426 for (MachineBasicBlock::iterator I = MBBI->begin(), E = MBBI->end(); 427 !ContainsFPCode && I != E; ++I) { 428 for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { 429 if (I->getOperand(op).isRegister() && I->getOperand(op).isDef() && 430 MRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) && 431 RegMap->getRegClass(I->getOperand(0).getReg()) == 432 X86::RFPRegisterClass) { 433 ContainsFPCode = true; 434 break; 435 } 436 } 437 } 438 } while (!ContainsFPCode && &*(MBBI++) != BB); 439 440 // Check PHI nodes in successor blocks. These PHI's will be lowered to have 441 // a copy of the input value in this block. 442 if (!ContainsFPCode) { 443 // Final check, check LLVM BB's that are successors to the LLVM BB 444 // corresponding to BB for FP PHI nodes. 445 const BasicBlock *LLVMBB = BB->getBasicBlock(); 446 const PHINode *PN; 447 for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB); 448 !ContainsFPCode && SI != E; ++SI) { 449 for (BasicBlock::const_iterator II = SI->begin(); 450 (PN = dyn_cast<PHINode>(II)); ++II) { 451 if (PN->getType()->isFloatingPoint()) { 452 ContainsFPCode = true; 453 break; 454 } 455 } 456 } 457 } 458 459 // Finally, if we found any FP code, emit the FP_REG_KILL instruction. 460 if (ContainsFPCode) { 461 BuildMI(*BB, BB->getFirstTerminator(), X86::FP_REG_KILL, 0); 462 ++NumFPKill; 463 } 464 } 465} 466 467/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in 468/// the main function. 469void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, 470 MachineFrameInfo *MFI) { 471 if (Subtarget->TargetType == X86Subtarget::isCygwin) 472 BuildMI(BB, X86::CALLpcrel32, 1).addExternalSymbol("__main"); 473 474 // Switch the FPU to 64-bit precision mode for better compatibility and speed. 475 int CWFrameIdx = MFI->CreateStackObject(2, 2); 476 addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx); 477 478 // Set the high part to be 64-bit precision. 479 addFrameReference(BuildMI(BB, X86::MOV8mi, 5), 480 CWFrameIdx, 1).addImm(2); 481 482 // Reload the modified control word now. 483 addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx); 484} 485 486void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { 487 // If this is main, emit special code for main. 488 MachineBasicBlock *BB = MF.begin(); 489 if (Fn.hasExternalLinkage() && Fn.getName() == "main") 490 EmitSpecialCodeForMain(BB, MF.getFrameInfo()); 491} 492 493/// MatchAddress - Add the specified node to the specified addressing mode, 494/// returning true if it cannot be done. This just pattern matches for the 495/// addressing mode 496bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, 497 bool isRoot) { 498 // RIP relative addressing: %rip + 32-bit displacement! 499 if (AM.isRIPRel) { 500 if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) { 501 int64_t Val = cast<ConstantSDNode>(N)->getSignExtended(); 502 if (isInt32(AM.Disp + Val)) { 503 AM.Disp += Val; 504 return false; 505 } 506 } 507 return true; 508 } 509 510 int id = N.Val->getNodeId(); 511 bool Available = isSelected(id); 512 513 switch (N.getOpcode()) { 514 default: break; 515 case ISD::Constant: { 516 int64_t Val = cast<ConstantSDNode>(N)->getSignExtended(); 517 if (isInt32(AM.Disp + Val)) { 518 AM.Disp += Val; 519 return false; 520 } 521 break; 522 } 523 524 case X86ISD::Wrapper: 525 // If value is available in a register both base and index components have 526 // been picked, we can't fit the result available in the register in the 527 // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement. 528 529 // Can't fit GV or CP in addressing mode for X86-64 medium or large code 530 // model since the displacement field is 32-bit. Ok for small code model. 531 532 // For X86-64 PIC code, only allow GV / CP + displacement so we can use RIP 533 // relative addressing mode. 534 if ((!Subtarget->is64Bit() || TM.getCodeModel() == CodeModel::Small) && 535 (!Available || (AM.Base.Reg.Val && AM.IndexReg.Val))) { 536 bool isRIP = Subtarget->is64Bit(); 537 if (isRIP && (AM.Base.Reg.Val || AM.Scale > 1 || AM.IndexReg.Val || 538 AM.BaseType == X86ISelAddressMode::FrameIndexBase)) 539 break; 540 if (ConstantPoolSDNode *CP = 541 dyn_cast<ConstantPoolSDNode>(N.getOperand(0))) { 542 if (AM.CP == 0) { 543 AM.CP = CP->getConstVal(); 544 AM.Align = CP->getAlignment(); 545 AM.Disp += CP->getOffset(); 546 if (isRIP) 547 AM.isRIPRel = true; 548 return false; 549 } 550 } else if (GlobalAddressSDNode *G = 551 dyn_cast<GlobalAddressSDNode>(N.getOperand(0))) { 552 if (AM.GV == 0) { 553 AM.GV = G->getGlobal(); 554 AM.Disp += G->getOffset(); 555 if (isRIP) 556 AM.isRIPRel = true; 557 return false; 558 } 559 } else if (isRoot && isRIP) { 560 if (ExternalSymbolSDNode *S = 561 dyn_cast<ExternalSymbolSDNode>(N.getOperand(0))) { 562 AM.ES = S->getSymbol(); 563 AM.isRIPRel = true; 564 return false; 565 } else if (JumpTableSDNode *J = 566 dyn_cast<JumpTableSDNode>(N.getOperand(0))) { 567 AM.JT = J->getIndex(); 568 AM.isRIPRel = true; 569 return false; 570 } 571 } 572 } 573 break; 574 575 case ISD::FrameIndex: 576 if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base.Reg.Val == 0) { 577 AM.BaseType = X86ISelAddressMode::FrameIndexBase; 578 AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); 579 return false; 580 } 581 break; 582 583 case ISD::SHL: 584 if (!Available && AM.IndexReg.Val == 0 && AM.Scale == 1) 585 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) { 586 unsigned Val = CN->getValue(); 587 if (Val == 1 || Val == 2 || Val == 3) { 588 AM.Scale = 1 << Val; 589 SDOperand ShVal = N.Val->getOperand(0); 590 591 // Okay, we know that we have a scale by now. However, if the scaled 592 // value is an add of something and a constant, we can fold the 593 // constant into the disp field here. 594 if (ShVal.Val->getOpcode() == ISD::ADD && ShVal.hasOneUse() && 595 isa<ConstantSDNode>(ShVal.Val->getOperand(1))) { 596 AM.IndexReg = ShVal.Val->getOperand(0); 597 ConstantSDNode *AddVal = 598 cast<ConstantSDNode>(ShVal.Val->getOperand(1)); 599 uint64_t Disp = AM.Disp + AddVal->getValue() << Val; 600 if (isInt32(Disp)) 601 AM.Disp = Disp; 602 else 603 AM.IndexReg = ShVal; 604 } else { 605 AM.IndexReg = ShVal; 606 } 607 return false; 608 } 609 } 610 break; 611 612 case ISD::MUL: 613 // X*[3,5,9] -> X+X*[2,4,8] 614 if (!Available && 615 AM.BaseType == X86ISelAddressMode::RegBase && 616 AM.Base.Reg.Val == 0 && 617 AM.IndexReg.Val == 0) 618 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1))) 619 if (CN->getValue() == 3 || CN->getValue() == 5 || CN->getValue() == 9) { 620 AM.Scale = unsigned(CN->getValue())-1; 621 622 SDOperand MulVal = N.Val->getOperand(0); 623 SDOperand Reg; 624 625 // Okay, we know that we have a scale by now. However, if the scaled 626 // value is an add of something and a constant, we can fold the 627 // constant into the disp field here. 628 if (MulVal.Val->getOpcode() == ISD::ADD && MulVal.hasOneUse() && 629 isa<ConstantSDNode>(MulVal.Val->getOperand(1))) { 630 Reg = MulVal.Val->getOperand(0); 631 ConstantSDNode *AddVal = 632 cast<ConstantSDNode>(MulVal.Val->getOperand(1)); 633 uint64_t Disp = AM.Disp + AddVal->getValue() * CN->getValue(); 634 if (isInt32(Disp)) 635 AM.Disp = Disp; 636 else 637 Reg = N.Val->getOperand(0); 638 } else { 639 Reg = N.Val->getOperand(0); 640 } 641 642 AM.IndexReg = AM.Base.Reg = Reg; 643 return false; 644 } 645 break; 646 647 case ISD::ADD: { 648 if (!Available) { 649 X86ISelAddressMode Backup = AM; 650 if (!MatchAddress(N.Val->getOperand(0), AM, false) && 651 !MatchAddress(N.Val->getOperand(1), AM, false)) 652 return false; 653 AM = Backup; 654 if (!MatchAddress(N.Val->getOperand(1), AM, false) && 655 !MatchAddress(N.Val->getOperand(0), AM, false)) 656 return false; 657 AM = Backup; 658 } 659 break; 660 } 661 662 case ISD::OR: { 663 if (!Available) { 664 X86ISelAddressMode Backup = AM; 665 // Look for (x << c1) | c2 where (c2 < c1) 666 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(0)); 667 if (CN && !MatchAddress(N.Val->getOperand(1), AM, false)) { 668 if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) { 669 AM.Disp = CN->getValue(); 670 return false; 671 } 672 } 673 AM = Backup; 674 CN = dyn_cast<ConstantSDNode>(N.Val->getOperand(1)); 675 if (CN && !MatchAddress(N.Val->getOperand(0), AM, false)) { 676 if (AM.GV == NULL && AM.Disp == 0 && CN->getValue() < AM.Scale) { 677 AM.Disp = CN->getValue(); 678 return false; 679 } 680 } 681 AM = Backup; 682 } 683 break; 684 } 685 } 686 687 // Is the base register already occupied? 688 if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) { 689 // If so, check to see if the scale index register is set. 690 if (AM.IndexReg.Val == 0) { 691 AM.IndexReg = N; 692 AM.Scale = 1; 693 return false; 694 } 695 696 // Otherwise, we cannot select it. 697 return true; 698 } 699 700 // Default, generate it as a register. 701 AM.BaseType = X86ISelAddressMode::RegBase; 702 AM.Base.Reg = N; 703 return false; 704} 705 706/// SelectAddr - returns true if it is able pattern match an addressing mode. 707/// It returns the operands which make up the maximal addressing mode it can 708/// match by reference. 709bool X86DAGToDAGISel::SelectAddr(SDOperand N, SDOperand &Base, SDOperand &Scale, 710 SDOperand &Index, SDOperand &Disp) { 711 X86ISelAddressMode AM; 712 if (MatchAddress(N, AM)) 713 return false; 714 715 MVT::ValueType VT = N.getValueType(); 716 if (AM.BaseType == X86ISelAddressMode::RegBase) { 717 if (!AM.Base.Reg.Val) 718 AM.Base.Reg = CurDAG->getRegister(0, VT); 719 } 720 721 if (!AM.IndexReg.Val) 722 AM.IndexReg = CurDAG->getRegister(0, VT); 723 724 getAddressOperands(AM, Base, Scale, Index, Disp); 725 return true; 726} 727 728/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing 729/// mode it matches can be cost effectively emitted as an LEA instruction. 730bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base, 731 SDOperand &Scale, 732 SDOperand &Index, SDOperand &Disp) { 733 X86ISelAddressMode AM; 734 if (MatchAddress(N, AM)) 735 return false; 736 737 MVT::ValueType VT = N.getValueType(); 738 unsigned Complexity = 0; 739 if (AM.BaseType == X86ISelAddressMode::RegBase) 740 if (AM.Base.Reg.Val) 741 Complexity = 1; 742 else 743 AM.Base.Reg = CurDAG->getRegister(0, VT); 744 else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) 745 Complexity = 4; 746 747 if (AM.IndexReg.Val) 748 Complexity++; 749 else 750 AM.IndexReg = CurDAG->getRegister(0, VT); 751 752 if (AM.Scale > 2) 753 Complexity += 2; 754 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg 755 else if (AM.Scale > 1) 756 Complexity++; 757 758 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA 759 // to a LEA. This is determined with some expermentation but is by no means 760 // optimal (especially for code size consideration). LEA is nice because of 761 // its three-address nature. Tweak the cost function again when we can run 762 // convertToThreeAddress() at register allocation time. 763 if (AM.GV || AM.CP || AM.ES || AM.JT != -1) { 764 // For X86-64, we should always use lea to materialize RIP relative 765 // addresses. 766 if (Subtarget->is64Bit()) 767 Complexity = 4; 768 else 769 Complexity += 2; 770 } 771 772 if (AM.Disp && (AM.Base.Reg.Val || AM.IndexReg.Val)) 773 Complexity++; 774 775 if (Complexity > 2) { 776 getAddressOperands(AM, Base, Scale, Index, Disp); 777 return true; 778 } 779 return false; 780} 781 782bool X86DAGToDAGISel::TryFoldLoad(SDOperand P, SDOperand N, 783 SDOperand &Base, SDOperand &Scale, 784 SDOperand &Index, SDOperand &Disp) { 785 if (N.getOpcode() == ISD::LOAD && 786 N.hasOneUse() && 787 CanBeFoldedBy(N.Val, P.Val)) 788 return SelectAddr(N.getOperand(1), Base, Scale, Index, Disp); 789 return false; 790} 791 792static bool isRegister0(SDOperand Op) { 793 if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) 794 return (R->getReg() == 0); 795 return false; 796} 797 798/// getGlobalBaseReg - Output the instructions required to put the 799/// base address to use for accessing globals into a register. 800/// 801SDNode *X86DAGToDAGISel::getGlobalBaseReg() { 802 assert(!Subtarget->is64Bit() && "X86-64 PIC uses RIP relative addressing"); 803 if (!GlobalBaseReg) { 804 // Insert the set of GlobalBaseReg into the first MBB of the function 805 MachineBasicBlock &FirstMBB = BB->getParent()->front(); 806 MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 807 SSARegMap *RegMap = BB->getParent()->getSSARegMap(); 808 // FIXME: when we get to LP64, we will need to create the appropriate 809 // type of register here. 810 GlobalBaseReg = RegMap->createVirtualRegister(X86::GR32RegisterClass); 811 BuildMI(FirstMBB, MBBI, X86::MovePCtoStack, 0); 812 BuildMI(FirstMBB, MBBI, X86::POP32r, 1, GlobalBaseReg); 813 } 814 return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).Val; 815} 816 817static SDNode *FindCallStartFromCall(SDNode *Node) { 818 if (Node->getOpcode() == ISD::CALLSEQ_START) return Node; 819 assert(Node->getOperand(0).getValueType() == MVT::Other && 820 "Node doesn't have a token chain argument!"); 821 return FindCallStartFromCall(Node->getOperand(0).Val); 822} 823 824SDNode *X86DAGToDAGISel::Select(SDOperand N) { 825 SDNode *Node = N.Val; 826 MVT::ValueType NVT = Node->getValueType(0); 827 unsigned Opc, MOpc; 828 unsigned Opcode = Node->getOpcode(); 829 830#ifndef NDEBUG 831 DEBUG(std::cerr << std::string(Indent, ' ')); 832 DEBUG(std::cerr << "Selecting: "); 833 DEBUG(Node->dump(CurDAG)); 834 DEBUG(std::cerr << "\n"); 835 Indent += 2; 836#endif 837 838 if (Opcode >= ISD::BUILTIN_OP_END && Opcode < X86ISD::FIRST_NUMBER) { 839#ifndef NDEBUG 840 DEBUG(std::cerr << std::string(Indent-2, ' ')); 841 DEBUG(std::cerr << "== "); 842 DEBUG(Node->dump(CurDAG)); 843 DEBUG(std::cerr << "\n"); 844 Indent -= 2; 845#endif 846 return NULL; // Already selected. 847 } 848 849 switch (Opcode) { 850 default: break; 851 case X86ISD::GlobalBaseReg: 852 return getGlobalBaseReg(); 853 854 case ISD::ADD: { 855 // Turn ADD X, c to MOV32ri X+c. This cannot be done with tblgen'd 856 // code and is matched first so to prevent it from being turned into 857 // LEA32r X+c. 858 // In 64-bit mode, use LEA to take advantage of RIP-relative addressing. 859 MVT::ValueType PtrVT = TLI.getPointerTy(); 860 SDOperand N0 = N.getOperand(0); 861 SDOperand N1 = N.getOperand(1); 862 if (N.Val->getValueType(0) == PtrVT && 863 N0.getOpcode() == X86ISD::Wrapper && 864 N1.getOpcode() == ISD::Constant) { 865 unsigned Offset = (unsigned)cast<ConstantSDNode>(N1)->getValue(); 866 SDOperand C(0, 0); 867 // TODO: handle ExternalSymbolSDNode. 868 if (GlobalAddressSDNode *G = 869 dyn_cast<GlobalAddressSDNode>(N0.getOperand(0))) { 870 C = CurDAG->getTargetGlobalAddress(G->getGlobal(), PtrVT, 871 G->getOffset() + Offset); 872 } else if (ConstantPoolSDNode *CP = 873 dyn_cast<ConstantPoolSDNode>(N0.getOperand(0))) { 874 C = CurDAG->getTargetConstantPool(CP->getConstVal(), PtrVT, 875 CP->getAlignment(), 876 CP->getOffset()+Offset); 877 } 878 879 if (C.Val) { 880 if (Subtarget->is64Bit()) { 881 SDOperand Ops[] = { CurDAG->getRegister(0, PtrVT), getI8Imm(1), 882 CurDAG->getRegister(0, PtrVT), C }; 883 return CurDAG->SelectNodeTo(N.Val, X86::LEA64r, MVT::i64, Ops, 4); 884 } else 885 return CurDAG->SelectNodeTo(N.Val, X86::MOV32ri, PtrVT, C); 886 } 887 } 888 889 // Other cases are handled by auto-generated code. 890 break; 891 } 892 893 case ISD::MULHU: 894 case ISD::MULHS: { 895 if (Opcode == ISD::MULHU) 896 switch (NVT) { 897 default: assert(0 && "Unsupported VT!"); 898 case MVT::i8: Opc = X86::MUL8r; MOpc = X86::MUL8m; break; 899 case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break; 900 case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break; 901 case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break; 902 } 903 else 904 switch (NVT) { 905 default: assert(0 && "Unsupported VT!"); 906 case MVT::i8: Opc = X86::IMUL8r; MOpc = X86::IMUL8m; break; 907 case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break; 908 case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break; 909 case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break; 910 } 911 912 unsigned LoReg, HiReg; 913 switch (NVT) { 914 default: assert(0 && "Unsupported VT!"); 915 case MVT::i8: LoReg = X86::AL; HiReg = X86::AH; break; 916 case MVT::i16: LoReg = X86::AX; HiReg = X86::DX; break; 917 case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break; 918 case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break; 919 } 920 921 SDOperand N0 = Node->getOperand(0); 922 SDOperand N1 = Node->getOperand(1); 923 924 bool foldedLoad = false; 925 SDOperand Tmp0, Tmp1, Tmp2, Tmp3; 926 foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); 927 // MULHU and MULHS are commmutative 928 if (!foldedLoad) { 929 foldedLoad = TryFoldLoad(N, N0, Tmp0, Tmp1, Tmp2, Tmp3); 930 if (foldedLoad) { 931 N0 = Node->getOperand(1); 932 N1 = Node->getOperand(0); 933 } 934 } 935 936 SDOperand Chain; 937 if (foldedLoad) { 938 Chain = N1.getOperand(0); 939 AddToISelQueue(Chain); 940 } else 941 Chain = CurDAG->getEntryNode(); 942 943 SDOperand InFlag(0, 0); 944 AddToISelQueue(N0); 945 Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT), 946 N0, InFlag); 947 InFlag = Chain.getValue(1); 948 949 if (foldedLoad) { 950 AddToISelQueue(Tmp0); 951 AddToISelQueue(Tmp1); 952 AddToISelQueue(Tmp2); 953 AddToISelQueue(Tmp3); 954 SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag }; 955 SDNode *CNode = 956 CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6); 957 Chain = SDOperand(CNode, 0); 958 InFlag = SDOperand(CNode, 1); 959 } else { 960 AddToISelQueue(N1); 961 InFlag = 962 SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); 963 } 964 965 SDOperand Result = CurDAG->getCopyFromReg(Chain, HiReg, NVT, InFlag); 966 ReplaceUses(N.getValue(0), Result); 967 if (foldedLoad) 968 ReplaceUses(N1.getValue(1), Result.getValue(1)); 969 970#ifndef NDEBUG 971 DEBUG(std::cerr << std::string(Indent-2, ' ')); 972 DEBUG(std::cerr << "=> "); 973 DEBUG(Result.Val->dump(CurDAG)); 974 DEBUG(std::cerr << "\n"); 975 Indent -= 2; 976#endif 977 return NULL; 978 } 979 980 case ISD::SDIV: 981 case ISD::UDIV: 982 case ISD::SREM: 983 case ISD::UREM: { 984 bool isSigned = Opcode == ISD::SDIV || Opcode == ISD::SREM; 985 bool isDiv = Opcode == ISD::SDIV || Opcode == ISD::UDIV; 986 if (!isSigned) 987 switch (NVT) { 988 default: assert(0 && "Unsupported VT!"); 989 case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break; 990 case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break; 991 case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break; 992 case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break; 993 } 994 else 995 switch (NVT) { 996 default: assert(0 && "Unsupported VT!"); 997 case MVT::i8: Opc = X86::IDIV8r; MOpc = X86::IDIV8m; break; 998 case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break; 999 case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break; 1000 case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break; 1001 } 1002 1003 unsigned LoReg, HiReg; 1004 unsigned ClrOpcode, SExtOpcode; 1005 switch (NVT) { 1006 default: assert(0 && "Unsupported VT!"); 1007 case MVT::i8: 1008 LoReg = X86::AL; HiReg = X86::AH; 1009 ClrOpcode = X86::MOV8r0; 1010 SExtOpcode = X86::CBW; 1011 break; 1012 case MVT::i16: 1013 LoReg = X86::AX; HiReg = X86::DX; 1014 ClrOpcode = X86::MOV16r0; 1015 SExtOpcode = X86::CWD; 1016 break; 1017 case MVT::i32: 1018 LoReg = X86::EAX; HiReg = X86::EDX; 1019 ClrOpcode = X86::MOV32r0; 1020 SExtOpcode = X86::CDQ; 1021 break; 1022 case MVT::i64: 1023 LoReg = X86::RAX; HiReg = X86::RDX; 1024 ClrOpcode = X86::MOV64r0; 1025 SExtOpcode = X86::CQO; 1026 break; 1027 } 1028 1029 SDOperand N0 = Node->getOperand(0); 1030 SDOperand N1 = Node->getOperand(1); 1031 1032 bool foldedLoad = false; 1033 SDOperand Tmp0, Tmp1, Tmp2, Tmp3; 1034 foldedLoad = TryFoldLoad(N, N1, Tmp0, Tmp1, Tmp2, Tmp3); 1035 SDOperand Chain; 1036 if (foldedLoad) { 1037 Chain = N1.getOperand(0); 1038 AddToISelQueue(Chain); 1039 } else 1040 Chain = CurDAG->getEntryNode(); 1041 1042 SDOperand InFlag(0, 0); 1043 AddToISelQueue(N0); 1044 Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(LoReg, NVT), 1045 N0, InFlag); 1046 InFlag = Chain.getValue(1); 1047 1048 if (isSigned) { 1049 // Sign extend the low part into the high part. 1050 InFlag = 1051 SDOperand(CurDAG->getTargetNode(SExtOpcode, MVT::Flag, InFlag), 0); 1052 } else { 1053 // Zero out the high part, effectively zero extending the input. 1054 SDOperand ClrNode = SDOperand(CurDAG->getTargetNode(ClrOpcode, NVT), 0); 1055 Chain = CurDAG->getCopyToReg(Chain, CurDAG->getRegister(HiReg, NVT), 1056 ClrNode, InFlag); 1057 InFlag = Chain.getValue(1); 1058 } 1059 1060 if (foldedLoad) { 1061 AddToISelQueue(Tmp0); 1062 AddToISelQueue(Tmp1); 1063 AddToISelQueue(Tmp2); 1064 AddToISelQueue(Tmp3); 1065 SDOperand Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Chain, InFlag }; 1066 SDNode *CNode = 1067 CurDAG->getTargetNode(MOpc, MVT::Other, MVT::Flag, Ops, 6); 1068 Chain = SDOperand(CNode, 0); 1069 InFlag = SDOperand(CNode, 1); 1070 } else { 1071 AddToISelQueue(N1); 1072 InFlag = 1073 SDOperand(CurDAG->getTargetNode(Opc, MVT::Flag, N1, InFlag), 0); 1074 } 1075 1076 SDOperand Result = CurDAG->getCopyFromReg(Chain, isDiv ? LoReg : HiReg, 1077 NVT, InFlag); 1078 ReplaceUses(N.getValue(0), Result); 1079 if (foldedLoad) 1080 ReplaceUses(N1.getValue(1), Result.getValue(1)); 1081 1082#ifndef NDEBUG 1083 DEBUG(std::cerr << std::string(Indent-2, ' ')); 1084 DEBUG(std::cerr << "=> "); 1085 DEBUG(Result.Val->dump(CurDAG)); 1086 DEBUG(std::cerr << "\n"); 1087 Indent -= 2; 1088#endif 1089 1090 return NULL; 1091 } 1092 1093 case ISD::TRUNCATE: { 1094 if (!Subtarget->is64Bit() && NVT == MVT::i8) { 1095 unsigned Opc2; 1096 MVT::ValueType VT; 1097 switch (Node->getOperand(0).getValueType()) { 1098 default: assert(0 && "Unknown truncate!"); 1099 case MVT::i16: 1100 Opc = X86::MOV16to16_; 1101 VT = MVT::i16; 1102 Opc2 = X86::TRUNC_16_to8; 1103 break; 1104 case MVT::i32: 1105 Opc = X86::MOV32to32_; 1106 VT = MVT::i32; 1107 Opc2 = X86::TRUNC_32_to8; 1108 break; 1109 } 1110 1111 AddToISelQueue(Node->getOperand(0)); 1112 SDOperand Tmp = 1113 SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0); 1114 SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp); 1115 1116#ifndef NDEBUG 1117 DEBUG(std::cerr << std::string(Indent-2, ' ')); 1118 DEBUG(std::cerr << "=> "); 1119 DEBUG(ResNode->dump(CurDAG)); 1120 DEBUG(std::cerr << "\n"); 1121 Indent -= 2; 1122#endif 1123 return ResNode; 1124 } 1125 1126 break; 1127 } 1128 } 1129 1130 SDNode *ResNode = SelectCode(N); 1131 1132#ifndef NDEBUG 1133 DEBUG(std::cerr << std::string(Indent-2, ' ')); 1134 DEBUG(std::cerr << "=> "); 1135 if (ResNode == NULL || ResNode == N.Val) 1136 DEBUG(N.Val->dump(CurDAG)); 1137 else 1138 DEBUG(ResNode->dump(CurDAG)); 1139 DEBUG(std::cerr << "\n"); 1140 Indent -= 2; 1141#endif 1142 1143 return ResNode; 1144} 1145 1146bool X86DAGToDAGISel:: 1147SelectInlineAsmMemoryOperand(const SDOperand &Op, char ConstraintCode, 1148 std::vector<SDOperand> &OutOps, SelectionDAG &DAG){ 1149 SDOperand Op0, Op1, Op2, Op3; 1150 switch (ConstraintCode) { 1151 case 'o': // offsetable ?? 1152 case 'v': // not offsetable ?? 1153 default: return true; 1154 case 'm': // memory 1155 if (!SelectAddr(Op, Op0, Op1, Op2, Op3)) 1156 return true; 1157 break; 1158 } 1159 1160 OutOps.push_back(Op0); 1161 OutOps.push_back(Op1); 1162 OutOps.push_back(Op2); 1163 OutOps.push_back(Op3); 1164 AddToISelQueue(Op0); 1165 AddToISelQueue(Op1); 1166 AddToISelQueue(Op2); 1167 AddToISelQueue(Op3); 1168 return false; 1169} 1170 1171/// createX86ISelDag - This pass converts a legalized DAG into a 1172/// X86-specific DAG, ready for instruction scheduling. 1173/// 1174FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, bool Fast) { 1175 return new X86DAGToDAGISel(TM, Fast); 1176} 1177