//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for SI
//
//===----------------------------------------------------------------------===//

#include "SIISelLowering.h"
#include "AMDIL.h"
#include "AMDGPU.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

SITargetLowering::SITargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
    TRI(TM.getRegisterInfo()) {

  addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);

  addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
  addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
  addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);

  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);

  addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);

  addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);

  addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);

  addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
  addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);

  addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
  addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);

  computeRegisterProperties();

  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);

  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::ADD, MVT::i32, Legal);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setTargetDAGCombine(ISD::SELECT_CC);

  setTargetDAGCombine(ISD::SETCC);

  setSchedulingPreference(Sched::Source);
}

SDValue SITargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const {

  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();

  MachineFunction &MF = DAG.getMachineFunction();
  FunctionType *FType = MF.getFunction()->getFunctionType();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  assert(CallConv == CallingConv::C);

  SmallVector<ISD::InputArg, 16> Splits;
  uint32_t Skipped = 0;

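  // This first pass over the arguments does two things: it records which
  // pixel shader inputs are actually used (unused ones are remembered in the
  // Skipped mask so that InVals stays aligned with Ins below), and it splits
  // vector arguments into one scalar InputArg per element.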
  for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
    const ISD::InputArg &Arg = Ins[i];

    // First, check if it's a PS input addr
    if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {

      assert((PSInputNum <= 15) && "Too many PS inputs!");

      if (!Arg.Used) {
        // We can safely skip PS inputs
        Skipped |= 1 << i;
        ++PSInputNum;
        continue;
      }

      Info->PSInputAddr |= 1 << PSInputNum++;
    }

    // Second, split vectors into their elements
    if (Arg.VT.isVector()) {
      ISD::InputArg NewArg = Arg;
      NewArg.Flags.setSplit();
      NewArg.VT = Arg.VT.getVectorElementType();

      // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
      // three or five element vertex only needs three or five registers,
      // NOT four or eight.
      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
      unsigned NumElements = ParamType->getVectorNumElements();

      for (unsigned j = 0; j != NumElements; ++j) {
        Splits.push_back(NewArg);
        NewArg.PartOffset += NewArg.VT.getStoreSize();
      }

    } else {
      Splits.push_back(Arg);
    }
  }

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // At least one interpolation mode must be enabled or else the GPU will hang.
  if (Info->ShaderType == ShaderType::PIXEL &&
      (Info->PSInputAddr & 0x7F) == 0) {
    Info->PSInputAddr |= 1;
    CCInfo.AllocateReg(AMDGPU::VGPR0);
    CCInfo.AllocateReg(AMDGPU::VGPR1);
  }

  AnalyzeFormalArguments(CCInfo, Splits);

  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {

    if (Skipped & (1 << i)) {
      InVals.push_back(SDValue());
      continue;
    }

    CCValAssign &VA = ArgLocs[ArgIdx++];
    assert(VA.isRegLoc() && "Parameter must be in a register!");

    unsigned Reg = VA.getLocReg();
    MVT VT = VA.getLocVT();

    if (VT == MVT::i64) {
      // For now assume it is a pointer
      Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
                                     &AMDGPU::SReg_64RegClass);
      Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
      InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
      continue;
    }

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);

    Reg = MF.addLiveIn(Reg, RC);
    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);

    const ISD::InputArg &Arg = Ins[i];
    if (Arg.VT.isVector()) {

      // Build a vector from the registers
      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
      unsigned NumElements = ParamType->getVectorNumElements();

      SmallVector<SDValue, 4> Regs;
      Regs.push_back(Val);
      for (unsigned j = 1; j != NumElements; ++j) {
        Reg = ArgLocs[ArgIdx++].getLocReg();
        Reg = MF.addLiveIn(Reg, RC);
        Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
      }

      // Fill up the missing vector elements
      NumElements = Arg.VT.getVectorNumElements() - NumElements;
      for (unsigned j = 0; j != NumElements; ++j)
        Regs.push_back(DAG.getUNDEF(VT));

      InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
                                   Regs.data(), Regs.size()));
      continue;
    }

    InVals.push_back(Val);
  }
  return Chain;
}

MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  MachineBasicBlock::iterator I = MI;

  switch (MI->getOpcode()) {
  default:
    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case AMDGPU::BRANCH: return BB;
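  // SI_WQM is lowered in LowerSI_WQM below to an S_WQM_B64 on EXEC, which
  // switches the wave's execution mask into whole quad mode.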
  case AMDGPU::SI_WQM:
    LowerSI_WQM(MI, *BB, I, MRI);
    break;
  }
  return BB;
}

void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
                                   MachineBasicBlock::iterator I,
                                   MachineRegisterInfo &MRI) const {
  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
          .addReg(AMDGPU::EXEC);

  MI->eraseFromParent();
}

EVT SITargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i1;
}

MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
  return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  }
  return SDValue();
}

/// \brief Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {

  SDNode *Parent = Value.getNode();
  for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
       I != E; ++I) {

    if (I.getUse().get() != Value)
      continue;

    if (I->getOpcode() == Opcode)
      return *I;
  }
  return 0;
}

/// This transforms the control flow intrinsics to get the branch destination
/// as their last parameter, and also switches the branch target with BR if
/// the need arises.
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
                                      SelectionDAG &DAG) const {

  DebugLoc DL = BRCOND.getDebugLoc();

  SDNode *Intr = BRCOND.getOperand(1).getNode();
  SDValue Target = BRCOND.getOperand(2);
  SDNode *BR = 0;

  if (Intr->getOpcode() == ISD::SETCC) {
    // As long as we negate the condition everything is fine
    SDNode *SetCC = Intr;
    assert(SetCC->getConstantOperandVal(1) == 1);
    assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
           ISD::SETNE);
    Intr = SetCC->getOperand(0).getNode();

  } else {
    // Get the target from BR if we don't negate the condition
    BR = findUser(BRCOND, ISD::BR);
    Target = BR->getOperand(1);
  }

  assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);

  // Build the result and
  SmallVector<EVT, 4> Res;
  for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
    Res.push_back(Intr->getValueType(i));

  // operands of the new intrinsic call
  SmallVector<SDValue, 4> Ops;
  Ops.push_back(BRCOND.getOperand(0));
  for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
    Ops.push_back(Intr->getOperand(i));
  Ops.push_back(Target);
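
  // The replacement node keeps every result of the old intrinsic except the
  // first one, the i1 condition that fed this BRCOND; the chain result stays
  // last.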
  // build the new intrinsic call
  SDNode *Result = DAG.getNode(
    Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
    DAG.getVTList(Res.data(), Res.size()),
    Ops.data(), Ops.size()).getNode();

  if (BR) {
    // Give the branch instruction our target
    SDValue Ops[] = {
      BR->getOperand(0),
      BRCOND.getOperand(2)
    };
    DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
  }

  SDValue Chain = SDValue(Result, Result->getNumValues() - 1);

  // Copy the intrinsic results to registers
  for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
    SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
    if (!CopyToReg)
      continue;

    Chain = DAG.getCopyToReg(
      Chain, DL,
      CopyToReg->getOperand(1),
      SDValue(Result, i - 1),
      SDValue());

    DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
  }

  // Remove the old intrinsic from the chain
  DAG.ReplaceAllUsesOfValueWith(
    SDValue(Intr, Intr->getNumValues() - 1),
    Intr->getOperand(0));

  return Chain;
}

SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();

  // Possible Min/Max pattern
  SDValue MinMax = LowerMinMax(Op, DAG);
  if (MinMax.getNode()) {
    return MinMax;
  }

  SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}

//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//

SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  DebugLoc DL = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  switch (N->getOpcode()) {
  default: break;
  case ISD::SELECT_CC: {
    ConstantSDNode *True, *False;
    // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
    if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
        && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
        && True->isAllOnesValue()
        && False->isNullValue()
        && VT == MVT::i1) {
      return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
                         N->getOperand(1), N->getOperand(4));
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Arg0 = N->getOperand(0);
    SDValue Arg1 = N->getOperand(1);
    SDValue CC = N->getOperand(2);
    ConstantSDNode *C = NULL;
    ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();

    // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
    if (VT == MVT::i1
        && Arg0.getOpcode() == ISD::SIGN_EXTEND
        && Arg0.getOperand(0).getValueType() == MVT::i1
        && (C = dyn_cast<ConstantSDNode>(Arg1))
        && C->isNullValue()
        && CCOp == ISD::SETNE) {
      return SimplifySetCC(VT, Arg0.getOperand(0),
                           DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
    }
    break;
  }
  }
  return SDValue();
}

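// The helpers below implement operand folding after instruction selection.
// A VALU instruction can use at most one SGPR or literal constant among its
// sources; the ScalarSlotUsed flag threaded through these helpers tracks
// whether that single scalar "slot" has already been taken.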
/// \brief Test if RegClass is one of the VSrc classes
static bool isVSrc(unsigned RegClass) {
  return AMDGPU::VSrc_32RegClassID == RegClass ||
         AMDGPU::VSrc_64RegClassID == RegClass;
}

/// \brief Test if RegClass is one of the SSrc classes
static bool isSSrc(unsigned RegClass) {
  return AMDGPU::SSrc_32RegClassID == RegClass ||
         AMDGPU::SSrc_64RegClassID == RegClass;
}

/// \brief Analyze the possible immediate value Op
///
/// Returns -1 if it isn't an immediate, 0 if it's an inline immediate
/// and the immediate value if it's a literal immediate
int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {

  union {
    int32_t I;
    float F;
  } Imm;

  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N))
    Imm.I = Node->getSExtValue();
  else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
    Imm.F = Node->getValueAPF().convertToFloat();
  else
    return -1; // It isn't an immediate

  if ((Imm.I >= -16 && Imm.I <= 64) ||
      Imm.F == 0.5f || Imm.F == -0.5f ||
      Imm.F == 1.0f || Imm.F == -1.0f ||
      Imm.F == 2.0f || Imm.F == -2.0f ||
      Imm.F == 4.0f || Imm.F == -4.0f)
    return 0; // It's an inline immediate

  return Imm.I; // It's a literal immediate
}

/// \brief Try to fold an immediate directly into an instruction
bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
                               bool &ScalarSlotUsed) const {

  MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
  if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
    return false;

  const SDValue &Op = Mov->getOperand(0);
  int32_t Value = analyzeImmediate(Op.getNode());
  if (Value == -1) {
    // Not an immediate at all
    return false;

  } else if (Value == 0) {
    // Inline immediates can always be folded
    Operand = Op;
    return true;

  } else if (Value == Immediate) {
    // Already folded literal immediate
    Operand = Op;
    return true;

  } else if (!ScalarSlotUsed && !Immediate) {
    // Fold this literal immediate
    ScalarSlotUsed = true;
    Immediate = Value;
    Operand = Op;
    return true;
  }

  return false;
}

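// When folding fails, an operand may still have to be moved out of the scalar
// slot into a plain vector register class; fitsRegClass and ensureSRegLimit
// below decide whether such a copy is needed.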
/// \brief Does "Op" fit into register class "RegClass"?
bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
                                    unsigned RegClass) const {

  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  SDNode *Node = Op.getNode();

  int OpClass;
  if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) {
    const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode());
    OpClass = Desc.OpInfo[Op.getResNo()].RegClass;

  } else if (Node->getOpcode() == ISD::CopyFromReg) {
    RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode());
    OpClass = MRI.getRegClass(Reg->getReg())->getID();

  } else
    return false;

  if (OpClass == -1)
    return false;

  return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass));
}

/// \brief Make sure that we don't exceed the number of allowed scalars
void SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
                                       unsigned RegClass,
                                       bool &ScalarSlotUsed) const {

  // First map the operand's register class to a destination class
  if (RegClass == AMDGPU::VSrc_32RegClassID)
    RegClass = AMDGPU::VReg_32RegClassID;
  else if (RegClass == AMDGPU::VSrc_64RegClassID)
    RegClass = AMDGPU::VReg_64RegClassID;
  else
    return;

  // Nothing to do if they fit naturally
  if (fitsRegClass(DAG, Operand, RegClass))
    return;

  // If the scalar slot isn't used yet, use it now
  if (!ScalarSlotUsed) {
    ScalarSlotUsed = true;
    return;
  }

  // This is a conservative approach. It is possible that we can't determine
  // the correct register class and copy too often, but better safe than
  // sorry.
  SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
  SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(),
                                    Operand.getValueType(), Operand, RC);
  Operand = SDValue(Node, 0);
}

SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
                                          SelectionDAG &DAG) const {

  // Original encoding (either e32 or e64)
  int Opcode = Node->getMachineOpcode();
  const MCInstrDesc *Desc = &TII->get(Opcode);

  unsigned NumDefs = Desc->getNumDefs();
  unsigned NumOps = Desc->getNumOperands();

  // e64 version if available, -1 otherwise
  int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
  const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);

  assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
  assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));

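  // Only the 4-byte (e32) encoding leaves room for a trailing 32-bit literal
  // constant; -1 marks the literal slot as unusable.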
  int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
  bool HaveVSrc = false, HaveSSrc = false;

  // First, figure out what we already have in this instruction
  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
       i != e && Op < NumOps; ++i, ++Op) {

    unsigned RegClass = Desc->OpInfo[Op].RegClass;
    if (isVSrc(RegClass))
      HaveVSrc = true;
    else if (isSSrc(RegClass))
      HaveSSrc = true;
    else
      continue;

    int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
    if (Imm != -1 && Imm != 0) {
      // Literal immediate
      Immediate = Imm;
    }
  }

  // If we have neither VSrc nor SSrc, it makes no sense to continue
  if (!HaveVSrc && !HaveSSrc)
    return Node;

  // No scalar allowed when we have both VSrc and SSrc
  bool ScalarSlotUsed = HaveVSrc && HaveSSrc;

  // Second, go over the operands and try to fold them
  std::vector<SDValue> Ops;
  bool Promote2e64 = false;
  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
       i != e && Op < NumOps; ++i, ++Op) {

    const SDValue &Operand = Node->getOperand(i);
    Ops.push_back(Operand);

    // Already folded immediate?
    if (isa<ConstantSDNode>(Operand.getNode()) ||
        isa<ConstantFPSDNode>(Operand.getNode()))
      continue;

    // Is this a VSrc or SSrc operand?
    unsigned RegClass = Desc->OpInfo[Op].RegClass;
    if (!isVSrc(RegClass) && !isSSrc(RegClass)) {

      if (i == 1 && Desc->isCommutable() &&
          fitsRegClass(DAG, Ops[0], RegClass) &&
          foldImm(Ops[1], Immediate, ScalarSlotUsed)) {

        assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) ||
               isSSrc(Desc->OpInfo[NumDefs].RegClass));

        // Swap commutable operands
        SDValue Tmp = Ops[1];
        Ops[1] = Ops[0];
        Ops[0] = Tmp;

      } else if (DescE64 && !Immediate) {
        // Test if it makes sense to switch to e64 encoding

        RegClass = DescE64->OpInfo[Op].RegClass;
        int32_t TmpImm = -1;
        if ((isVSrc(RegClass) || isSSrc(RegClass)) &&
            foldImm(Ops[i], TmpImm, ScalarSlotUsed)) {

          Immediate = -1;
          Promote2e64 = true;
          Desc = DescE64;
          DescE64 = 0;
        }
      }
      continue;
    }

    // Try to fold the immediates
    if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
      // Folding didn't work; make sure we don't hit the SReg limit
      ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
    }
  }

  if (Promote2e64) {
    // Add the modifier flags while promoting
    for (unsigned i = 0; i < 4; ++i)
      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
  }

  // Add optional chain and glue
  for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
    Ops.push_back(Node->getOperand(i));

  // Either create a completely new instruction or update the current one
  if (Promote2e64)
    return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(),
                              Node->getVTList(), Ops.data(), Ops.size());
  else
    return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
}