// R600ISelLowering.cpp revision a499d2bcef0c1001c60d752d356e50eed2402ca8
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief Custom DAG lowering for R600 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600ISelLowering.h" 16#include "R600Defines.h" 17#include "R600InstrInfo.h" 18#include "R600MachineFunctionInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22#include "llvm/IR/Argument.h" 23#include "llvm/IR/Function.h" 24 25using namespace llvm; 26 27R600TargetLowering::R600TargetLowering(TargetMachine &TM) : 28 AMDGPUTargetLowering(TM), 29 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) { 30 setOperationAction(ISD::MUL, MVT::i64, Expand); 31 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); 32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); 33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); 34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); 35 computeRegisterProperties(); 36 37 setOperationAction(ISD::FADD, MVT::v4f32, Expand); 38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand); 39 setOperationAction(ISD::FDIV, MVT::v4f32, Expand); 40 setOperationAction(ISD::FSUB, MVT::v4f32, Expand); 41 42 setOperationAction(ISD::ADD, MVT::v4i32, Expand); 43 setOperationAction(ISD::AND, MVT::v4i32, Expand); 44 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); 45 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); 46 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); 47 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); 48 setOperationAction(ISD::UDIV, MVT::v4i32, Expand); 49 setOperationAction(ISD::UREM, MVT::v4i32, Expand); 50 
setOperationAction(ISD::SETCC, MVT::v4i32, Expand); 51 52 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 53 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 54 55 setOperationAction(ISD::FSUB, MVT::f32, Expand); 56 57 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 58 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 59 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); 60 setOperationAction(ISD::FPOW, MVT::f32, Custom); 61 62 setOperationAction(ISD::ROTL, MVT::i32, Custom); 63 64 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 65 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 66 67 setOperationAction(ISD::SETCC, MVT::i32, Custom); 68 setOperationAction(ISD::SETCC, MVT::f32, Custom); 69 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); 70 71 setOperationAction(ISD::SELECT, MVT::i32, Custom); 72 setOperationAction(ISD::SELECT, MVT::f32, Custom); 73 74 setOperationAction(ISD::STORE, MVT::i32, Custom); 75 setOperationAction(ISD::STORE, MVT::v4i32, Custom); 76 77 setOperationAction(ISD::LOAD, MVT::i32, Custom); 78 setOperationAction(ISD::LOAD, MVT::v4i32, Custom); 79 setTargetDAGCombine(ISD::FP_ROUND); 80 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); 81 82 setSchedulingPreference(Sched::VLIW); 83} 84 85MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( 86 MachineInstr * MI, MachineBasicBlock * BB) const { 87 MachineFunction * MF = BB->getParent(); 88 MachineRegisterInfo &MRI = MF->getRegInfo(); 89 MachineBasicBlock::iterator I = *MI; 90 91 switch (MI->getOpcode()) { 92 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 93 case AMDGPU::SHADER_TYPE: break; 94 case AMDGPU::CLAMP_R600: { 95 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, 96 AMDGPU::MOV, 97 MI->getOperand(0).getReg(), 98 MI->getOperand(1).getReg()); 99 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); 100 break; 101 } 102 103 case AMDGPU::FABS_R600: { 104 MachineInstr *NewMI = 
TII->buildDefaultInstruction(*BB, I, 105 AMDGPU::MOV, 106 MI->getOperand(0).getReg(), 107 MI->getOperand(1).getReg()); 108 TII->addFlag(NewMI, 0, MO_FLAG_ABS); 109 break; 110 } 111 112 case AMDGPU::FNEG_R600: { 113 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, 114 AMDGPU::MOV, 115 MI->getOperand(0).getReg(), 116 MI->getOperand(1).getReg()); 117 TII->addFlag(NewMI, 0, MO_FLAG_NEG); 118 break; 119 } 120 121 case AMDGPU::MASK_WRITE: { 122 unsigned maskedRegister = MI->getOperand(0).getReg(); 123 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); 124 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 125 TII->addFlag(defInstr, 0, MO_FLAG_MASK); 126 break; 127 } 128 129 case AMDGPU::MOV_IMM_F32: 130 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), 131 MI->getOperand(1).getFPImm()->getValueAPF() 132 .bitcastToAPInt().getZExtValue()); 133 break; 134 case AMDGPU::MOV_IMM_I32: 135 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), 136 MI->getOperand(1).getImm()); 137 break; 138 139 140 case AMDGPU::RAT_WRITE_CACHELESS_32_eg: 141 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { 142 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 
1 : 0; 143 144 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 145 .addOperand(MI->getOperand(0)) 146 .addOperand(MI->getOperand(1)) 147 .addImm(EOP); // Set End of program bit 148 break; 149 } 150 151 case AMDGPU::TXD: { 152 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 153 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 154 155 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) 156 .addOperand(MI->getOperand(3)) 157 .addOperand(MI->getOperand(4)) 158 .addOperand(MI->getOperand(5)) 159 .addOperand(MI->getOperand(6)); 160 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) 161 .addOperand(MI->getOperand(2)) 162 .addOperand(MI->getOperand(4)) 163 .addOperand(MI->getOperand(5)) 164 .addOperand(MI->getOperand(6)); 165 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) 166 .addOperand(MI->getOperand(0)) 167 .addOperand(MI->getOperand(1)) 168 .addOperand(MI->getOperand(4)) 169 .addOperand(MI->getOperand(5)) 170 .addOperand(MI->getOperand(6)) 171 .addReg(T0, RegState::Implicit) 172 .addReg(T1, RegState::Implicit); 173 break; 174 } 175 176 case AMDGPU::TXD_SHADOW: { 177 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 178 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 179 180 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) 181 .addOperand(MI->getOperand(3)) 182 .addOperand(MI->getOperand(4)) 183 .addOperand(MI->getOperand(5)) 184 .addOperand(MI->getOperand(6)); 185 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) 186 .addOperand(MI->getOperand(2)) 187 .addOperand(MI->getOperand(4)) 188 .addOperand(MI->getOperand(5)) 189 .addOperand(MI->getOperand(6)); 190 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) 191 .addOperand(MI->getOperand(0)) 192 .addOperand(MI->getOperand(1)) 193 .addOperand(MI->getOperand(4)) 
194 .addOperand(MI->getOperand(5)) 195 .addOperand(MI->getOperand(6)) 196 .addReg(T0, RegState::Implicit) 197 .addReg(T1, RegState::Implicit); 198 break; 199 } 200 201 case AMDGPU::BRANCH: 202 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 203 .addOperand(MI->getOperand(0)) 204 .addReg(0); 205 break; 206 207 case AMDGPU::BRANCH_COND_f32: { 208 MachineInstr *NewMI = 209 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), 210 AMDGPU::PREDICATE_BIT) 211 .addOperand(MI->getOperand(1)) 212 .addImm(OPCODE_IS_NOT_ZERO) 213 .addImm(0); // Flags 214 TII->addFlag(NewMI, 0, MO_FLAG_PUSH); 215 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 216 .addOperand(MI->getOperand(0)) 217 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 218 break; 219 } 220 221 case AMDGPU::BRANCH_COND_i32: { 222 MachineInstr *NewMI = 223 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), 224 AMDGPU::PREDICATE_BIT) 225 .addOperand(MI->getOperand(1)) 226 .addImm(OPCODE_IS_NOT_ZERO_INT) 227 .addImm(0); // Flags 228 TII->addFlag(NewMI, 0, MO_FLAG_PUSH); 229 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 230 .addOperand(MI->getOperand(0)) 231 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 232 break; 233 } 234 235 case AMDGPU::EG_ExportSwz: 236 case AMDGPU::R600_ExportSwz: { 237 // Instruction is left unmodified if its not the last one of its type 238 bool isLastInstructionOfItsType = true; 239 unsigned InstExportType = MI->getOperand(1).getImm(); 240 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I), 241 EndBlock = BB->end(); NextExportInst != EndBlock; 242 NextExportInst = llvm::next(NextExportInst)) { 243 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || 244 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { 245 unsigned CurrentInstExportType = NextExportInst->getOperand(1) 246 .getImm(); 247 if (CurrentInstExportType == InstExportType) { 248 isLastInstructionOfItsType = false; 249 break; 250 } 251 } 252 } 253 
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; 254 if (!EOP && !isLastInstructionOfItsType) 255 return BB; 256 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; 257 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 258 .addOperand(MI->getOperand(0)) 259 .addOperand(MI->getOperand(1)) 260 .addOperand(MI->getOperand(2)) 261 .addOperand(MI->getOperand(3)) 262 .addOperand(MI->getOperand(4)) 263 .addOperand(MI->getOperand(5)) 264 .addOperand(MI->getOperand(6)) 265 .addImm(CfInst) 266 .addImm(EOP); 267 break; 268 } 269 case AMDGPU::RETURN: { 270 // RETURN instructions must have the live-out registers as implicit uses, 271 // otherwise they appear dead. 272 R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>(); 273 MachineInstrBuilder MIB(*MF, MI); 274 for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i) 275 MIB.addReg(MFI->LiveOuts[i], RegState::Implicit); 276 return BB; 277 } 278 } 279 280 MI->eraseFromParent(); 281 return BB; 282} 283 284//===----------------------------------------------------------------------===// 285// Custom DAG Lowering Operations 286//===----------------------------------------------------------------------===// 287 288using namespace llvm::Intrinsic; 289using namespace llvm::AMDGPUIntrinsic; 290 291static SDValue 292InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, 293 unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, 294 SDValue Scalar, SDValue Chain) { 295 if (!ExportMap[Slot]) { 296 SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, 297 DL, MVT::v4f32, 298 DAG.getUNDEF(MVT::v4f32), 299 Scalar, 300 DAG.getConstant(Channel, MVT::i32)); 301 302 unsigned Mask = 1 << Channel; 303 304 const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), 305 DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), 306 DAG.getConstant(Mask, MVT::i32)}; 307 308 SDValue Res = DAG.getNode( 309 AMDGPUISD::EXPORT, 310 DL, 
311 MVT::Other, 312 Ops, 6); 313 ExportMap[Slot] = Res.getNode(); 314 return Res; 315 } 316 317 SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; 318 SDValue PreviousVector = ExportInstruction->getOperand(1); 319 SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, 320 DL, MVT::v4f32, 321 PreviousVector, 322 Scalar, 323 DAG.getConstant(Channel, MVT::i32)); 324 325 unsigned Mask = dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5)) 326 ->getZExtValue(); 327 Mask |= (1 << Channel); 328 329 const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, 330 DAG.getConstant(Inst, MVT::i32), 331 DAG.getConstant(Type, MVT::i32), 332 DAG.getConstant(Slot, MVT::i32), 333 DAG.getConstant(Mask, MVT::i32)}; 334 335 DAG.UpdateNodeOperands(ExportInstruction, 336 Ops, 6); 337 338 return Chain; 339 340} 341 342SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 343 switch (Op.getOpcode()) { 344 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 345 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 346 case ISD::ROTL: return LowerROTL(Op, DAG); 347 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 348 case ISD::SELECT: return LowerSELECT(Op, DAG); 349 case ISD::SETCC: return LowerSETCC(Op, DAG); 350 case ISD::STORE: return LowerSTORE(Op, DAG); 351 case ISD::LOAD: return LowerLOAD(Op, DAG); 352 case ISD::FPOW: return LowerFPOW(Op, DAG); 353 case ISD::INTRINSIC_VOID: { 354 SDValue Chain = Op.getOperand(0); 355 unsigned IntrinsicID = 356 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 357 switch (IntrinsicID) { 358 case AMDGPUIntrinsic::AMDGPU_store_output: { 359 MachineFunction &MF = DAG.getMachineFunction(); 360 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 361 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 362 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 363 MFI->LiveOuts.push_back(Reg); 364 return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, 
Op.getOperand(2)); 365 } 366 case AMDGPUIntrinsic::R600_store_pixel_color: { 367 MachineFunction &MF = DAG.getMachineFunction(); 368 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 369 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 370 371 SDNode **OutputsMap = MFI->Outputs; 372 return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), OutputsMap, 373 RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), 374 Chain); 375 376 } 377 378 // default for switch(IntrinsicID) 379 default: break; 380 } 381 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) 382 break; 383 } 384 case ISD::INTRINSIC_WO_CHAIN: { 385 unsigned IntrinsicID = 386 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 387 EVT VT = Op.getValueType(); 388 DebugLoc DL = Op.getDebugLoc(); 389 switch(IntrinsicID) { 390 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 391 case AMDGPUIntrinsic::R600_load_input: { 392 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 393 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 394 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); 395 } 396 397 case AMDGPUIntrinsic::R600_interp_input: { 398 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 399 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); 400 MachineSDNode *interp; 401 if (ijb < 0) { 402 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, 403 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); 404 return DAG.getTargetExtractSubreg( 405 TII->getRegisterInfo().getSubRegFromChannel(slot % 4), 406 DL, MVT::f32, SDValue(interp, 0)); 407 } 408 409 if (slot % 4 < 2) 410 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, 411 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), 412 CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 413 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), 414 
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 415 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); 416 else 417 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, 418 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), 419 CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 420 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), 421 CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 422 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); 423 424 return SDValue(interp, slot % 2); 425 } 426 427 case r600_read_ngroups_x: 428 return LowerImplicitParameter(DAG, VT, DL, 0); 429 case r600_read_ngroups_y: 430 return LowerImplicitParameter(DAG, VT, DL, 1); 431 case r600_read_ngroups_z: 432 return LowerImplicitParameter(DAG, VT, DL, 2); 433 case r600_read_global_size_x: 434 return LowerImplicitParameter(DAG, VT, DL, 3); 435 case r600_read_global_size_y: 436 return LowerImplicitParameter(DAG, VT, DL, 4); 437 case r600_read_global_size_z: 438 return LowerImplicitParameter(DAG, VT, DL, 5); 439 case r600_read_local_size_x: 440 return LowerImplicitParameter(DAG, VT, DL, 6); 441 case r600_read_local_size_y: 442 return LowerImplicitParameter(DAG, VT, DL, 7); 443 case r600_read_local_size_z: 444 return LowerImplicitParameter(DAG, VT, DL, 8); 445 446 case r600_read_tgid_x: 447 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 448 AMDGPU::T1_X, VT); 449 case r600_read_tgid_y: 450 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 451 AMDGPU::T1_Y, VT); 452 case r600_read_tgid_z: 453 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 454 AMDGPU::T1_Z, VT); 455 case r600_read_tidig_x: 456 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 457 AMDGPU::T0_X, VT); 458 case r600_read_tidig_y: 459 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 460 AMDGPU::T0_Y, VT); 461 case r600_read_tidig_z: 462 return CreateLiveInRegister(DAG, 
&AMDGPU::R600_TReg32RegClass, 463 AMDGPU::T0_Z, VT); 464 } 465 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) 466 break; 467 } 468 } // end switch(Op.getOpcode()) 469 return SDValue(); 470} 471 472void R600TargetLowering::ReplaceNodeResults(SDNode *N, 473 SmallVectorImpl<SDValue> &Results, 474 SelectionDAG &DAG) const { 475 switch (N->getOpcode()) { 476 default: return; 477 case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); 478 return; 479 case ISD::LOAD: { 480 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); 481 Results.push_back(SDValue(Node, 0)); 482 Results.push_back(SDValue(Node, 1)); 483 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode 484 // function 485 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); 486 return; 487 } 488 } 489} 490 491SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { 492 return DAG.getNode( 493 ISD::SETCC, 494 Op.getDebugLoc(), 495 MVT::i1, 496 Op, DAG.getConstantFP(0.0f, MVT::f32), 497 DAG.getCondCode(ISD::SETNE) 498 ); 499} 500 501SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 502 SDValue Chain = Op.getOperand(0); 503 SDValue CC = Op.getOperand(1); 504 SDValue LHS = Op.getOperand(2); 505 SDValue RHS = Op.getOperand(3); 506 SDValue JumpT = Op.getOperand(4); 507 SDValue CmpValue; 508 SDValue Result; 509 510 if (LHS.getValueType() == MVT::i32) { 511 CmpValue = DAG.getNode( 512 ISD::SELECT_CC, 513 Op.getDebugLoc(), 514 MVT::i32, 515 LHS, RHS, 516 DAG.getConstant(-1, MVT::i32), 517 DAG.getConstant(0, MVT::i32), 518 CC); 519 } else if (LHS.getValueType() == MVT::f32) { 520 CmpValue = DAG.getNode( 521 ISD::SELECT_CC, 522 Op.getDebugLoc(), 523 MVT::f32, 524 LHS, RHS, 525 DAG.getConstantFP(1.0f, MVT::f32), 526 DAG.getConstantFP(0.0f, MVT::f32), 527 CC); 528 } else { 529 assert(0 && "Not valid type for br_cc"); 530 } 531 Result = DAG.getNode( 532 AMDGPUISD::BRANCH_COND, 533 
CmpValue.getDebugLoc(), 534 MVT::Other, Chain, 535 JumpT, CmpValue); 536 return Result; 537} 538 539SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, 540 DebugLoc DL, 541 unsigned DwordOffset) const { 542 unsigned ByteOffset = DwordOffset * 4; 543 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 544 AMDGPUAS::PARAM_I_ADDRESS); 545 546 // We shouldn't be using an offset wider than 16-bits for implicit parameters. 547 assert(isInt<16>(ByteOffset)); 548 549 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 550 DAG.getConstant(ByteOffset, MVT::i32), // PTR 551 MachinePointerInfo(ConstantPointerNull::get(PtrType)), 552 false, false, false, 0); 553} 554 555SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { 556 DebugLoc DL = Op.getDebugLoc(); 557 EVT VT = Op.getValueType(); 558 559 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, 560 Op.getOperand(0), 561 Op.getOperand(0), 562 DAG.getNode(ISD::SUB, DL, VT, 563 DAG.getConstant(32, MVT::i32), 564 Op.getOperand(1))); 565} 566 567bool R600TargetLowering::isZero(SDValue Op) const { 568 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { 569 return Cst->isNullValue(); 570 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ 571 return CstFP->isZero(); 572 } else { 573 return false; 574 } 575} 576 577SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 578 DebugLoc DL = Op.getDebugLoc(); 579 EVT VT = Op.getValueType(); 580 581 SDValue LHS = Op.getOperand(0); 582 SDValue RHS = Op.getOperand(1); 583 SDValue True = Op.getOperand(2); 584 SDValue False = Op.getOperand(3); 585 SDValue CC = Op.getOperand(4); 586 SDValue Temp; 587 588 // LHS and RHS are guaranteed to be the same value type 589 EVT CompareVT = LHS.getValueType(); 590 591 // Check if we can lower this to a native operation. 592 593 // Try to lower to a CND* instruction: 594 // CND* instructions requires RHS to be zero. 
Some SELECT_CC nodes that 595 // can be lowered to CND* instructions can also be lowered to SET* 596 // instructions. CND* instructions are cheaper, because they dont't 597 // require additional instructions to convert their result to the correct 598 // value type, so this check should be first. 599 if (isZero(LHS) || isZero(RHS)) { 600 SDValue Cond = (isZero(LHS) ? RHS : LHS); 601 SDValue Zero = (isZero(LHS) ? LHS : RHS); 602 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 603 if (CompareVT != VT) { 604 // Bitcast True / False to the correct types. This will end up being 605 // a nop, but it allows us to define only a single pattern in the 606 // .TD files for each CND* instruction rather than having to have 607 // one pattern for integer True/False and one for fp True/False 608 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); 609 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); 610 } 611 if (isZero(LHS)) { 612 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); 613 } 614 615 switch (CCOpcode) { 616 case ISD::SETONE: 617 case ISD::SETUNE: 618 case ISD::SETNE: 619 case ISD::SETULE: 620 case ISD::SETULT: 621 case ISD::SETOLE: 622 case ISD::SETOLT: 623 case ISD::SETLE: 624 case ISD::SETLT: 625 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); 626 Temp = True; 627 True = False; 628 False = Temp; 629 break; 630 default: 631 break; 632 } 633 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 634 Cond, Zero, 635 True, False, 636 DAG.getCondCode(CCOpcode)); 637 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); 638 } 639 640 // Try to lower to a SET* instruction: 641 // We need all the operands of SELECT_CC to have the same value type, so if 642 // necessary we need to change True and False to be the same type as LHS and 643 // RHS, and then convert the result of the select_cc back to the correct type. 644 645 // Move hardware True/False values to the correct operand. 
646 if (isHWTrueValue(False) && isHWFalseValue(True)) { 647 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 648 std::swap(False, True); 649 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32)); 650 } 651 652 if (isHWTrueValue(True) && isHWFalseValue(False)) { 653 if (CompareVT != VT) { 654 if (VT == MVT::f32 && CompareVT == MVT::i32) { 655 SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 656 LHS, RHS, 657 DAG.getConstant(-1, MVT::i32), 658 DAG.getConstant(0, MVT::i32), 659 CC); 660 // Convert integer values of true (-1) and false (0) to fp values of 661 // true (1.0f) and false (0.0f). 662 SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, 663 DAG.getConstant(1, MVT::i32)); 664 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); 665 } else if (VT == MVT::i32 && CompareVT == MVT::f32) { 666 SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 667 LHS, RHS, 668 DAG.getConstantFP(1.0f, MVT::f32), 669 DAG.getConstantFP(0.0f, MVT::f32), 670 CC); 671 // Convert fp values of true (1.0f) and false (0.0f) to integer values 672 // of true (-1) and false (0). 673 SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt); 674 return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg); 675 } else { 676 // I don't think there will be any other type pairings. 677 assert(!"Unhandled operand type parings in SELECT_CC"); 678 } 679 } else { 680 // This SELECT_CC is already legal. 681 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 682 } 683 } 684 685 // Possible Min/Max pattern 686 SDValue MinMax = LowerMinMax(Op, DAG); 687 if (MinMax.getNode()) { 688 return MinMax; 689 } 690 691 // If we make it this for it means we have no native instructions to handle 692 // this SELECT_CC, so we must lower it. 
693 SDValue HWTrue, HWFalse; 694 695 if (CompareVT == MVT::f32) { 696 HWTrue = DAG.getConstantFP(1.0f, CompareVT); 697 HWFalse = DAG.getConstantFP(0.0f, CompareVT); 698 } else if (CompareVT == MVT::i32) { 699 HWTrue = DAG.getConstant(-1, CompareVT); 700 HWFalse = DAG.getConstant(0, CompareVT); 701 } 702 else { 703 assert(!"Unhandled value type in LowerSELECT_CC"); 704 } 705 706 // Lower this unsupported SELECT_CC into a combination of two supported 707 // SELECT_CC operations. 708 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); 709 710 return DAG.getNode(ISD::SELECT_CC, DL, VT, 711 Cond, HWFalse, 712 True, False, 713 DAG.getCondCode(ISD::SETNE)); 714} 715 716SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 717 return DAG.getNode(ISD::SELECT_CC, 718 Op.getDebugLoc(), 719 Op.getValueType(), 720 Op.getOperand(0), 721 DAG.getConstant(0, MVT::i32), 722 Op.getOperand(1), 723 Op.getOperand(2), 724 DAG.getCondCode(ISD::SETNE)); 725} 726 727SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 728 SDValue Cond; 729 SDValue LHS = Op.getOperand(0); 730 SDValue RHS = Op.getOperand(1); 731 SDValue CC = Op.getOperand(2); 732 DebugLoc DL = Op.getDebugLoc(); 733 assert(Op.getValueType() == MVT::i32); 734 if (LHS.getValueType() == MVT::i32) { 735 Cond = DAG.getNode( 736 ISD::SELECT_CC, 737 Op.getDebugLoc(), 738 MVT::i32, 739 LHS, RHS, 740 DAG.getConstant(-1, MVT::i32), 741 DAG.getConstant(0, MVT::i32), 742 CC); 743 } else if (LHS.getValueType() == MVT::f32) { 744 Cond = DAG.getNode( 745 ISD::SELECT_CC, 746 Op.getDebugLoc(), 747 MVT::f32, 748 LHS, RHS, 749 DAG.getConstantFP(1.0f, MVT::f32), 750 DAG.getConstantFP(0.0f, MVT::f32), 751 CC); 752 Cond = DAG.getNode( 753 ISD::FP_TO_SINT, 754 DL, 755 MVT::i32, 756 Cond); 757 } else { 758 assert(0 && "Not valid type for set_cc"); 759 } 760 Cond = DAG.getNode( 761 ISD::AND, 762 DL, 763 MVT::i32, 764 DAG.getConstant(1, MVT::i32), 765 Cond); 766 
return Cond; 767} 768 769SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 770 DebugLoc DL = Op.getDebugLoc(); 771 StoreSDNode *StoreNode = cast<StoreSDNode>(Op); 772 SDValue Chain = Op.getOperand(0); 773 SDValue Value = Op.getOperand(1); 774 SDValue Ptr = Op.getOperand(2); 775 776 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && 777 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) { 778 // Convert pointer from byte address to dword address. 779 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), 780 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), 781 Ptr, DAG.getConstant(2, MVT::i32))); 782 783 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { 784 assert(!"Truncated and indexed stores not supported yet"); 785 } else { 786 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); 787 } 788 return Chain; 789 } 790 return SDValue(); 791} 792 793// return (512 + (kc_bank << 12) 794static int 795ConstantAddressBlock(unsigned AddressSpace) { 796 switch (AddressSpace) { 797 case AMDGPUAS::CONSTANT_BUFFER_0: 798 return 512; 799 case AMDGPUAS::CONSTANT_BUFFER_1: 800 return 512 + 4096; 801 case AMDGPUAS::CONSTANT_BUFFER_2: 802 return 512 + 4096 * 2; 803 case AMDGPUAS::CONSTANT_BUFFER_3: 804 return 512 + 4096 * 3; 805 case AMDGPUAS::CONSTANT_BUFFER_4: 806 return 512 + 4096 * 4; 807 case AMDGPUAS::CONSTANT_BUFFER_5: 808 return 512 + 4096 * 5; 809 case AMDGPUAS::CONSTANT_BUFFER_6: 810 return 512 + 4096 * 6; 811 case AMDGPUAS::CONSTANT_BUFFER_7: 812 return 512 + 4096 * 7; 813 case AMDGPUAS::CONSTANT_BUFFER_8: 814 return 512 + 4096 * 8; 815 case AMDGPUAS::CONSTANT_BUFFER_9: 816 return 512 + 4096 * 9; 817 case AMDGPUAS::CONSTANT_BUFFER_10: 818 return 512 + 4096 * 10; 819 case AMDGPUAS::CONSTANT_BUFFER_11: 820 return 512 + 4096 * 11; 821 case AMDGPUAS::CONSTANT_BUFFER_12: 822 return 512 + 4096 * 12; 823 case AMDGPUAS::CONSTANT_BUFFER_13: 824 return 512 + 4096 * 13; 825 case AMDGPUAS::CONSTANT_BUFFER_14: 826 
return 512 + 4096 * 14; 827 case AMDGPUAS::CONSTANT_BUFFER_15: 828 return 512 + 4096 * 15; 829 default: 830 return -1; 831 } 832} 833 834SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const 835{ 836 EVT VT = Op.getValueType(); 837 DebugLoc DL = Op.getDebugLoc(); 838 LoadSDNode *LoadNode = cast<LoadSDNode>(Op); 839 SDValue Chain = Op.getOperand(0); 840 SDValue Ptr = Op.getOperand(1); 841 SDValue LoweredLoad; 842 843 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); 844 if (ConstantBlock > -1) { 845 SDValue Result; 846 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) || 847 dyn_cast<Constant>(LoadNode->getSrcValue())) { 848 SDValue Slots[4]; 849 for (unsigned i = 0; i < 4; i++) { 850 // We want Const position encoded with the following formula : 851 // (((512 + (kc_bank << 12) + const_index) << 2) + chan) 852 // const_index is Ptr computed by llvm using an alignment of 16. 853 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and 854 // then div by 4 at the ISel step 855 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 856 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); 857 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); 858 } 859 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4); 860 } else { 861 // non constant ptr cant be folded, keeps it as a v4f32 load 862 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, 863 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)) 864 ); 865 } 866 867 if (!VT.isVector()) { 868 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 869 DAG.getConstant(0, MVT::i32)); 870 } 871 872 SDValue MergedValues[2] = { 873 Result, 874 Chain 875 }; 876 return DAG.getMergeValues(MergedValues, 2, DL); 877 } 878 879 return SDValue(); 880} 881 882SDValue R600TargetLowering::LowerFPOW(SDValue Op, 883 SelectionDAG &DAG) const { 884 DebugLoc DL = Op.getDebugLoc(); 885 EVT VT = 
Op.getValueType(); 886 SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0)); 887 SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase); 888 return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase); 889} 890 891/// XXX Only kernel functions are supported, so we can assume for now that 892/// every function is a kernel function, but in the future we should use 893/// separate calling conventions for kernel and non-kernel functions. 894SDValue R600TargetLowering::LowerFormalArguments( 895 SDValue Chain, 896 CallingConv::ID CallConv, 897 bool isVarArg, 898 const SmallVectorImpl<ISD::InputArg> &Ins, 899 DebugLoc DL, SelectionDAG &DAG, 900 SmallVectorImpl<SDValue> &InVals) const { 901 unsigned ParamOffsetBytes = 36; 902 Function::const_arg_iterator FuncArg = 903 DAG.getMachineFunction().getFunction()->arg_begin(); 904 for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) { 905 EVT VT = Ins[i].VT; 906 Type *ArgType = FuncArg->getType(); 907 unsigned ArgSizeInBits = ArgType->isPointerTy() ? 
908 32 : ArgType->getPrimitiveSizeInBits(); 909 unsigned ArgBytes = ArgSizeInBits >> 3; 910 EVT ArgVT; 911 if (ArgSizeInBits < VT.getSizeInBits()) { 912 assert(!ArgType->isFloatTy() && 913 "Extending floating point arguments not supported yet"); 914 ArgVT = MVT::getIntegerVT(ArgSizeInBits); 915 } else { 916 ArgVT = VT; 917 } 918 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 919 AMDGPUAS::PARAM_I_ADDRESS); 920 SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), 921 DAG.getConstant(ParamOffsetBytes, MVT::i32), 922 MachinePointerInfo(new Argument(PtrTy)), 923 ArgVT, false, false, ArgBytes); 924 InVals.push_back(Arg); 925 ParamOffsetBytes += ArgBytes; 926 } 927 return Chain; 928} 929 930EVT R600TargetLowering::getSetCCResultType(EVT VT) const { 931 if (!VT.isVector()) return MVT::i32; 932 return VT.changeVectorElementTypeToInteger(); 933} 934 935//===----------------------------------------------------------------------===// 936// Custom DAG Optimizations 937//===----------------------------------------------------------------------===// 938 939SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, 940 DAGCombinerInfo &DCI) const { 941 SelectionDAG &DAG = DCI.DAG; 942 943 switch (N->getOpcode()) { 944 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) 945 case ISD::FP_ROUND: { 946 SDValue Arg = N->getOperand(0); 947 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { 948 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0), 949 Arg.getOperand(0)); 950 } 951 break; 952 } 953 // Extract_vec (Build_vector) generated by custom lowering 954 // also needs to be customly combined 955 case ISD::EXTRACT_VECTOR_ELT: { 956 SDValue Arg = N->getOperand(0); 957 if (Arg.getOpcode() == ISD::BUILD_VECTOR) { 958 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 959 unsigned Element = Const->getZExtValue(); 960 return Arg->getOperand(Element); 961 } 962 } 963 if 
(Arg.getOpcode() == ISD::BITCAST && 964 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { 965 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 966 unsigned Element = Const->getZExtValue(); 967 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(), 968 Arg->getOperand(0).getOperand(Element)); 969 } 970 } 971 } 972 } 973 return SDValue(); 974} 975