// R600ISelLowering.cpp revision 29b15a378045762ce09642ab9dd741ece41f59a3
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file 11/// \brief Custom DAG lowering for R600 12// 13//===----------------------------------------------------------------------===// 14 15#include "R600ISelLowering.h" 16#include "R600Defines.h" 17#include "R600InstrInfo.h" 18#include "R600MachineFunctionInfo.h" 19#include "llvm/CodeGen/MachineInstrBuilder.h" 20#include "llvm/CodeGen/MachineRegisterInfo.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22#include "llvm/IR/Argument.h" 23#include "llvm/IR/Function.h" 24 25using namespace llvm; 26 27R600TargetLowering::R600TargetLowering(TargetMachine &TM) : 28 AMDGPUTargetLowering(TM), 29 TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) { 30 setOperationAction(ISD::MUL, MVT::i64, Expand); 31 addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); 32 addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); 33 addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); 34 addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass); 35 computeRegisterProperties(); 36 37 setOperationAction(ISD::FADD, MVT::v4f32, Expand); 38 setOperationAction(ISD::FMUL, MVT::v4f32, Expand); 39 setOperationAction(ISD::FDIV, MVT::v4f32, Expand); 40 setOperationAction(ISD::FSUB, MVT::v4f32, Expand); 41 42 setOperationAction(ISD::ADD, MVT::v4i32, Expand); 43 setOperationAction(ISD::AND, MVT::v4i32, Expand); 44 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Expand); 45 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Expand); 46 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Expand); 47 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Expand); 48 setOperationAction(ISD::UDIV, MVT::v4i32, Expand); 49 setOperationAction(ISD::UREM, MVT::v4i32, Expand); 50 
setOperationAction(ISD::SETCC, MVT::v4i32, Expand); 51 52 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 53 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 54 55 setOperationAction(ISD::FSUB, MVT::f32, Expand); 56 57 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 58 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 59 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); 60 setOperationAction(ISD::FPOW, MVT::f32, Custom); 61 62 setOperationAction(ISD::ROTL, MVT::i32, Custom); 63 64 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 65 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 66 67 setOperationAction(ISD::SETCC, MVT::i32, Custom); 68 setOperationAction(ISD::SETCC, MVT::f32, Custom); 69 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom); 70 71 setOperationAction(ISD::SELECT, MVT::i32, Custom); 72 setOperationAction(ISD::SELECT, MVT::f32, Custom); 73 74 setOperationAction(ISD::STORE, MVT::i32, Custom); 75 setOperationAction(ISD::STORE, MVT::v4i32, Custom); 76 77 setOperationAction(ISD::LOAD, MVT::i32, Custom); 78 setOperationAction(ISD::LOAD, MVT::v4i32, Custom); 79 setTargetDAGCombine(ISD::FP_ROUND); 80 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); 81 82 setSchedulingPreference(Sched::VLIW); 83} 84 85MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( 86 MachineInstr * MI, MachineBasicBlock * BB) const { 87 MachineFunction * MF = BB->getParent(); 88 MachineRegisterInfo &MRI = MF->getRegInfo(); 89 MachineBasicBlock::iterator I = *MI; 90 91 switch (MI->getOpcode()) { 92 default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 93 case AMDGPU::SHADER_TYPE: break; 94 case AMDGPU::CLAMP_R600: { 95 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, 96 AMDGPU::MOV, 97 MI->getOperand(0).getReg(), 98 MI->getOperand(1).getReg()); 99 TII->addFlag(NewMI, 0, MO_FLAG_CLAMP); 100 break; 101 } 102 103 case AMDGPU::FABS_R600: { 104 MachineInstr *NewMI = 
TII->buildDefaultInstruction(*BB, I, 105 AMDGPU::MOV, 106 MI->getOperand(0).getReg(), 107 MI->getOperand(1).getReg()); 108 TII->addFlag(NewMI, 0, MO_FLAG_ABS); 109 break; 110 } 111 112 case AMDGPU::FNEG_R600: { 113 MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I, 114 AMDGPU::MOV, 115 MI->getOperand(0).getReg(), 116 MI->getOperand(1).getReg()); 117 TII->addFlag(NewMI, 0, MO_FLAG_NEG); 118 break; 119 } 120 121 case AMDGPU::MASK_WRITE: { 122 unsigned maskedRegister = MI->getOperand(0).getReg(); 123 assert(TargetRegisterInfo::isVirtualRegister(maskedRegister)); 124 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister); 125 TII->addFlag(defInstr, 0, MO_FLAG_MASK); 126 break; 127 } 128 129 case AMDGPU::MOV_IMM_F32: 130 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), 131 MI->getOperand(1).getFPImm()->getValueAPF() 132 .bitcastToAPInt().getZExtValue()); 133 break; 134 case AMDGPU::MOV_IMM_I32: 135 TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(), 136 MI->getOperand(1).getImm()); 137 break; 138 139 140 case AMDGPU::RAT_WRITE_CACHELESS_32_eg: 141 case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { 142 unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 
1 : 0; 143 144 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 145 .addOperand(MI->getOperand(0)) 146 .addOperand(MI->getOperand(1)) 147 .addImm(EOP); // Set End of program bit 148 break; 149 } 150 151 case AMDGPU::TXD: { 152 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 153 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 154 155 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) 156 .addOperand(MI->getOperand(3)) 157 .addOperand(MI->getOperand(4)) 158 .addOperand(MI->getOperand(5)) 159 .addOperand(MI->getOperand(6)); 160 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) 161 .addOperand(MI->getOperand(2)) 162 .addOperand(MI->getOperand(4)) 163 .addOperand(MI->getOperand(5)) 164 .addOperand(MI->getOperand(6)); 165 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G)) 166 .addOperand(MI->getOperand(0)) 167 .addOperand(MI->getOperand(1)) 168 .addOperand(MI->getOperand(4)) 169 .addOperand(MI->getOperand(5)) 170 .addOperand(MI->getOperand(6)) 171 .addReg(T0, RegState::Implicit) 172 .addReg(T1, RegState::Implicit); 173 break; 174 } 175 176 case AMDGPU::TXD_SHADOW: { 177 unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 178 unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 179 180 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0) 181 .addOperand(MI->getOperand(3)) 182 .addOperand(MI->getOperand(4)) 183 .addOperand(MI->getOperand(5)) 184 .addOperand(MI->getOperand(6)); 185 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1) 186 .addOperand(MI->getOperand(2)) 187 .addOperand(MI->getOperand(4)) 188 .addOperand(MI->getOperand(5)) 189 .addOperand(MI->getOperand(6)); 190 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G)) 191 .addOperand(MI->getOperand(0)) 192 .addOperand(MI->getOperand(1)) 193 .addOperand(MI->getOperand(4)) 
194 .addOperand(MI->getOperand(5)) 195 .addOperand(MI->getOperand(6)) 196 .addReg(T0, RegState::Implicit) 197 .addReg(T1, RegState::Implicit); 198 break; 199 } 200 201 case AMDGPU::BRANCH: 202 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 203 .addOperand(MI->getOperand(0)) 204 .addReg(0); 205 break; 206 207 case AMDGPU::BRANCH_COND_f32: { 208 MachineInstr *NewMI = 209 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), 210 AMDGPU::PREDICATE_BIT) 211 .addOperand(MI->getOperand(1)) 212 .addImm(OPCODE_IS_NOT_ZERO) 213 .addImm(0); // Flags 214 TII->addFlag(NewMI, 0, MO_FLAG_PUSH); 215 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 216 .addOperand(MI->getOperand(0)) 217 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 218 break; 219 } 220 221 case AMDGPU::BRANCH_COND_i32: { 222 MachineInstr *NewMI = 223 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X), 224 AMDGPU::PREDICATE_BIT) 225 .addOperand(MI->getOperand(1)) 226 .addImm(OPCODE_IS_NOT_ZERO_INT) 227 .addImm(0); // Flags 228 TII->addFlag(NewMI, 0, MO_FLAG_PUSH); 229 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP)) 230 .addOperand(MI->getOperand(0)) 231 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill); 232 break; 233 } 234 235 case AMDGPU::EG_ExportSwz: 236 case AMDGPU::R600_ExportSwz: { 237 // Instruction is left unmodified if its not the last one of its type 238 bool isLastInstructionOfItsType = true; 239 unsigned InstExportType = MI->getOperand(1).getImm(); 240 for (MachineBasicBlock::iterator NextExportInst = llvm::next(I), 241 EndBlock = BB->end(); NextExportInst != EndBlock; 242 NextExportInst = llvm::next(NextExportInst)) { 243 if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz || 244 NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) { 245 unsigned CurrentInstExportType = NextExportInst->getOperand(1) 246 .getImm(); 247 if (CurrentInstExportType == InstExportType) { 248 isLastInstructionOfItsType = false; 249 break; 250 } 251 } 252 } 253 
bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0; 254 if (!EOP && !isLastInstructionOfItsType) 255 return BB; 256 unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; 257 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) 258 .addOperand(MI->getOperand(0)) 259 .addOperand(MI->getOperand(1)) 260 .addOperand(MI->getOperand(2)) 261 .addOperand(MI->getOperand(3)) 262 .addOperand(MI->getOperand(4)) 263 .addOperand(MI->getOperand(5)) 264 .addOperand(MI->getOperand(6)) 265 .addImm(CfInst) 266 .addImm(EOP); 267 break; 268 } 269 } 270 271 MI->eraseFromParent(); 272 return BB; 273} 274 275//===----------------------------------------------------------------------===// 276// Custom DAG Lowering Operations 277//===----------------------------------------------------------------------===// 278 279using namespace llvm::Intrinsic; 280using namespace llvm::AMDGPUIntrinsic; 281 282static SDValue 283InsertScalarToRegisterExport(SelectionDAG &DAG, DebugLoc DL, SDNode **ExportMap, 284 unsigned Slot, unsigned Channel, unsigned Inst, unsigned Type, 285 SDValue Scalar, SDValue Chain) { 286 if (!ExportMap[Slot]) { 287 SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, 288 DL, MVT::v4f32, 289 DAG.getUNDEF(MVT::v4f32), 290 Scalar, 291 DAG.getConstant(Channel, MVT::i32)); 292 293 unsigned Mask = 1 << Channel; 294 295 const SDValue Ops[] = {Chain, Vector, DAG.getConstant(Inst, MVT::i32), 296 DAG.getConstant(Type, MVT::i32), DAG.getConstant(Slot, MVT::i32), 297 DAG.getConstant(Mask, MVT::i32)}; 298 299 SDValue Res = DAG.getNode( 300 AMDGPUISD::EXPORT, 301 DL, 302 MVT::Other, 303 Ops, 6); 304 ExportMap[Slot] = Res.getNode(); 305 return Res; 306 } 307 308 SDNode *ExportInstruction = (SDNode *) ExportMap[Slot] ; 309 SDValue PreviousVector = ExportInstruction->getOperand(1); 310 SDValue Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, 311 DL, MVT::v4f32, 312 PreviousVector, 313 Scalar, 314 DAG.getConstant(Channel, MVT::i32)); 315 316 unsigned Mask = 
dyn_cast<ConstantSDNode>(ExportInstruction->getOperand(5)) 317 ->getZExtValue(); 318 Mask |= (1 << Channel); 319 320 const SDValue Ops[] = {ExportInstruction->getOperand(0), Vector, 321 DAG.getConstant(Inst, MVT::i32), 322 DAG.getConstant(Type, MVT::i32), 323 DAG.getConstant(Slot, MVT::i32), 324 DAG.getConstant(Mask, MVT::i32)}; 325 326 DAG.UpdateNodeOperands(ExportInstruction, 327 Ops, 6); 328 329 return Chain; 330 331} 332 333SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 334 switch (Op.getOpcode()) { 335 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 336 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 337 case ISD::ROTL: return LowerROTL(Op, DAG); 338 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 339 case ISD::SELECT: return LowerSELECT(Op, DAG); 340 case ISD::SETCC: return LowerSETCC(Op, DAG); 341 case ISD::STORE: return LowerSTORE(Op, DAG); 342 case ISD::LOAD: return LowerLOAD(Op, DAG); 343 case ISD::FPOW: return LowerFPOW(Op, DAG); 344 case ISD::INTRINSIC_VOID: { 345 SDValue Chain = Op.getOperand(0); 346 unsigned IntrinsicID = 347 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 348 switch (IntrinsicID) { 349 case AMDGPUIntrinsic::AMDGPU_store_output: { 350 MachineFunction &MF = DAG.getMachineFunction(); 351 MachineRegisterInfo &MRI = MF.getRegInfo(); 352 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 353 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 354 if (!MRI.isLiveOut(Reg)) { 355 MRI.addLiveOut(Reg); 356 } 357 return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2)); 358 } 359 case AMDGPUIntrinsic::R600_store_pixel_color: { 360 MachineFunction &MF = DAG.getMachineFunction(); 361 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 362 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue(); 363 364 SDNode **OutputsMap = MFI->Outputs; 365 return InsertScalarToRegisterExport(DAG, Op.getDebugLoc(), 
OutputsMap, 366 RegIndex / 4, RegIndex % 4, 0, 0, Op.getOperand(2), 367 Chain); 368 369 } 370 371 // default for switch(IntrinsicID) 372 default: break; 373 } 374 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode()) 375 break; 376 } 377 case ISD::INTRINSIC_WO_CHAIN: { 378 unsigned IntrinsicID = 379 cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 380 EVT VT = Op.getValueType(); 381 DebugLoc DL = Op.getDebugLoc(); 382 switch(IntrinsicID) { 383 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 384 case AMDGPUIntrinsic::R600_load_input: { 385 int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 386 unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex); 387 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT); 388 } 389 390 case AMDGPUIntrinsic::R600_interp_input: { 391 int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 392 int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue(); 393 MachineSDNode *interp; 394 if (ijb < 0) { 395 interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL, 396 MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32)); 397 return DAG.getTargetExtractSubreg( 398 TII->getRegisterInfo().getSubRegFromChannel(slot % 4), 399 DL, MVT::f32, SDValue(interp, 0)); 400 } 401 402 if (slot % 4 < 2) 403 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL, 404 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), 405 CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 406 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), 407 CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 408 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); 409 else 410 interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL, 411 MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32), 412 CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 413 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1), MVT::f32), 414 
CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 415 AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb), MVT::f32)); 416 417 return SDValue(interp, slot % 2); 418 } 419 420 case r600_read_ngroups_x: 421 return LowerImplicitParameter(DAG, VT, DL, 0); 422 case r600_read_ngroups_y: 423 return LowerImplicitParameter(DAG, VT, DL, 1); 424 case r600_read_ngroups_z: 425 return LowerImplicitParameter(DAG, VT, DL, 2); 426 case r600_read_global_size_x: 427 return LowerImplicitParameter(DAG, VT, DL, 3); 428 case r600_read_global_size_y: 429 return LowerImplicitParameter(DAG, VT, DL, 4); 430 case r600_read_global_size_z: 431 return LowerImplicitParameter(DAG, VT, DL, 5); 432 case r600_read_local_size_x: 433 return LowerImplicitParameter(DAG, VT, DL, 6); 434 case r600_read_local_size_y: 435 return LowerImplicitParameter(DAG, VT, DL, 7); 436 case r600_read_local_size_z: 437 return LowerImplicitParameter(DAG, VT, DL, 8); 438 439 case r600_read_tgid_x: 440 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 441 AMDGPU::T1_X, VT); 442 case r600_read_tgid_y: 443 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 444 AMDGPU::T1_Y, VT); 445 case r600_read_tgid_z: 446 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 447 AMDGPU::T1_Z, VT); 448 case r600_read_tidig_x: 449 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 450 AMDGPU::T0_X, VT); 451 case r600_read_tidig_y: 452 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 453 AMDGPU::T0_Y, VT); 454 case r600_read_tidig_z: 455 return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, 456 AMDGPU::T0_Z, VT); 457 } 458 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) 459 break; 460 } 461 } // end switch(Op.getOpcode()) 462 return SDValue(); 463} 464 465void R600TargetLowering::ReplaceNodeResults(SDNode *N, 466 SmallVectorImpl<SDValue> &Results, 467 SelectionDAG &DAG) const { 468 switch (N->getOpcode()) { 469 default: return; 470 case 
ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); 471 return; 472 case ISD::LOAD: { 473 SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); 474 Results.push_back(SDValue(Node, 0)); 475 Results.push_back(SDValue(Node, 1)); 476 // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode 477 // function 478 DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); 479 return; 480 } 481 } 482} 483 484SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { 485 return DAG.getNode( 486 ISD::SETCC, 487 Op.getDebugLoc(), 488 MVT::i1, 489 Op, DAG.getConstantFP(0.0f, MVT::f32), 490 DAG.getCondCode(ISD::SETNE) 491 ); 492} 493 494SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 495 SDValue Chain = Op.getOperand(0); 496 SDValue CC = Op.getOperand(1); 497 SDValue LHS = Op.getOperand(2); 498 SDValue RHS = Op.getOperand(3); 499 SDValue JumpT = Op.getOperand(4); 500 SDValue CmpValue; 501 SDValue Result; 502 503 if (LHS.getValueType() == MVT::i32) { 504 CmpValue = DAG.getNode( 505 ISD::SELECT_CC, 506 Op.getDebugLoc(), 507 MVT::i32, 508 LHS, RHS, 509 DAG.getConstant(-1, MVT::i32), 510 DAG.getConstant(0, MVT::i32), 511 CC); 512 } else if (LHS.getValueType() == MVT::f32) { 513 CmpValue = DAG.getNode( 514 ISD::SELECT_CC, 515 Op.getDebugLoc(), 516 MVT::f32, 517 LHS, RHS, 518 DAG.getConstantFP(1.0f, MVT::f32), 519 DAG.getConstantFP(0.0f, MVT::f32), 520 CC); 521 } else { 522 assert(0 && "Not valid type for br_cc"); 523 } 524 Result = DAG.getNode( 525 AMDGPUISD::BRANCH_COND, 526 CmpValue.getDebugLoc(), 527 MVT::Other, Chain, 528 JumpT, CmpValue); 529 return Result; 530} 531 532SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, 533 DebugLoc DL, 534 unsigned DwordOffset) const { 535 unsigned ByteOffset = DwordOffset * 4; 536 PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 537 AMDGPUAS::PARAM_I_ADDRESS); 538 539 // We shouldn't be using an 
offset wider than 16-bits for implicit parameters. 540 assert(isInt<16>(ByteOffset)); 541 542 return DAG.getLoad(VT, DL, DAG.getEntryNode(), 543 DAG.getConstant(ByteOffset, MVT::i32), // PTR 544 MachinePointerInfo(ConstantPointerNull::get(PtrType)), 545 false, false, false, 0); 546} 547 548SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { 549 DebugLoc DL = Op.getDebugLoc(); 550 EVT VT = Op.getValueType(); 551 552 return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, 553 Op.getOperand(0), 554 Op.getOperand(0), 555 DAG.getNode(ISD::SUB, DL, VT, 556 DAG.getConstant(32, MVT::i32), 557 Op.getOperand(1))); 558} 559 560bool R600TargetLowering::isZero(SDValue Op) const { 561 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { 562 return Cst->isNullValue(); 563 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){ 564 return CstFP->isZero(); 565 } else { 566 return false; 567 } 568} 569 570SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 571 DebugLoc DL = Op.getDebugLoc(); 572 EVT VT = Op.getValueType(); 573 574 SDValue LHS = Op.getOperand(0); 575 SDValue RHS = Op.getOperand(1); 576 SDValue True = Op.getOperand(2); 577 SDValue False = Op.getOperand(3); 578 SDValue CC = Op.getOperand(4); 579 SDValue Temp; 580 581 // LHS and RHS are guaranteed to be the same value type 582 EVT CompareVT = LHS.getValueType(); 583 584 // Check if we can lower this to a native operation. 585 586 // Try to lower to a CND* instruction: 587 // CND* instructions requires RHS to be zero. Some SELECT_CC nodes that 588 // can be lowered to CND* instructions can also be lowered to SET* 589 // instructions. CND* instructions are cheaper, because they dont't 590 // require additional instructions to convert their result to the correct 591 // value type, so this check should be first. 592 if (isZero(LHS) || isZero(RHS)) { 593 SDValue Cond = (isZero(LHS) ? RHS : LHS); 594 SDValue Zero = (isZero(LHS) ? 
LHS : RHS); 595 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 596 if (CompareVT != VT) { 597 // Bitcast True / False to the correct types. This will end up being 598 // a nop, but it allows us to define only a single pattern in the 599 // .TD files for each CND* instruction rather than having to have 600 // one pattern for integer True/False and one for fp True/False 601 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True); 602 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False); 603 } 604 if (isZero(LHS)) { 605 CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode); 606 } 607 608 switch (CCOpcode) { 609 case ISD::SETONE: 610 case ISD::SETUNE: 611 case ISD::SETNE: 612 case ISD::SETULE: 613 case ISD::SETULT: 614 case ISD::SETOLE: 615 case ISD::SETOLT: 616 case ISD::SETLE: 617 case ISD::SETLT: 618 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32); 619 Temp = True; 620 True = False; 621 False = Temp; 622 break; 623 default: 624 break; 625 } 626 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 627 Cond, Zero, 628 True, False, 629 DAG.getCondCode(CCOpcode)); 630 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); 631 } 632 633 // Try to lower to a SET* instruction: 634 // We need all the operands of SELECT_CC to have the same value type, so if 635 // necessary we need to change True and False to be the same type as LHS and 636 // RHS, and then convert the result of the select_cc back to the correct type. 637 638 // Move hardware True/False values to the correct operand. 
639 if (isHWTrueValue(False) && isHWFalseValue(True)) { 640 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 641 std::swap(False, True); 642 CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32)); 643 } 644 645 if (isHWTrueValue(True) && isHWFalseValue(False)) { 646 if (CompareVT != VT) { 647 if (VT == MVT::f32 && CompareVT == MVT::i32) { 648 SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 649 LHS, RHS, 650 DAG.getConstant(-1, MVT::i32), 651 DAG.getConstant(0, MVT::i32), 652 CC); 653 // Convert integer values of true (-1) and false (0) to fp values of 654 // true (1.0f) and false (0.0f). 655 SDValue LSB = DAG.getNode(ISD::AND, DL, MVT::i32, Boolean, 656 DAG.getConstant(1, MVT::i32)); 657 return DAG.getNode(ISD::UINT_TO_FP, DL, VT, LSB); 658 } else if (VT == MVT::i32 && CompareVT == MVT::f32) { 659 SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, 660 LHS, RHS, 661 DAG.getConstantFP(1.0f, MVT::f32), 662 DAG.getConstantFP(0.0f, MVT::f32), 663 CC); 664 // Convert fp values of true (1.0f) and false (0.0f) to integer values 665 // of true (-1) and false (0). 666 SDValue Neg = DAG.getNode(ISD::FNEG, DL, MVT::f32, BoolAsFlt); 667 return DAG.getNode(ISD::FP_TO_SINT, DL, VT, Neg); 668 } else { 669 // I don't think there will be any other type pairings. 670 assert(!"Unhandled operand type parings in SELECT_CC"); 671 } 672 } else { 673 // This SELECT_CC is already legal. 674 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC); 675 } 676 } 677 678 // Possible Min/Max pattern 679 SDValue MinMax = LowerMinMax(Op, DAG); 680 if (MinMax.getNode()) { 681 return MinMax; 682 } 683 684 // If we make it this for it means we have no native instructions to handle 685 // this SELECT_CC, so we must lower it. 
686 SDValue HWTrue, HWFalse; 687 688 if (CompareVT == MVT::f32) { 689 HWTrue = DAG.getConstantFP(1.0f, CompareVT); 690 HWFalse = DAG.getConstantFP(0.0f, CompareVT); 691 } else if (CompareVT == MVT::i32) { 692 HWTrue = DAG.getConstant(-1, CompareVT); 693 HWFalse = DAG.getConstant(0, CompareVT); 694 } 695 else { 696 assert(!"Unhandled value type in LowerSELECT_CC"); 697 } 698 699 // Lower this unsupported SELECT_CC into a combination of two supported 700 // SELECT_CC operations. 701 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC); 702 703 return DAG.getNode(ISD::SELECT_CC, DL, VT, 704 Cond, HWFalse, 705 True, False, 706 DAG.getCondCode(ISD::SETNE)); 707} 708 709SDValue R600TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 710 return DAG.getNode(ISD::SELECT_CC, 711 Op.getDebugLoc(), 712 Op.getValueType(), 713 Op.getOperand(0), 714 DAG.getConstant(0, MVT::i32), 715 Op.getOperand(1), 716 Op.getOperand(2), 717 DAG.getCondCode(ISD::SETNE)); 718} 719 720SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 721 SDValue Cond; 722 SDValue LHS = Op.getOperand(0); 723 SDValue RHS = Op.getOperand(1); 724 SDValue CC = Op.getOperand(2); 725 DebugLoc DL = Op.getDebugLoc(); 726 assert(Op.getValueType() == MVT::i32); 727 if (LHS.getValueType() == MVT::i32) { 728 Cond = DAG.getNode( 729 ISD::SELECT_CC, 730 Op.getDebugLoc(), 731 MVT::i32, 732 LHS, RHS, 733 DAG.getConstant(-1, MVT::i32), 734 DAG.getConstant(0, MVT::i32), 735 CC); 736 } else if (LHS.getValueType() == MVT::f32) { 737 Cond = DAG.getNode( 738 ISD::SELECT_CC, 739 Op.getDebugLoc(), 740 MVT::f32, 741 LHS, RHS, 742 DAG.getConstantFP(1.0f, MVT::f32), 743 DAG.getConstantFP(0.0f, MVT::f32), 744 CC); 745 Cond = DAG.getNode( 746 ISD::FP_TO_SINT, 747 DL, 748 MVT::i32, 749 Cond); 750 } else { 751 assert(0 && "Not valid type for set_cc"); 752 } 753 Cond = DAG.getNode( 754 ISD::AND, 755 DL, 756 MVT::i32, 757 DAG.getConstant(1, MVT::i32), 758 Cond); 759 
return Cond; 760} 761 762SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 763 DebugLoc DL = Op.getDebugLoc(); 764 StoreSDNode *StoreNode = cast<StoreSDNode>(Op); 765 SDValue Chain = Op.getOperand(0); 766 SDValue Value = Op.getOperand(1); 767 SDValue Ptr = Op.getOperand(2); 768 769 if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && 770 Ptr->getOpcode() != AMDGPUISD::DWORDADDR) { 771 // Convert pointer from byte address to dword address. 772 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(), 773 DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), 774 Ptr, DAG.getConstant(2, MVT::i32))); 775 776 if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) { 777 assert(!"Truncated and indexed stores not supported yet"); 778 } else { 779 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand()); 780 } 781 return Chain; 782 } 783 return SDValue(); 784} 785 786// return (512 + (kc_bank << 12) 787static int 788ConstantAddressBlock(unsigned AddressSpace) { 789 switch (AddressSpace) { 790 case AMDGPUAS::CONSTANT_BUFFER_0: 791 return 512; 792 case AMDGPUAS::CONSTANT_BUFFER_1: 793 return 512 + 4096; 794 case AMDGPUAS::CONSTANT_BUFFER_2: 795 return 512 + 4096 * 2; 796 case AMDGPUAS::CONSTANT_BUFFER_3: 797 return 512 + 4096 * 3; 798 case AMDGPUAS::CONSTANT_BUFFER_4: 799 return 512 + 4096 * 4; 800 case AMDGPUAS::CONSTANT_BUFFER_5: 801 return 512 + 4096 * 5; 802 case AMDGPUAS::CONSTANT_BUFFER_6: 803 return 512 + 4096 * 6; 804 case AMDGPUAS::CONSTANT_BUFFER_7: 805 return 512 + 4096 * 7; 806 case AMDGPUAS::CONSTANT_BUFFER_8: 807 return 512 + 4096 * 8; 808 case AMDGPUAS::CONSTANT_BUFFER_9: 809 return 512 + 4096 * 9; 810 case AMDGPUAS::CONSTANT_BUFFER_10: 811 return 512 + 4096 * 10; 812 case AMDGPUAS::CONSTANT_BUFFER_11: 813 return 512 + 4096 * 11; 814 case AMDGPUAS::CONSTANT_BUFFER_12: 815 return 512 + 4096 * 12; 816 case AMDGPUAS::CONSTANT_BUFFER_13: 817 return 512 + 4096 * 13; 818 case AMDGPUAS::CONSTANT_BUFFER_14: 819 
return 512 + 4096 * 14; 820 case AMDGPUAS::CONSTANT_BUFFER_15: 821 return 512 + 4096 * 15; 822 default: 823 return -1; 824 } 825} 826 827SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const 828{ 829 EVT VT = Op.getValueType(); 830 DebugLoc DL = Op.getDebugLoc(); 831 LoadSDNode *LoadNode = cast<LoadSDNode>(Op); 832 SDValue Chain = Op.getOperand(0); 833 SDValue Ptr = Op.getOperand(1); 834 SDValue LoweredLoad; 835 836 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); 837 if (ConstantBlock > -1) { 838 SDValue Result; 839 if (dyn_cast<ConstantExpr>(LoadNode->getSrcValue()) || 840 dyn_cast<Constant>(LoadNode->getSrcValue())) { 841 SDValue Slots[4]; 842 for (unsigned i = 0; i < 4; i++) { 843 // We want Const position encoded with the following formula : 844 // (((512 + (kc_bank << 12) + const_index) << 2) + chan) 845 // const_index is Ptr computed by llvm using an alignment of 16. 846 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and 847 // then div by 4 at the ISel step 848 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 849 DAG.getConstant(4 * i + ConstantBlock * 16, MVT::i32)); 850 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr); 851 } 852 Result = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Slots, 4); 853 } else { 854 // non constant ptr cant be folded, keeps it as a v4f32 load 855 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, 856 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)) 857 ); 858 } 859 860 if (!VT.isVector()) { 861 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result, 862 DAG.getConstant(0, MVT::i32)); 863 } 864 865 SDValue MergedValues[2] = { 866 Result, 867 Chain 868 }; 869 return DAG.getMergeValues(MergedValues, 2, DL); 870 } 871 872 return SDValue(); 873} 874 875SDValue R600TargetLowering::LowerFPOW(SDValue Op, 876 SelectionDAG &DAG) const { 877 DebugLoc DL = Op.getDebugLoc(); 878 EVT VT = 
Op.getValueType(); 879 SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0)); 880 SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase); 881 return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase); 882} 883 884/// XXX Only kernel functions are supported, so we can assume for now that 885/// every function is a kernel function, but in the future we should use 886/// separate calling conventions for kernel and non-kernel functions. 887SDValue R600TargetLowering::LowerFormalArguments( 888 SDValue Chain, 889 CallingConv::ID CallConv, 890 bool isVarArg, 891 const SmallVectorImpl<ISD::InputArg> &Ins, 892 DebugLoc DL, SelectionDAG &DAG, 893 SmallVectorImpl<SDValue> &InVals) const { 894 unsigned ParamOffsetBytes = 36; 895 Function::const_arg_iterator FuncArg = 896 DAG.getMachineFunction().getFunction()->arg_begin(); 897 for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) { 898 EVT VT = Ins[i].VT; 899 Type *ArgType = FuncArg->getType(); 900 unsigned ArgSizeInBits = ArgType->isPointerTy() ? 
901 32 : ArgType->getPrimitiveSizeInBits(); 902 unsigned ArgBytes = ArgSizeInBits >> 3; 903 EVT ArgVT; 904 if (ArgSizeInBits < VT.getSizeInBits()) { 905 assert(!ArgType->isFloatTy() && 906 "Extending floating point arguments not supported yet"); 907 ArgVT = MVT::getIntegerVT(ArgSizeInBits); 908 } else { 909 ArgVT = VT; 910 } 911 PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 912 AMDGPUAS::PARAM_I_ADDRESS); 913 SDValue Arg = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getRoot(), 914 DAG.getConstant(ParamOffsetBytes, MVT::i32), 915 MachinePointerInfo(new Argument(PtrTy)), 916 ArgVT, false, false, ArgBytes); 917 InVals.push_back(Arg); 918 ParamOffsetBytes += ArgBytes; 919 } 920 return Chain; 921} 922 923EVT R600TargetLowering::getSetCCResultType(EVT VT) const { 924 if (!VT.isVector()) return MVT::i32; 925 return VT.changeVectorElementTypeToInteger(); 926} 927 928//===----------------------------------------------------------------------===// 929// Custom DAG Optimizations 930//===----------------------------------------------------------------------===// 931 932SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, 933 DAGCombinerInfo &DCI) const { 934 SelectionDAG &DAG = DCI.DAG; 935 936 switch (N->getOpcode()) { 937 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) 938 case ISD::FP_ROUND: { 939 SDValue Arg = N->getOperand(0); 940 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) { 941 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0), 942 Arg.getOperand(0)); 943 } 944 break; 945 } 946 // Extract_vec (Build_vector) generated by custom lowering 947 // also needs to be customly combined 948 case ISD::EXTRACT_VECTOR_ELT: { 949 SDValue Arg = N->getOperand(0); 950 if (Arg.getOpcode() == ISD::BUILD_VECTOR) { 951 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 952 unsigned Element = Const->getZExtValue(); 953 return Arg->getOperand(Element); 954 } 955 } 956 if 
(Arg.getOpcode() == ISD::BITCAST && 957 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { 958 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 959 unsigned Element = Const->getZExtValue(); 960 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getVTList(), 961 Arg->getOperand(0).getOperand(Element)); 962 } 963 } 964 } 965 } 966 return SDValue(); 967} 968