1f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// 2f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 3f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// The LLVM Compiler Infrastructure 4f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 5f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// This file is distributed under the University of Illinois Open Source 6f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// License. See LICENSE.TXT for details. 7f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 8f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 9f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 10f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is 11f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// mostly EmitInstrWithCustomInserter(). 12f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// 13f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 14f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 15f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "SIISelLowering.h" 16f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDIL.h" 17f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "AMDILIntrinsicInfo.h" 18f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "SIInstrInfo.h" 19f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "SIRegisterInfo.h" 20f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineInstrBuilder.h" 21f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/MachineRegisterInfo.h" 22f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#include "llvm/CodeGen/SelectionDAG.h" 23f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 24f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgusing namespace llvm; 25f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 26f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSITargetLowering::SITargetLowering(TargetMachine &TM) : 27f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AMDGPUTargetLowering(TM), 28f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())) 29f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 30f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); 31f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); 32f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass); 33f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass); 34f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass); 35f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass); 36f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 37f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); 38f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass); 39f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 40f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org computeRegisterProperties(); 41f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 42f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::AND, MVT::i1, Custom); 43f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 44f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::ADD, MVT::i64, Legal); 45f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::ADD, MVT::i32, Legal); 46f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 47f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::BR_CC, MVT::i32, Custom); 48f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 49f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 50f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 51f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // We need to custom lower loads from the USER_SGPR address space, so we can 52f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // add the SGPRs as livein registers. 53f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::LOAD, MVT::i32, Custom); 54f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::LOAD, MVT::i64, Custom); 55f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 56f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 57f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 58f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 59f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 60f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setTargetDAGCombine(ISD::SELECT_CC); 61f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 62f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org setTargetDAGCombine(ISD::SETCC); 63f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 64f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 65f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgMachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( 66f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineInstr * MI, MachineBasicBlock * BB) const 67f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 68f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const TargetInstrInfo * TII = getTargetMachine().getInstrInfo(); 69f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); 70f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock::iterator I = MI; 71f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 72f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) { 73f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AppendS_WAITCNT(MI, *BB, llvm::next(I)); 74f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return BB; 75f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 76f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 77f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (MI->getOpcode()) { 78f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 79f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 80f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 81f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::CLAMP_SI: 82f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 83f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(0)) 84f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 85f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // VSRC1-2 are unused, but we still need to fill all the 86f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // operand slots, so we just reuse the VSRC0 operand 87f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 88f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 89f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // ABS 90f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(1) // CLAMP 91f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // OMOD 92f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0); // NEG 93f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 94f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 95f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 96f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::FABS_SI: 97f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 98f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(0)) 99f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 100f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // VSRC1-2 are unused, but we still need to fill all the 101f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // operand slots, so we just reuse the VSRC0 operand 102f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 103f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 104f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(1) // ABS 105f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // CLAMP 106f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // OMOD 107f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0); // NEG 108f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 109f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 110f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 111f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::FNEG_SI: 112f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64)) 113f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(0)) 114f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 115f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // VSRC1-2 are unused, but we still need to fill all the 116f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // operand slots, so we just reuse the VSRC0 operand 117f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 118f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 119f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // ABS 120f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // CLAMP 121f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) // OMOD 122f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(1); // NEG 123f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 124f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 125f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 126f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::SI_INTERP: 127f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LowerSI_INTERP(MI, *BB, I, MRI); 128f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 129f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::SI_INTERP_CONST: 130f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LowerSI_INTERP_CONST(MI, *BB, I, MRI); 131f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 132f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::SI_KIL: 133f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LowerSI_KIL(MI, *BB, I, MRI); 134f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 135f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPU::SI_V_CNDLT: 136f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LowerSI_V_CNDLT(MI, *BB, I, MRI); 137f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 138f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 139f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return BB; 140f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 141f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 142f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB, 143f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock::iterator I) const 144f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 145f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT)) 146f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0); 147f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 148f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 149f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB, 150f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 151f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 152f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 153f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); 154f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand dst = MI->getOperand(0); 155f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand iReg = MI->getOperand(1); 156f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand jReg = MI->getOperand(2); 157f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand attr_chan = MI->getOperand(3); 158f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand attr = MI->getOperand(4); 159f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand params = MI->getOperand(5); 160f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 161f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) 162f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(params); 163f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 164f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp) 165f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(iReg) 166f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(attr_chan) 167f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(attr) 168f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(M0); 169f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 170f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32)) 171f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(dst) 172f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(tmp) 173f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(jReg) 174f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(attr_chan) 175f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(attr) 176f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(M0); 177f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 178f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 179f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 180f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 181f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI, 182f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock &BB, MachineBasicBlock::iterator I, 183f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineRegisterInfo &MRI) const 184f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 185f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand dst = MI->getOperand(0); 186f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand attr_chan = MI->getOperand(1); 187f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand attr = MI->getOperand(2); 188f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineOperand params = MI->getOperand(3); 189f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass); 190f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 191f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0) 192f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(params); 193f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 194f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32)) 195f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(dst) 196f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(attr_chan) 197f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(attr) 198f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(M0); 199f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 200f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 201f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 202f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 203f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB, 204f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 205f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 206f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Clear this pixel from the exec mask if the operand is negative 207f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32), 208f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AMDGPU::VCC) 209f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::SREG_LIT_0) 210f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(0)); 211f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 212f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // If the exec mask is non-zero, skip the next two instructions 213f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ)) 214f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(3) 215f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::EXEC); 216f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 217f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Exec mask is zero: Export to NULL target... 218f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP)) 219f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) 220f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0x09) // V_008DFC_SQ_EXP_NULL 221f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(0) 222f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(1) 223f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addImm(1) 224f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::SREG_LIT_0) 225f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::SREG_LIT_0) 226f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::SREG_LIT_0) 227f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::SREG_LIT_0); 228f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 229f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // ... and terminate wavefront 230f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM)); 231f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 232f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 233f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 234f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 235f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgvoid SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB, 236f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const 237f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 238f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_LT_F32_e32), 239f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AMDGPU::VCC) 240f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(1)) 241f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::SREG_LIT_0); 242f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 243f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32)) 244f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(0)) 245f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addReg(AMDGPU::VCC) 246f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(2)) 247f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org .addOperand(MI->getOperand(3)); 248f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 249f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MI->eraseFromParent(); 250f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 251f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 252f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgEVT SITargetLowering::getSetCCResultType(EVT VT) const 253f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 254f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return MVT::i1; 255f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 256f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 257f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 258f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Custom DAG Lowering Operations 259f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 260f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 261f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 262f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 263f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (Op.getOpcode()) { 264f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 265f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::BR_CC: return LowerBR_CC(Op, DAG); 266f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::LOAD: return LowerLOAD(Op, DAG); 267f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 268f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND); 269f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::INTRINSIC_WO_CHAIN: { 270f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned IntrinsicID = 271f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 272f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org EVT VT = Op.getValueType(); 273f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (IntrinsicID) { 274f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case AMDGPUIntrinsic::SI_vs_load_buffer_index: 275f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, 276f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AMDGPU::VGPR0, VT); 277f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 278f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 279f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 280f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 281f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 282f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return SDValue(); 283f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 284f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 285f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// Loweri1ContextSwitch - The function is for lowering i1 operations on the 286f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// VCC register. In the VALU context, VCC is a one bit register, but in the 287f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only 288f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// the SALU can perform operations on the VCC register, we need to promote 289f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// the operand types from i1 to i64 in order for tablegen to be able to match 290f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// this operation to the correct SALU instruction. We do this promotion by 291f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// wrapping the operands in a CopyToReg node. 292f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org/// 293f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op, 294f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SelectionDAG &DAG, 295f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned VCCNode) const 296f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 297f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DebugLoc DL = Op.getDebugLoc(); 298f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 299f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64, 300f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, 301f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Op.getOperand(0)), 302f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64, 303f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Op.getOperand(1))); 304f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 305f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode); 306f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 307f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 308f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 309f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 310f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue Chain = Op.getOperand(0); 311f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue CC = Op.getOperand(1); 312f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue LHS = Op.getOperand(2); 313f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue RHS = Op.getOperand(3); 314f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue JumpT = Op.getOperand(4); 315f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue CmpValue; 316f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue Result; 317f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CmpValue = DAG.getNode( 318f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ISD::SETCC, 319f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Op.getDebugLoc(), 320f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MVT::i1, 321f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LHS, RHS, 322f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CC); 323f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 324f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org Result = DAG.getNode( 325f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org AMDGPUISD::BRANCH_COND, 326f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org CmpValue.getDebugLoc(), 327f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org MVT::Other, Chain, 328f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org JumpT, CmpValue); 329f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return Result; 330f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 331f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 332f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const 333f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 334f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org EVT VT = Op.getValueType(); 335f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op); 336f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 337f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(Ptr); 338f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 339f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace(); 340f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 341f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // We only need to lower USER_SGPR address space loads 342f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) { 343f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return SDValue(); 344f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 345f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 346f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // Loads from the USER_SGPR address space can only have constant value 347f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // pointers. 348f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr()); 349f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(BasePtr); 350f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 351f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned TypeDwordWidth = VT.getSizeInBits() / 32; 352f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org const TargetRegisterClass * dstClass; 353f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (TypeDwordWidth) { 354f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: 355f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(!"USER_SGPR value size not implemented"); 356f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return SDValue(); 357f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 1: 358f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dstClass = &AMDGPU::SReg_32RegClass; 359f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 360f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case 2: 361f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org dstClass = &AMDGPU::SReg_64RegClass; 362f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 363f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 364f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org uint64_t Index = BasePtr->getZExtValue(); 365f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned"); 366f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned SGPRIndex = Index / TypeDwordWidth; 367f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org unsigned Reg = dstClass->getRegister(SGPRIndex); 368f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 369f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg, 370f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org VT)); 371f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return SDValue(); 372f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 373f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 374f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 375f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 376f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue LHS = Op.getOperand(0); 377f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue RHS = Op.getOperand(1); 378f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue True = Op.getOperand(2); 379f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue False = Op.getOperand(3); 380f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue CC = Op.getOperand(4); 381f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org EVT VT = Op.getValueType(); 382f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DebugLoc DL = Op.getDebugLoc(); 383f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 384f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); 385f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); 386f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 387f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 388f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 389f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org// Custom DAG optimizations 390f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org//===----------------------------------------------------------------------===// 391f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 392f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgSDValue SITargetLowering::PerformDAGCombine(SDNode *N, 393f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DAGCombinerInfo &DCI) const { 394f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SelectionDAG &DAG = DCI.DAG; 395f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DebugLoc DL = N->getDebugLoc(); 396f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org EVT VT = N->getValueType(0); 397f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 398f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (N->getOpcode()) { 399f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: break; 400f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::SELECT_CC: { 401f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org N->dump(); 402f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantSDNode *True, *False; 403f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) 404f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) 405f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) 406f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && True->isAllOnesValue() 407f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && False->isNullValue() 408f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && VT == MVT::i1) { 409f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), 410f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org N->getOperand(1), N->getOperand(4)); 411f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 412f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 413f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 414f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 415f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org case ISD::SETCC: { 416f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue Arg0 = N->getOperand(0); 417f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue Arg1 = N->getOperand(1); 418f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org SDValue CC = N->getOperand(2); 419f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ConstantSDNode * C = NULL; 420f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); 421f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 422f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) 423f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org if (VT == MVT::i1 424f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && Arg0.getOpcode() == ISD::SIGN_EXTEND 425f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && Arg0.getOperand(0).getValueType() == MVT::i1 426f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && (C = dyn_cast<ConstantSDNode>(Arg1)) 427f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && C->isNullValue() 428f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org && CCOp == ISD::SETNE) { 429f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return SimplifySetCC(VT, Arg0.getOperand(0), 430f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); 431f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 432f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org break; 433f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 434f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 435f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org return SDValue(); 436f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 437f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 438f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org#define NODE_NAME_CASE(node) case SIISD::node: return #node; 439f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org 440f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.orgconst char* SITargetLowering::getTargetNodeName(unsigned Opcode) const 441f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org{ 442f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org switch (Opcode) { 443f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org default: return AMDGPUTargetLowering::getTargetNodeName(Opcode); 444f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org NODE_NAME_CASE(VCC_AND) 445f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org NODE_NAME_CASE(VCC_BITCAST) 446f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org } 447f2ba7591b1407a7ee9209f842c50696914dc2dedkbr@chromium.org} 448