1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// 2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// The LLVM Compiler Infrastructure 4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source 6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details. 7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file 11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \brief SI Implementation of TargetInstrInfo. 12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIInstrInfo.h" 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUTargetMachine.h" 17de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "GCNHazardRecognizer.h" 18df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard#include "SIDefines.h" 1936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "SIMachineFunctionInfo.h" 2037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineFrameInfo.h" 21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 22f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 23de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar#include "llvm/CodeGen/ScheduleDAG.h" 24c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines#include "llvm/IR/Function.h" 2537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/RegisterScavenging.h" 26f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/MC/MCInstrDesc.h" 2737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/Debug.h" 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 31de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarSIInstrInfo::SIInstrInfo(const SISubtarget &ST) 32de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar : AMDGPUInstrInfo(ST), RI(), ST(ST) {} 33f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 34b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard//===----------------------------------------------------------------------===// 35b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard// TargetInstrInfo callbacks 36b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard//===----------------------------------------------------------------------===// 37b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 3837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic unsigned getNumOperandsNoGlue(SDNode *Node) { 3937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned N = Node->getNumOperands(); 4037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) 4137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --N; 4237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return N; 4337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 4437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic SDValue findChainOperand(SDNode *Load) { 4637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SDValue LastOp = Load->getOperand(getNumOperandsNoGlue(Load) - 1); 4737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(LastOp.getValueType() == MVT::Other && "Chain missing from load node"); 4837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return LastOp; 4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 5037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 5137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// \brief Returns true if both nodes have the same value for the given 5237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// operand \p Op, or if both nodes do not have this operand. 5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) { 5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Opc0 = N0->getMachineOpcode(); 5537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Opc1 = N1->getMachineOpcode(); 5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Op0Idx = AMDGPU::getNamedOperandIdx(Opc0, OpName); 5837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Op1Idx = AMDGPU::getNamedOperandIdx(Opc1, OpName); 5937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 6037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Op0Idx == -1 && Op1Idx == -1) 6137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 6237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 6337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 6437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if ((Op0Idx == -1 && Op1Idx != -1) || 6537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines (Op1Idx == -1 && Op0Idx != -1)) 6637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 6737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 6837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // getNamedOperandIdx returns the index for the MachineInstr's operands, 6937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // which includes the result as the first operand. We are indexing into the 7037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // MachineSDNode's operands, so we need to skip the result operand to get 7137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // the real index. 7237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --Op0Idx; 7337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --Op1Idx; 7437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 7537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return N0->getOperand(Op0Idx) == N1->getOperand(Op1Idx); 7637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 7737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 78de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI, 796948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar AliasAnalysis *AA) const { 806948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // TODO: The generic check fails for VALU instructions that should be 816948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // rematerializable due to implicit reads of exec. We really want all of the 826948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar // generic logic for this except for this. 83de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (MI.getOpcode()) { 846948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar case AMDGPU::V_MOV_B32_e32: 856948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar case AMDGPU::V_MOV_B32_e64: 86f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::V_MOV_B64_PSEUDO: 876948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return true; 886948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar default: 896948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar return false; 906948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar } 916948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar} 926948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 9337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, 9437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int64_t &Offset0, 9537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int64_t &Offset1) const { 9637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Load0->isMachineOpcode() || !Load1->isMachineOpcode()) 9737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 9837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 9937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Opc0 = Load0->getMachineOpcode(); 10037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Opc1 = Load1->getMachineOpcode(); 10137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 10237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Make sure both are actually loads. 10337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!get(Opc0).mayLoad() || !get(Opc1).mayLoad()) 10437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 10537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 10637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isDS(Opc0) && isDS(Opc1)) { 10737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 10837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // FIXME: Handle this case: 10937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (getNumOperandsNoGlue(Load0) != getNumOperandsNoGlue(Load1)) 11037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 11137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 11237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check base reg. 11337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Load0->getOperand(1) != Load1->getOperand(1)) 11437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 11537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 11637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check chain. 11737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (findChainOperand(Load0) != findChainOperand(Load1)) 11837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 11937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 12037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Skip read2 / write2 variants for simplicity. 12137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // TODO: We should report true if the used offsets are adjacent (excluded 12237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // st64 versions). 12337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 || 12437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1) 12537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 12637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 12737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue(); 12837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue(); 12937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 13037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 13137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 13237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isSMRD(Opc0) && isSMRD(Opc1)) { 13337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1)); 13437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 13537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check base reg. 13637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Load0->getOperand(0) != Load1->getOperand(0)) 13737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 13837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1394c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const ConstantSDNode *Load0Offset = 1404c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar dyn_cast<ConstantSDNode>(Load0->getOperand(1)); 1414c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const ConstantSDNode *Load1Offset = 1424c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar dyn_cast<ConstantSDNode>(Load1->getOperand(1)); 1434c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 1444c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar if (!Load0Offset || !Load1Offset) 1454c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar return false; 1464c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 14737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check chain. 14837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (findChainOperand(Load0) != findChainOperand(Load1)) 14937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 15037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1514c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Offset0 = Load0Offset->getZExtValue(); 1524c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar Offset1 = Load1Offset->getZExtValue(); 15337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 15437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 15537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 15637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // MUBUF and MTBUF can access the same addresses. 15737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) { 15837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 15937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // MUBUF and MTBUF have vaddr at different indices. 16037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::soffset) || 16137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines findChainOperand(Load0) != findChainOperand(Load1) || 16237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::vaddr) || 16337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines !nodesHaveSameOperandValue(Load0, Load1, AMDGPU::OpName::srsrc)) 16437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 16537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 16637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int OffIdx0 = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset); 16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int OffIdx1 = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset); 16837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 16937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (OffIdx0 == -1 || OffIdx1 == -1) 17037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 17137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 17237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // getNamedOperandIdx returns the index for MachineInstrs. Since they 17337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // inlcude the output in the operand list, but SDNodes don't, we need to 17437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // subtract the index by one. 17537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --OffIdx0; 17637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines --OffIdx1; 17737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 17837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SDValue Off0 = Load0->getOperand(OffIdx0); 17937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SDValue Off1 = Load1->getOperand(OffIdx1); 18037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 18137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The offset might be a FrameIndexSDNode. 18237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!isa<ConstantSDNode>(Off0) || !isa<ConstantSDNode>(Off1)) 18337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 18437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 18537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset0 = cast<ConstantSDNode>(Off0)->getZExtValue(); 18637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset1 = cast<ConstantSDNode>(Off1)->getZExtValue(); 18737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 18837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 18937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 19037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 19137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 19237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 19337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool isStride64(unsigned Opc) { 19437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines switch (Opc) { 19537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::DS_READ2ST64_B32: 19637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::DS_READ2ST64_B64: 19737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::DS_WRITE2ST64_B32: 19837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::DS_WRITE2ST64_B64: 19937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 20037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines default: 20137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 20237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 20437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 205de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, 206de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int64_t &Offset, 2076948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar const TargetRegisterInfo *TRI) const { 208de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = LdSt.getOpcode(); 209f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 210de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isDS(LdSt)) { 211de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *OffsetImm = 212de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::offset); 21337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (OffsetImm) { 21437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Normal, single offset LDS instruction. 215de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *AddrReg = 216de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::addr); 21737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 21837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BaseReg = AddrReg->getReg(); 21937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset = OffsetImm->getImm(); 22037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 22137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 22237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 22337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The 2 offset instructions use offset0 and offset1 instead. We can treat 22437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // these as a load with a single offset if the 2 offsets are consecutive. We 22537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // will use this for some partially aligned loads. 226de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Offset0Imm = 227de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::offset0); 228de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Offset1Imm = 229de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::offset1); 23037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 23137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines uint8_t Offset0 = Offset0Imm->getImm(); 23237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines uint8_t Offset1 = Offset1Imm->getImm(); 23337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 234f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Offset1 > Offset0 && Offset1 - Offset0 == 1) { 23537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Each of these offsets is in element sized units, so we need to convert 23637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // to bytes of the individual reads. 23737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 23837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned EltSize; 239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (LdSt.mayLoad()) 240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar EltSize = getOpRegClass(LdSt, 0)->getSize() / 2; 24137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines else { 242de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(LdSt.mayStore()); 24337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Data0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data0); 244de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar EltSize = getOpRegClass(LdSt, Data0Idx)->getSize(); 24537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 24637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 24737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (isStride64(Opc)) 24837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines EltSize *= 64; 24937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 250de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *AddrReg = 251de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::addr); 25237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BaseReg = AddrReg->getReg(); 25337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset = EltSize * Offset0; 25437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 25537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 25637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 25737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 25837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 25937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isMUBUF(LdSt) || isMTBUF(LdSt)) { 26137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) 26237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 26337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 264de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *AddrReg = 265de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::vaddr); 26637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!AddrReg) 26737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 26837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 269de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *OffsetImm = 270de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::offset); 27137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BaseReg = AddrReg->getReg(); 27237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset = OffsetImm->getImm(); 27337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 27437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 27537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 276de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isSMRD(LdSt)) { 277de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *OffsetImm = 278de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::offset); 27937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!OffsetImm) 28037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 28137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 282de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *SBaseReg = 283de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(LdSt, AMDGPU::OpName::sbase); 28437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BaseReg = SBaseReg->getReg(); 28537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Offset = OffsetImm->getImm(); 28637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 28737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 28837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 289de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isFLAT(LdSt)) { 290de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr); 291de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BaseReg = AddrReg->getReg(); 292de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Offset = 0; 293de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 294de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 295de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 29637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 29737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 29837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 299de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, 300de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &SecondLdSt, 301de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned NumLoads) const { 302de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *FirstDst = nullptr; 303de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *SecondDst = nullptr; 304de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 305de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isDS(FirstLdSt) && isDS(SecondLdSt)) { 306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst); 307de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst); 308de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 309de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 310de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) { 311de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst); 312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst); 313de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 314de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 315de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) || 316de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) { 317de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata); 318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata); 319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 321de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!FirstDst || !SecondDst) 32237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 32337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 324de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Try to limit clustering based on the total number of bytes loaded 325de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // rather than the number of instructions. This is done to help reduce 326de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // register pressure. The method used is somewhat inexact, though, 327de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // because it assumes that all loads in the cluster will load the 328de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // same number of bytes as FirstLdSt. 32937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 330de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // The unit of this value is bytes. 331de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // FIXME: This needs finer tuning. 332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned LoadClusterThreshold = 16; 33337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 334de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineRegisterInfo &MRI = 335de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FirstLdSt.getParent()->getParent()->getRegInfo(); 336de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const TargetRegisterClass *DstRC = MRI.getRegClass(FirstDst->getReg()); 33737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 338de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return (NumLoads * DstRC->getSize()) <= LoadClusterThreshold; 33937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 34037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 341de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, 342de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock::iterator MI, 343de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const DebugLoc &DL, unsigned DestReg, 344de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SrcReg, bool KillSrc) const { 34566501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 346f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // If we are trying to copy to or from SCC, there is a bug somewhere else in 347f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // the backend. While it may be theoretically possible to do this, it should 348f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // never be necessary. 349f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC); 350f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 351787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper static const int16_t Sub0_15[] = { 35266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 35366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 35466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, 355f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 356f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar }; 357f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 358f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar static const int16_t Sub0_15_64[] = { 359f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 360f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 361f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, 362f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, 36366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig }; 36466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 365787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper static const int16_t Sub0_7[] = { 36666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 367f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 368f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar }; 369f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 370f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar static const int16_t Sub0_7_64[] = { 371f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 372f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, 37366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig }; 37466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 375787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper static const int16_t Sub0_3[] = { 376f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 377f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar }; 378f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 379f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar static const int16_t Sub0_3_64[] = { 380f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, 38166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig }; 38266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 383787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper static const int16_t Sub0_2[] = { 384f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 3854d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig }; 3864d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 387787e71df693e94cc512f3e439bf91609a8ec9baeCraig Topper static const int16_t Sub0_1[] = { 388f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::sub0, AMDGPU::sub1, 38966501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig }; 39066501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 39166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig unsigned Opcode; 392f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ArrayRef<int16_t> SubIndices; 393f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool Forward; 39466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 39566501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig if (AMDGPU::SReg_32RegClass.contains(DestReg)) { 39666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); 39766501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) 39866501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig .addReg(SrcReg, getKillRegState(KillSrc)); 39966501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig return; 40066501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 40160fc58262f4dba20c1ea5ede63e5a2c322489d32Tom Stellard } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { 402ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DestReg == AMDGPU::VCC) { 403ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { 404ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) 405ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines .addReg(SrcReg, getKillRegState(KillSrc)); 406ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else { 407ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: Hack until VReg_1 removed. 408ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); 409f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) 410ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines .addImm(0) 411ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines .addReg(SrcReg, getKillRegState(KillSrc)); 412ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 413ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 414ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return; 415ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 416ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 417f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard assert(AMDGPU::SReg_64RegClass.contains(SrcReg)); 418f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) 419f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard .addReg(SrcReg, getKillRegState(KillSrc)); 42066501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig return; 42166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 42266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { 42366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); 424f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Opcode = AMDGPU::S_MOV_B64; 425f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SubIndices = Sub0_3_64; 42666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 42766501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { 42866501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); 429f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Opcode = AMDGPU::S_MOV_B64; 430f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SubIndices = Sub0_7_64; 43166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 43266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { 43366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); 434f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Opcode = AMDGPU::S_MOV_B64; 435f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SubIndices = Sub0_15_64; 43666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 437ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { 438ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || 439661bd3df7518a3d984dada66473602a0401618baNAKAMURA Takumi AMDGPU::SReg_32RegClass.contains(SrcReg)); 440f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) 441f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard .addReg(SrcReg, getKillRegState(KillSrc)); 44266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig return; 44366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 44466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { 44566501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || 446661bd3df7518a3d984dada66473602a0401618baNAKAMURA Takumi AMDGPU::SReg_64RegClass.contains(SrcReg)); 44766501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig Opcode = AMDGPU::V_MOV_B32_e32; 44866501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig SubIndices = Sub0_1; 44966501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 4504d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { 4514d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); 4524d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig Opcode = AMDGPU::V_MOV_B32_e32; 4534d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig SubIndices = Sub0_2; 4544d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 45566501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) { 45666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || 457661bd3df7518a3d984dada66473602a0401618baNAKAMURA Takumi AMDGPU::SReg_128RegClass.contains(SrcReg)); 45866501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig Opcode = AMDGPU::V_MOV_B32_e32; 45966501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig SubIndices = Sub0_3; 46066501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 46166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { 46266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || 463661bd3df7518a3d984dada66473602a0401618baNAKAMURA Takumi AMDGPU::SReg_256RegClass.contains(SrcReg)); 46466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig Opcode = AMDGPU::V_MOV_B32_e32; 46566501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig SubIndices = Sub0_7; 46666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 46766501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { 46866501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || 469661bd3df7518a3d984dada66473602a0401618baNAKAMURA Takumi AMDGPU::SReg_512RegClass.contains(SrcReg)); 47066501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig Opcode = AMDGPU::V_MOV_B32_e32; 47166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig SubIndices = Sub0_15; 47266501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 473f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } else { 47466501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig llvm_unreachable("Can't copy register!"); 47566501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig } 47666501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 477f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg)) 478f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Forward = true; 479f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar else 480f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Forward = false; 481f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 482f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { 483f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned SubIdx; 484f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Forward) 485f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SubIdx = SubIndices[Idx]; 486f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar else 487f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SubIdx = SubIndices[SubIndices.size() - Idx - 1]; 488f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 48966501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, 49066501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig get(Opcode), RI.getSubReg(DestReg, SubIdx)); 49166501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 492f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); 49366501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig 494f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Idx == SubIndices.size() - 1) 495de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Builder.addReg(SrcReg, getKillRegState(KillSrc) | RegState::Implicit); 496f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 497f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Idx == 0) 49866501123d1b7b0395a9de091bf72b2cd42a04dc6Christian Konig Builder.addReg(DestReg, RegState::Define | RegState::Implicit); 499f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 500f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 501f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 502f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarint SIInstrInfo::commuteOpcode(const MachineInstr &MI) const { 5034c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const unsigned Opcode = MI.getOpcode(); 5044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 505e49230895d9c666b84beaa748259fbf1f6715122Christian Konig int NewOpc; 506e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 507e49230895d9c666b84beaa748259fbf1f6715122Christian Konig // Try to map original to commuted opcode 508ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpc = AMDGPU::getCommuteRev(Opcode); 509f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (NewOpc != -1) 510f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Check if the commuted (REV) opcode exists on the target. 511f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 512e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 513e49230895d9c666b84beaa748259fbf1f6715122Christian Konig // Try to map commuted to original opcode 514ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpc = AMDGPU::getCommuteOrig(Opcode); 515f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (NewOpc != -1) 516f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Check if the original (non-REV) opcode exists on the target. 517f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return pseudoToMCOpcode(NewOpc) != -1 ? NewOpc : -1; 518e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 519e49230895d9c666b84beaa748259fbf1f6715122Christian Konig return Opcode; 520e49230895d9c666b84beaa748259fbf1f6715122Christian Konig} 521e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 522ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesunsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { 52337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 524ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DstRC->getSize() == 4) { 525ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32; 526ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else if (DstRC->getSize() == 8 && RI.isSGPRClass(DstRC)) { 527ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AMDGPU::S_MOV_B64; 528ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else if (DstRC->getSize() == 8 && !RI.isSGPRClass(DstRC)) { 529ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AMDGPU::V_MOV_B64_PSEUDO; 530ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 531ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return AMDGPU::COPY; 53237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 53337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 534f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic unsigned getSGPRSpillSaveOpcode(unsigned Size) { 535f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar switch (Size) { 536f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 4: 537f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S32_SAVE; 538f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 8: 539f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S64_SAVE; 540f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 16: 541f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S128_SAVE; 542f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 32: 543f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S256_SAVE; 544f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 64: 545f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S512_SAVE; 546f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar default: 547f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar llvm_unreachable("unknown register size"); 548f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 549f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 550f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 551f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic unsigned getVGPRSpillSaveOpcode(unsigned Size) { 552f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar switch (Size) { 553f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 4: 554f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V32_SAVE; 555f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 8: 556f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V64_SAVE; 557de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case 12: 558de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::SI_SPILL_V96_SAVE; 559f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 16: 560f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V128_SAVE; 561f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 32: 562f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V256_SAVE; 563f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 64: 564f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V512_SAVE; 565f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar default: 566f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar llvm_unreachable("unknown register size"); 567f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 568f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 569f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 57036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 57136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MI, 57236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SrcReg, bool isKill, 57336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines int FrameIndex, 57436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *RC, 57536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterInfo *TRI) const { 576c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineFunction *MF = MBB.getParent(); 577ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 57837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 57936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DebugLoc DL = MBB.findDebugLoc(MI); 580f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 581f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Size = FrameInfo->getObjectSize(FrameIndex); 582f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 583f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachinePointerInfo PtrInfo 584f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 585f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineMemOperand *MMO 586f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, 587f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Size, Align); 58836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 58937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (RI.isSGPRClass(RC)) { 590f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MFI->setHasSpilledSGPRs(); 591f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) { 593de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // m0 may not be allowed for readlane. 594de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineRegisterInfo &MRI = MF->getRegInfo(); 595de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass); 596de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 597de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 598dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We are only allowed to create one new instruction when spilling 59937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // registers, so we need to use pseudo instruction for spilling 60037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // SGPRs. 601f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); 602f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(Opcode)) 603de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SrcReg, getKillRegState(isKill)) // src 604f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addFrameIndex(FrameIndex) // frame_idx 605f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addMemOperand(MMO); 606f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 607f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 60837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 609dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 610de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 61137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LLVMContext &Ctx = MF->getFunction()->getContext(); 61237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" 61337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines " spill register"); 614ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) 615f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(SrcReg); 616f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 617f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 618f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 619f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 620f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 621f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 622f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); 623f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MFI->setHasSpilledVGPRs(); 624f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(Opcode)) 625de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SrcReg, getKillRegState(isKill)) // src 626f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addFrameIndex(FrameIndex) // frame_idx 627f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 628f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 629de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) // offset 630f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addMemOperand(MMO); 631f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 632f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 633f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic unsigned getSGPRSpillRestoreOpcode(unsigned Size) { 634f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar switch (Size) { 635f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 4: 636f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S32_RESTORE; 637f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 8: 638f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S64_RESTORE; 639f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 16: 640f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S128_RESTORE; 641f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 32: 642f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S256_RESTORE; 643f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 64: 644f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_S512_RESTORE; 645f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar default: 646f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar llvm_unreachable("unknown register size"); 647f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 648f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 649f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 650f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic unsigned getVGPRSpillRestoreOpcode(unsigned Size) { 651f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar switch (Size) { 652f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 4: 653f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V32_RESTORE; 654f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 8: 655f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V64_RESTORE; 656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case 12: 657de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::SI_SPILL_V96_RESTORE; 658f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 16: 659f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V128_RESTORE; 660f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 32: 661f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V256_RESTORE; 662f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case 64: 663f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::SI_SPILL_V512_RESTORE; 664f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar default: 665f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar llvm_unreachable("unknown register size"); 66636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 66736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 66836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 66936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesvoid SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 67036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MI, 67136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned DestReg, int FrameIndex, 67236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *RC, 67336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterInfo *TRI) const { 674c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineFunction *MF = MBB.getParent(); 675ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 67637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineFrameInfo *FrameInfo = MF->getFrameInfo(); 67736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DebugLoc DL = MBB.findDebugLoc(MI); 678f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); 679f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Size = FrameInfo->getObjectSize(FrameIndex); 680dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 681f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachinePointerInfo PtrInfo 682f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar = MachinePointerInfo::getFixedStack(*MF, FrameIndex); 683f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 684f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineMemOperand *MMO = MF->getMachineMemOperand( 685f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar PtrInfo, MachineMemOperand::MOLoad, Size, Align); 686f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 687f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (RI.isSGPRClass(RC)) { 688f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // FIXME: Maybe this should not include a memoperand because it will be 689f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // lowered to non-memory instructions. 690f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); 691de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 692de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) { 693de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // m0 may not be allowed for readlane. 694de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineRegisterInfo &MRI = MF->getRegInfo(); 695de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass); 696de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 697de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 698dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildMI(MBB, MI, DL, get(Opcode), DestReg) 699f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addFrameIndex(FrameIndex) // frame_idx 700f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addMemOperand(MMO); 701ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 702f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 703f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 704f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 705de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!ST.isVGPRSpillingEnabled(*MF->getFunction())) { 70637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LLVMContext &Ctx = MF->getFunction()->getContext(); 70737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" 70837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines " restore register"); 709ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); 710f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 711f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 712dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 713f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 714f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); 715f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 716f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); 717f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(Opcode), DestReg) 718f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addFrameIndex(FrameIndex) // frame_idx 719f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc 720f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset 721de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) // offset 722f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addMemOperand(MMO); 723dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 724dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 72537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines/// \param @Offset Offset in bytes of the FrameIndex being spilled 726de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::calculateLDSSpillAddress( 727de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB, MachineInstr &MI, RegScavenger *RS, unsigned TmpReg, 728de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned FrameOffset, unsigned Size) const { 72937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineFunction *MF = MBB.getParent(); 73037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); 731de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const SISubtarget &ST = MF->getSubtarget<SISubtarget>(); 732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const SIRegisterInfo *TRI = ST.getRegisterInfo(); 73337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DebugLoc DL = MBB.findDebugLoc(MI); 73437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF); 73537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned WavefrontSize = ST.getWavefrontSize(); 73637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 73737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned TIDReg = MFI->getTIDReg(); 73837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!MFI->hasCalculatedTID()) { 73937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineBasicBlock &Entry = MBB.getParent()->front(); 74037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineBasicBlock::iterator Insert = Entry.front(); 74137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DebugLoc DL = Insert->getDebugLoc(); 74237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 743ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass); 74437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (TIDReg == AMDGPU::NoRegister) 74537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return TIDReg; 74637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 747de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!AMDGPU::isShader(MF->getFunction()->getCallingConv()) && 74837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines WorkGroupSize > WavefrontSize) { 74937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 750f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned TIDIGXReg 751f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X); 752f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned TIDIGYReg 753f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y); 754f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned TIDIGZReg 755f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z); 75637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned InputPtrReg = 757f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); 7584c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { 75937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Entry.isLiveIn(Reg)) 76037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Entry.addLiveIn(Reg); 76137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 76237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 763de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar RS->enterBasicBlock(Entry); 764f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // FIXME: Can we scavenge an SReg_64 and access the subregs? 76537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); 76637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); 76737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) 76837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(InputPtrReg) 76937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(SI::KernelInputOffsets::NGROUPS_Z); 77037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp1) 77137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(InputPtrReg) 77237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(SI::KernelInputOffsets::NGROUPS_Y); 77337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 77437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // NGROUPS.X * NGROUPS.Y 77537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::S_MUL_I32), STmp1) 77637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(STmp1) 77737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(STmp0); 77837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // (NGROUPS.X * NGROUPS.Y) * TIDIG.X 77937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::V_MUL_U32_U24_e32), TIDReg) 78037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(STmp1) 78137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDIGXReg); 78237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X) 78337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::V_MAD_U32_U24), TIDReg) 78437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(STmp0) 78537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDIGYReg) 78637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDReg); 78737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // (NGROUPS.Z * TIDIG.Y + (NGROUPS.X * NGROPUS.Y * TIDIG.X)) + TIDIG.Z 78837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::V_ADD_I32_e32), TIDReg) 78937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDReg) 79037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDIGZReg); 79137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 79237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Get the wave id 79337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_LO_U32_B32_e64), 79437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TIDReg) 79537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(-1) 79637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(0); 79737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 798ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::V_MBCNT_HI_U32_B32_e64), 79937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TIDReg) 80037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(-1) 80137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDReg); 80237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 80337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 80437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(Entry, Insert, DL, get(AMDGPU::V_LSHLREV_B32_e32), 80537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines TIDReg) 80637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(2) 80737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDReg); 80837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MFI->setTIDReg(TIDReg); 809dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 81037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Add FrameIndex to LDS offset 81237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize); 81337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg) 81437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(LDSOffset) 81537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TIDReg); 81637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 81737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return TmpReg; 818dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 819dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 820de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB, 821de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock::iterator MI, 822f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int Count) const { 823de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = MBB.findDebugLoc(MI); 824dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines while (Count > 0) { 825dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int Arg; 826dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Count >= 8) 827dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Arg = 7; 828dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines else 829dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Arg = Count - 1; 830dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Count -= 8; 831de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, DL, get(AMDGPU::S_NOP)) 832dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(Arg); 833dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 834dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 835dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 836de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::insertNoop(MachineBasicBlock &MBB, 837de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock::iterator MI) const { 838de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar insertWaitStates(MBB, MI, 1); 839de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 840de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 841de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const { 842de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (MI.getOpcode()) { 843de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar default: return 1; // FIXME: Do wait states equal cycles? 844de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 845de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_NOP: 846de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return MI.getOperand(0).getImm() + 1; 847de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 848de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 849de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 850de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { 851de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *MI.getParent(); 852dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DebugLoc DL = MBB.findDebugLoc(MI); 853de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (MI.getOpcode()) { 854dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); 855dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 856ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::V_MOV_B64_PSEUDO: { 857de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Dst = MI.getOperand(0).getReg(); 858ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 859ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 860ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 861de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &SrcOp = MI.getOperand(1); 862ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: Will this work for 64-bit floating point immediates? 863ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(!SrcOp.isFPImm()); 864ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (SrcOp.isImm()) { 865ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines APInt Imm(64, SrcOp.getImm()); 866ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 867de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(Imm.getLoBits(32).getZExtValue()) 868de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Dst, RegState::Implicit | RegState::Define); 869ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 870de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(Imm.getHiBits(32).getZExtValue()) 871de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Dst, RegState::Implicit | RegState::Define); 872ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } else { 873ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(SrcOp.isReg()); 874ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo) 875de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub0)) 876de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Dst, RegState::Implicit | RegState::Define); 877ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstHi) 878de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RI.getSubReg(SrcOp.getReg(), AMDGPU::sub1)) 879de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Dst, RegState::Implicit | RegState::Define); 880ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 881de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.eraseFromParent(); 882ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 883ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 8844c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 8854c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AMDGPU::V_CNDMASK_B64_PSEUDO: { 886de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Dst = MI.getOperand(0).getReg(); 8874c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0); 8884c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1); 889de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Src0 = MI.getOperand(1).getReg(); 890de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Src1 = MI.getOperand(2).getReg(); 891de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &SrcCond = MI.getOperand(3); 8924c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar 8934c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo) 894de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RI.getSubReg(Src0, AMDGPU::sub0)) 895de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RI.getSubReg(Src1, AMDGPU::sub0)) 896de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SrcCond.getReg()) 897de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Dst, RegState::Implicit | RegState::Define); 8984c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi) 899de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RI.getSubReg(Src0, AMDGPU::sub1)) 900de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RI.getSubReg(Src1, AMDGPU::sub1)) 901de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill())) 902de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Dst, RegState::Implicit | RegState::Define); 903de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.eraseFromParent(); 9044c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar break; 9054c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar } 906f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 907de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::SI_PC_ADD_REL_OFFSET: { 908de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const SIRegisterInfo *TRI 909de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo()); 910f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineFunction &MF = *MBB.getParent(); 911de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Reg = MI.getOperand(0).getReg(); 912f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); 913f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); 914f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 915f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Create a bundle so these instructions won't be re-ordered by the 916f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // post-RA scheduler. 917f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MIBundleBuilder Bundler(MBB, MI); 918f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); 919f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 920f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Add 32-bit offset from this instruction to the start of the 921f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // constant data. 922f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) 923de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(RegLo) 924de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(MI.getOperand(1))); 925f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) 926f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(RegHi) 927f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addImm(0)); 928f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 929f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar llvm::finalizeBundle(MBB, Bundler.begin()); 930f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 931de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.eraseFromParent(); 932f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar break; 933f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 93436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 935dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return true; 93636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 93736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 938f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Commutes the operands in the given instruction. 939f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// The commutable operands are specified by their indices OpIdx0 and OpIdx1. 940f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// 941f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Do not call this method for a non-commutable instruction or for 942f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// non-commutable pair of operand indices OpIdx0 and OpIdx1. 943f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// Even though the instruction is commutable, the method may still 944f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar/// fail to commute the operands, null pointer is returned in such cases. 945de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarMachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, 946f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned OpIdx0, 947f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned OpIdx1) const { 948de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int CommutedOpcode = commuteOpcode(MI); 949f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (CommutedOpcode == -1) 95037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return nullptr; 951b3d1eaded7d7a874bbda2b0d322df7389c724bfcChristian Konig 952de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int Src0Idx = 953de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); 954de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src0 = MI.getOperand(Src0Idx); 95537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Src0.isReg()) 95637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return nullptr; 95737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 958de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int Src1Idx = 959de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1); 960f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 961f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if ((OpIdx0 != static_cast<unsigned>(Src0Idx) || 962f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar OpIdx1 != static_cast<unsigned>(Src1Idx)) && 963f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar (OpIdx0 != static_cast<unsigned>(Src1Idx) || 964f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar OpIdx1 != static_cast<unsigned>(Src0Idx))) 965dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 966b3d1eaded7d7a874bbda2b0d322df7389c724bfcChristian Konig 967de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src1 = MI.getOperand(Src1Idx); 96837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 969de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isVOP2(MI) || isVOPC(MI)) { 970de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MCInstrDesc &InstrDesc = MI.getDesc(); 971de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // For VOP2 and VOPC instructions, any operand type is valid to use for 972de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // src0. Make sure we can use the src0 as src1. 973f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // 974f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We could be stricter here and only allow commuting if there is a reason 975f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // to do so. i.e. if both operands are VGPRs there is no real benefit, 976f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // although MachineCSE attempts to find matches by commuting. 977de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 978f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) 979f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return nullptr; 980ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 981b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 982de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr *CommutedMI = &MI; 98337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Src1.isReg()) { 984ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Allow commuting instructions with Imm operands. 985de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) { 986dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 987b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 98837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Be sure to copy the source modifiers to the right place. 989de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MachineOperand *Src0Mods = 990de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) { 991de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *Src1Mods = 992de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(MI, AMDGPU::OpName::src1_modifiers); 993b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 99437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Src0ModsVal = Src0Mods->getImm(); 99537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!Src1Mods && Src0ModsVal != 0) 99637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return nullptr; 99737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 99837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // XXX - This assert might be a lie. It might be useful to have a neg 99937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // modifier with 0.0. 100037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Src1ModsVal = Src1Mods->getImm(); 100137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates"); 100237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 100337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Src1Mods->setImm(Src0ModsVal); 100437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Src0Mods->setImm(Src1ModsVal); 100537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 100637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 100737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Reg = Src0.getReg(); 100837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned SubReg = Src0.getSubReg(); 100937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Src1.isImm()) 101037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Src0.ChangeToImmediate(Src1.getImm()); 101137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines else 101237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines llvm_unreachable("Should only have immediates"); 101337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 101437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Src1.ChangeToRegister(Reg, false); 101537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Src1.setSubReg(SubReg); 1016b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } else { 1017de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CommutedMI = 1018de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); 1019b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 1020e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 1021de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (CommutedMI) 1022de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar CommutedMI->setDesc(get(CommutedOpcode)); 1023e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 1024de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return CommutedMI; 1025b3d1eaded7d7a874bbda2b0d322df7389c724bfcChristian Konig} 1026b3d1eaded7d7a874bbda2b0d322df7389c724bfcChristian Konig 102737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This needs to be implemented because the source modifiers may be inserted 102837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// between the true commutable operands, and the base 102937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// TargetInstrInfo::commuteInstruction uses it. 1030de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0, 1031f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned &SrcOpIdx1) const { 1032de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MCInstrDesc &MCID = MI.getDesc(); 103337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!MCID.isCommutable()) 103437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 103537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1036de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = MI.getOpcode(); 103737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 103837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Src0Idx == -1) 103937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 104037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 104137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on 1042f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // immediate. Also, immediate src0 operand is not handled in 1043f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // SIInstrInfo::commuteInstruction(); 1044de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MI.getOperand(Src0Idx).isReg()) 104537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 104637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 104737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 104837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Src1Idx == -1) 104937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 105037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1051de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src1 = MI.getOperand(Src1Idx); 1052f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Src1.isImm()) { 1053f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // SIInstrInfo::commuteInstruction() does support commuting the immediate 1054f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // operand src1 in 2 and 3 operand instructions. 1055de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode())) 1056f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 1057f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else if (Src1.isReg()) { 1058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If any source modifiers are set, the generic instruction commuting won't 1059f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // understand how to copy the source modifiers. 1060de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) || 1061de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)) 1062f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 1063f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else 106437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 106537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1066f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); 106737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 106837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1069de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { 1070de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (Cond) { 1071de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case SIInstrInfo::SCC_TRUE: 1072de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::S_CBRANCH_SCC1; 1073de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case SIInstrInfo::SCC_FALSE: 1074de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::S_CBRANCH_SCC0; 1075de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case SIInstrInfo::VCCNZ: 1076de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::S_CBRANCH_VCCNZ; 1077de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case SIInstrInfo::VCCZ: 1078de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::S_CBRANCH_VCCZ; 1079de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case SIInstrInfo::EXECNZ: 1080de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::S_CBRANCH_EXECNZ; 1081de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case SIInstrInfo::EXECZ: 1082de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return AMDGPU::S_CBRANCH_EXECZ; 1083de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar default: 1084de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar llvm_unreachable("invalid branch predicate"); 1085de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1086de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1087de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1088de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarSIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) { 1089de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (Opcode) { 1090de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_SCC0: 1091de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return SCC_FALSE; 1092de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_SCC1: 1093de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return SCC_TRUE; 1094de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_VCCNZ: 1095de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return VCCNZ; 1096de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_VCCZ: 1097de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return VCCZ; 1098de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_EXECNZ: 1099de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return EXECNZ; 1100de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_EXECZ: 1101de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return EXECZ; 1102de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar default: 1103de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return INVALID_BR; 1104de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 110504c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard} 110604c559569f87d755c3f2828a765f5eb7308e6753Tom Stellard 1107de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, 1108de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *&FBB, 1109de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<MachineOperand> &Cond, 1110de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool AllowModify) const { 1111de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 1112de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1113de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (I == MBB.end()) 1114de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1115de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1116de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (I->getOpcode() == AMDGPU::S_BRANCH) { 1117de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Unconditional Branch 1118de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TBB = I->getOperand(0).getMBB(); 1119de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1120de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1121de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1122de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BranchPredicate Pred = getBranchPredicate(I->getOpcode()); 1123de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Pred == INVALID_BR) 1124f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return true; 1125de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1126de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *CondBB = I->getOperand(0).getMBB(); 1127de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Cond.push_back(MachineOperand::CreateImm(Pred)); 1128de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1129de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ++I; 1130de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1131de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (I == MBB.end()) { 1132de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Conditional branch followed by fall-through. 1133de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TBB = CondBB; 1134de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1135f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 1136de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1137de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (I->getOpcode() == AMDGPU::S_BRANCH) { 1138de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TBB = CondBB; 1139de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar FBB = I->getOperand(0).getMBB(); 1140de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1141de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1142de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1143de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 1144de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1145de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1146de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 1147de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock::iterator I = MBB.getFirstTerminator(); 1148de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1149de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Count = 0; 1150de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar while (I != MBB.end()) { 1151de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock::iterator Next = std::next(I); 1152de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar I->eraseFromParent(); 1153de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ++Count; 1154de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar I = Next; 1155de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1156de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1157de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return Count; 1158de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1159de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1160de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB, 1161de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *TBB, 1162de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *FBB, 1163de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ArrayRef<MachineOperand> Cond, 1164de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const DebugLoc &DL) const { 1165de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1166de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!FBB && Cond.empty()) { 1167de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 1168de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addMBB(TBB); 1169de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 1; 1170de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1171de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1172de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(TBB && Cond[0].isImm()); 1173de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1174de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opcode 1175de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar = getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm())); 1176de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1177de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!FBB) { 1178de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(&MBB, DL, get(Opcode)) 1179de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addMBB(TBB); 1180de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 1; 1181de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1182de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1183de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(TBB && FBB); 1184de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1185de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(&MBB, DL, get(Opcode)) 1186de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addMBB(TBB); 1187de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH)) 1188de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addMBB(FBB); 1189de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1190de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 2; 1191de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1192de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1193de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::ReverseBranchCondition( 1194de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<MachineOperand> &Cond) const { 1195de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(Cond.size() == 1); 1196de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Cond[0].setImm(-Cond[0].getImm()); 1197de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1198f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 1199f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 1200ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic void removeModOperands(MachineInstr &MI) { 1201ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned Opc = MI.getOpcode(); 1202ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, 1203ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AMDGPU::OpName::src0_modifiers); 1204ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, 1205ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AMDGPU::OpName::src1_modifiers); 1206ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines int Src2ModIdx = AMDGPU::getNamedOperandIdx(Opc, 1207ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines AMDGPU::OpName::src2_modifiers); 1208ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1209ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MI.RemoveOperand(Src2ModIdx); 1210ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MI.RemoveOperand(Src1ModIdx); 1211ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines MI.RemoveOperand(Src0ModIdx); 1212ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 1213ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1214de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// TODO: Maybe this should be removed this and custom fold everything in 1215de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// SIFoldOperands? 1216de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, 1217ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned Reg, MachineRegisterInfo *MRI) const { 1218ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!MRI->hasOneNonDBGUse(Reg)) 1219ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1220ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1221de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = UseMI.getOpcode(); 1222f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) { 1223ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Don't fold if we are using source modifiers. The new VOP2 instructions 1224ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // don't have them. 1225de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) || 1226de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar hasModifiersSet(UseMI, AMDGPU::OpName::src1_modifiers) || 1227de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar hasModifiersSet(UseMI, AMDGPU::OpName::src2_modifiers)) { 1228ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1229ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1230ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1231de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &ImmOp = DefMI.getOperand(1); 1232de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If this is a free constant, there's no reason to do this. 1234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // TODO: We could fold this here instead of letting SIFoldOperands do it 1235de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // later. 1236de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isInlineConstant(ImmOp, 4)) 1237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1238de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1239de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0); 1240de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); 1241de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); 1242ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1243ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Multiplied part is the constant: Use v_madmk_f32 1244ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // We should only expect these to be on src0 due to canonicalizations. 1245ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Src0->isReg() && Src0->getReg() == Reg) { 1246de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1247ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1248ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1249de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) 1250ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1251ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1252de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // We need to swap operands 0 and 1 since madmk constant is at operand 1. 1253ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1254de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const int64_t Imm = DefMI.getOperand(1).getImm(); 1255ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1256ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: This would be a lot easier if we could return a new instruction 1257ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // instead of having to modify in place. 1258ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1259ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Remove these first since they are at the end. 1260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.RemoveOperand( 1261de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); 1262de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.RemoveOperand( 1263de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); 1264ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1265ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned Src1Reg = Src1->getReg(); 1266ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned Src1SubReg = Src1->getSubReg(); 1267ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Src0->setReg(Src1Reg); 1268ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Src0->setSubReg(Src1SubReg); 12696948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar Src0->setIsKill(Src1->isKill()); 12706948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar 1271f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Opc == AMDGPU::V_MAC_F32_e64) { 1272de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.untieRegOperand( 1273de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1274f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1275f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1276de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Src1->ChangeToImmediate(Imm); 1277ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1278de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar removeModOperands(UseMI); 1279de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.setDesc(get(AMDGPU::V_MADMK_F32)); 1280ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1281ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1282ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DeleteDef) 1283de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DefMI.eraseFromParent(); 1284ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1285ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return true; 1286ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1287ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1288ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Added part is the constant: Use v_madak_f32 1289ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Src2->isReg() && Src2->getReg() == Reg) { 1290ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Not allowed to use constant bus for another operand. 1291ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // We can however allow an inline immediate as src0. 1292ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (!Src0->isImm() && 1293ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))) 1294ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1295ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1296de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) 1297ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1298ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1299de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const int64_t Imm = DefMI.getOperand(1).getImm(); 1300ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1301ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // FIXME: This would be a lot easier if we could return a new instruction 1302ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // instead of having to modify in place. 1303ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1304ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Remove these first since they are at the end. 1305de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.RemoveOperand( 1306de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod)); 1307de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.RemoveOperand( 1308de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp)); 1309ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1310f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Opc == AMDGPU::V_MAC_F32_e64) { 1311de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.untieRegOperand( 1312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); 1313f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1314f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1315f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // ChangingToImmediate adds Src2 back to the instruction. 1316ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Src2->ChangeToImmediate(Imm); 1317ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1318ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // These come before src2. 1319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar removeModOperands(UseMI); 1320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar UseMI.setDesc(get(AMDGPU::V_MADAK_F32)); 1321ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1322ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines bool DeleteDef = MRI->hasOneNonDBGUse(Reg); 1323ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (DeleteDef) 1324de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DefMI.eraseFromParent(); 1325ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1326ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return true; 1327ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1328ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1329ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1330ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1331ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 1332ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 133337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool offsetsDoNotOverlap(int WidthA, int OffsetA, 133437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int WidthB, int OffsetB) { 133537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; 133637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA; 133737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB; 133837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return LowOffset + LowWidth <= HighOffset; 133937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 134037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1341de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr &MIa, 1342de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &MIb) const { 1343de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned BaseReg0, BaseReg1; 1344de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int64_t Offset0, Offset1; 134537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 13466948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) && 13476948897e478cbd66626159776a8017b3c18579b9Pirama Arumuga Nainar getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) { 1348de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1349de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand()) { 1350de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // FIXME: Handle ds_read2 / ds_write2. 1351de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1352de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1353de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Width0 = (*MIa.memoperands_begin())->getSize(); 1354de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Width1 = (*MIb.memoperands_begin())->getSize(); 135537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BaseReg0 == BaseReg1 && 135637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines offsetsDoNotOverlap(Width0, Offset0, Width1, Offset1)) { 135737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 135837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 135937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 136037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 136137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 136237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 136337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1364de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa, 1365de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &MIb, 136637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AliasAnalysis *AA) const { 1367de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert((MIa.mayLoad() || MIa.mayStore()) && 136837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "MIa must load from or modify a memory location"); 1369de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert((MIb.mayLoad() || MIb.mayStore()) && 137037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "MIb must load from or modify a memory location"); 137137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects()) 137337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 137437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 137537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // XXX - Can we relax this between address spaces? 1376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef()) 137737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 137837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 137937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // TODO: Should we check the address space from the MachineMemOperand? That 138037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // would allow us to distinguish objects we know don't alias based on the 1381f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // underlying address space, even if it was lowered to a different one, 138237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // e.g. private accesses lowered to use MUBUF instructions on a scratch 138337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // buffer. 1384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isDS(MIa)) { 1385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isDS(MIb)) 138637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return checkInstOffsetsDoNotOverlap(MIa, MIb); 138737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return !isFLAT(MIb); 138937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 139037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1391de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isMUBUF(MIa) || isMTBUF(MIa)) { 1392de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isMUBUF(MIb) || isMTBUF(MIb)) 139337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return checkInstOffsetsDoNotOverlap(MIa, MIb); 139437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1395de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return !isFLAT(MIb) && !isSMRD(MIb); 139637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 139737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1398de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isSMRD(MIa)) { 1399de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isSMRD(MIb)) 140037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return checkInstOffsetsDoNotOverlap(MIa, MIb); 140137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1402de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return !isFLAT(MIb) && !isMUBUF(MIa) && !isMTBUF(MIa); 140337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 140437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1405de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isFLAT(MIa)) { 1406de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isFLAT(MIb)) 140737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return checkInstOffsetsDoNotOverlap(MIa, MIb); 140837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 140937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 141037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 141137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 141237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 141337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 141437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1415f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga NainarMachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, 1416de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &MI, 1417de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar LiveVariables *LV) const { 1418de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1419de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (MI.getOpcode()) { 1420de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar default: 1421de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return nullptr; 1422de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_MAC_F32_e64: 1423de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 1424de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_MAC_F32_e32: { 1425de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); 1426de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Src0->isImm() && !isInlineConstant(*Src0, 4)) 1427de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return nullptr; 1428de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 1429de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1430f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1431f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1432de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst); 1433de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0); 1434de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1); 1435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2); 1436f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1437de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::V_MAD_F32)) 1438de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*Dst) 1439de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) // Src0 mods 1440de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*Src0) 1441de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) // Src1 mods 1442de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*Src1) 1443de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) // Src mods 1444de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*Src2) 1445de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) // clamp 1446de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0); // omod 1447de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1448de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1449de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI, 1450de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineBasicBlock *MBB, 1451de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineFunction &MF) const { 1452de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // XXX - Do we want the SP check in the base implementation? 1453de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1454de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Target-independent instructions do not have an implicit-use of EXEC, even 1455de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // when they operate on VGPRs. Treating EXEC modifications as scheduling 1456de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // boundaries prevents incorrect movements of such instructions. 1457de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) || 1458de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.modifiesRegister(AMDGPU::EXEC, &RI); 1459f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 1460f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 146136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool SIInstrInfo::isInlineConstant(const APInt &Imm) const { 1462ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines int64_t SVal = Imm.getSExtValue(); 1463ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (SVal >= -16 && SVal <= 64) 146436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return true; 146536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1466ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Imm.getBitWidth() == 64) { 1467ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines uint64_t Val = Imm.getZExtValue(); 1468ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return (DoubleToBits(0.0) == Val) || 1469ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(1.0) == Val) || 1470ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(-1.0) == Val) || 1471ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(0.5) == Val) || 1472ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(-0.5) == Val) || 1473ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(2.0) == Val) || 1474ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(-2.0) == Val) || 1475ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(4.0) == Val) || 1476ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (DoubleToBits(-4.0) == Val); 1477ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1478ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 147936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // The actual type of the operand does not seem to matter as long 148036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // as the bits match one of the inline immediate values. For example: 148136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 148236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal, 148336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // so it is a legal inline immediate. 148436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 148536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in 148636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // floating-point, so it is a legal inline immediate. 1487ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines uint32_t Val = Imm.getZExtValue(); 1488ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1489ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return (FloatToBits(0.0f) == Val) || 1490ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(1.0f) == Val) || 1491ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(-1.0f) == Val) || 1492ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(0.5f) == Val) || 1493ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(-0.5f) == Val) || 1494ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(2.0f) == Val) || 1495ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(-2.0f) == Val) || 1496ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(4.0f) == Val) || 1497ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines (FloatToBits(-4.0f) == Val); 1498ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 1499ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1500ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesbool SIInstrInfo::isInlineConstant(const MachineOperand &MO, 1501ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned OpSize) const { 1502ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MO.isImm()) { 1503ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // MachineOperand provides no way to tell the true operand size, since it 1504ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // only records a 64-bit value. We need to know the size to determine if a 1505ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // 32-bit floating point immediate bit pattern is legal for an integer 1506ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // immediate. It would be for any 32-bit integer operand, but would not be 1507ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // for a 64-bit one. 1508ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1509ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned BitSize = 8 * OpSize; 1510ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return isInlineConstant(APInt(BitSize, MO.getImm(), true)); 151147fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard } 151236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 151347fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard return false; 151447fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard} 151547fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard 1516ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesbool SIInstrInfo::isLiteralConstant(const MachineOperand &MO, 1517ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned OpSize) const { 1518ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return MO.isImm() && !isInlineConstant(MO, OpSize); 151947fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard} 152047fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard 1521c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesstatic bool compareMachineOp(const MachineOperand &Op0, 1522c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const MachineOperand &Op1) { 1523c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Op0.getType() != Op1.getType()) 1524c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return false; 1525c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1526c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines switch (Op0.getType()) { 1527c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case MachineOperand::MO_Register: 1528c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Op0.getReg() == Op1.getReg(); 1529c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case MachineOperand::MO_Immediate: 1530c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return Op0.getImm() == Op1.getImm(); 1531c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines default: 1532c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines llvm_unreachable("Didn't expect to be comparing these operand types"); 1533c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1534c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 1535c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1536de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo, 1537de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &MO) const { 1538de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MCOperandInfo &OpInfo = get(MI.getOpcode()).OpInfo[OpNo]; 153937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1540ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 154137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 154237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE) 154337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 154437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 154537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (OpInfo.RegClass < 0) 154637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 154737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1548ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize(); 1549ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (isLiteralConstant(MO, OpSize)) 1550ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return RI.opCanUseLiteralConstant(OpInfo.OperandType); 155137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1552ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return RI.opCanUseInlineConstant(OpInfo.OperandType); 155337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 155437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 155537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const { 1556ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines int Op32 = AMDGPU::getVOPe32(Opcode); 1557ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (Op32 == -1) 1558ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1559ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1560ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return pseudoToMCOpcode(Op32) != -1; 156137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 156237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 156337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool SIInstrInfo::hasModifiers(unsigned Opcode) const { 156437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The src0_modifier operand is present on all instructions 156537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // that have modifiers. 156637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 156737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return AMDGPU::getNamedOperandIdx(Opcode, 156837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AMDGPU::OpName::src0_modifiers) != -1; 156937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 157037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 157137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool SIInstrInfo::hasModifiersSet(const MachineInstr &MI, 157237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned OpName) const { 157337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const MachineOperand *Mods = getNamedOperand(MI, OpName); 157437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return Mods && Mods->getImm(); 157537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 157637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 157737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, 1578ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines const MachineOperand &MO, 1579ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned OpSize) const { 158037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Literal constants use the constant bus. 1581ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (isLiteralConstant(MO, OpSize)) 158237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 158337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 158437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!MO.isReg() || !MO.isUse()) 158537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 158637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 158737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) 158837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return RI.isSGPRClass(MRI.getRegClass(MO.getReg())); 158937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 159037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // FLAT_SCR is just an SGPR pair. 159137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!MO.isImplicit() && (MO.getReg() == AMDGPU::FLAT_SCR)) 159237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 159337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 159437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // EXEC register uses the constant bus. 159537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!MO.isImplicit() && MO.getReg() == AMDGPU::EXEC) 159637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 159737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 159837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // SGPRs use the constant bus 1599de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return (MO.getReg() == AMDGPU::VCC || MO.getReg() == AMDGPU::M0 || 1600de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar (!MO.isImplicit() && 1601de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar (AMDGPU::SGPR_32RegClass.contains(MO.getReg()) || 1602de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::SGPR_64RegClass.contains(MO.getReg())))); 160337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 160437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1605f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarstatic unsigned findImplicitSGPRRead(const MachineInstr &MI) { 1606f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (const MachineOperand &MO : MI.implicit_operands()) { 1607f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We only care about reads. 1608f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (MO.isDef()) 1609f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 1610f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1611f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar switch (MO.getReg()) { 1612f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::VCC: 1613f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::M0: 1614f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::FLAT_SCR: 1615f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return MO.getReg(); 1616f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1617f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar default: 1618f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar break; 1619f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1620f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1621f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1622f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return AMDGPU::NoRegister; 1623f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 1624f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1625de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic bool shouldReadExec(const MachineInstr &MI) { 1626de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (SIInstrInfo::isVALU(MI)) { 1627de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (MI.getOpcode()) { 1628de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_READLANE_B32: 1629de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_READLANE_B32_si: 1630de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_READLANE_B32_vi: 1631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_WRITELANE_B32: 1632de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_WRITELANE_B32_si: 1633de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::V_WRITELANE_B32_vi: 1634de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1635de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1636de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1637de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 1638de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 1639de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1640de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (SIInstrInfo::isGenericOpcode(MI.getOpcode()) || 1641de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SIInstrInfo::isSALU(MI) || 1642de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SIInstrInfo::isSMRD(MI)) 1643de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 1644de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1645de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return true; 1646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 1647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::verifyInstruction(const MachineInstr &MI, 164947fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard StringRef &ErrInfo) const { 1650de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar uint16_t Opcode = MI.getOpcode(); 1651de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 165247fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 165347fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 165447fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 165547fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard 165636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Make sure the number of operands is correct. 165736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MCInstrDesc &Desc = get(Opcode); 165836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!Desc.isVariadic() && 1659de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Desc.getNumOperands() != MI.getNumExplicitOperands()) { 1660de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ErrInfo = "Instruction has wrong number of operands."; 1661de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 166236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 166336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1664f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Make sure the register classes are correct. 166537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { 1666de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.getOperand(i).isFPImm()) { 1667ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ErrInfo = "FPImm Machine Operands are not supported. ISel should bitcast " 1668ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines "all fp values to integers."; 1669ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1670ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1671ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1672ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines int RegClass = Desc.OpInfo[i].RegClass; 1673ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 167436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch (Desc.OpInfo[i].OperandType) { 1675ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case MCOI::OPERAND_REGISTER: 1676de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.getOperand(i).isImm()) { 1677ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ErrInfo = "Illegal immediate value for operand."; 1678ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1679ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1680ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 1681ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::OPERAND_REG_IMM32: 1682ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 1683ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::OPERAND_REG_INLINE_C: 1684de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isLiteralConstant(MI.getOperand(i), 1685ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines RI.getRegClass(RegClass)->getSize())) { 1686ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines ErrInfo = "Illegal immediate value for operand."; 1687ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return false; 1688c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 168936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 169036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case MCOI::OPERAND_IMMEDIATE: 1691de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::OPERAND_KIMM32: 169237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Check if this operand is an immediate. 169337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // FrameIndex operands will be replaced by immediates, so they are 169437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // allowed. 1695de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MI.getOperand(i).isImm() && !MI.getOperand(i).isFI()) { 169636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ErrInfo = "Expected immediate, but got non-immediate"; 169736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 169836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 169936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Fall-through 170036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 170136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 170236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 170336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1704de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MI.getOperand(i).isReg()) 170536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 170636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 170736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (RegClass != -1) { 1708de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Reg = MI.getOperand(i).getReg(); 1709de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Reg == AMDGPU::NoRegister || 1710de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar TargetRegisterInfo::isVirtualRegister(Reg)) 171136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 171236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 171336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *RC = RI.getRegClass(RegClass); 171436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (!RC->contains(Reg)) { 171536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines ErrInfo = "Operand has incorrect register class."; 171636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return false; 171736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 171836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 171936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 172036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 172147fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard // Verify VOP* 1722de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isVOPC(MI)) { 1723ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // Only look at the true operands. Only a real operand can use the constant 1724ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // bus, and we don't want to check pseudo-operands like the source modifier 1725ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // flags. 1726ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 1727ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 172847fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard unsigned ConstantBusCount = 0; 1729de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1730de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) 1731de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar ++ConstantBusCount; 1732de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1733de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SGPRUsed = findImplicitSGPRRead(MI); 1734f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SGPRUsed != AMDGPU::NoRegister) 1735f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ++ConstantBusCount; 1736f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1737ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines for (int OpIdx : OpIndices) { 1738ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (OpIdx == -1) 1739ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 1740de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &MO = MI.getOperand(OpIdx); 1741ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) { 174237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MO.isReg()) { 174337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MO.getReg() != SGPRUsed) 174447fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard ++ConstantBusCount; 174537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SGPRUsed = MO.getReg(); 174637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 174737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines ++ConstantBusCount; 174847fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard } 174947fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard } 175047fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard } 175147fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard if (ConstantBusCount > 1) { 175247fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard ErrInfo = "VOP* instruction uses the constant bus more than once"; 175347fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard return false; 175447fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard } 175547fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard } 175647fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard 1757c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // Verify misc. restrictions on specific instructions. 1758c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || 1759c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { 1760de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &Src0 = MI.getOperand(Src0Idx); 1761de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &Src1 = MI.getOperand(Src1Idx); 1762de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &Src2 = MI.getOperand(Src2Idx); 1763c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { 1764c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines if (!compareMachineOp(Src0, Src1) && 1765c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines !compareMachineOp(Src0, Src2)) { 1766c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; 1767c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines return false; 1768c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1769c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1770c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } 1771c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 1772f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Make sure we aren't losing exec uses in the td files. This mostly requires 1773f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // being careful when using let Uses to try to add other use registers. 1774de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (shouldReadExec(MI)) { 1775de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MI.hasRegisterImplicitUseOperand(AMDGPU::EXEC)) { 1776f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar ErrInfo = "VALU instruction does not implicitly read exec mask"; 1777f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 1778f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1779f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1780f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 178147fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard return true; 178247fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard} 178347fbbc2dc5696d27f4e3c8a5432777976dd8da0aTom Stellard 1784c3ec7e2273a26d8ae3b8d98160e13f8f44299ad2Matt Arsenaultunsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { 1785b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard switch (MI.getOpcode()) { 1786b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard default: return AMDGPU::INSTRUCTION_LIST_END; 1787b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; 1788b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::COPY: return AMDGPU::COPY; 1789b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::PHI: return AMDGPU::PHI; 1790dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; 179136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_MOV_B32: 179236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MI.getOperand(1).isReg() ? 179336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; 179437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::S_ADD_I32: 179537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; 17963e38856f04a01651819c6bc16fac4434a5d2b4c6Matt Arsenault case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; 179737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::S_SUB_I32: 179837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; 17993e38856f04a01651819c6bc16fac4434a5d2b4c6Matt Arsenault case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; 180037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32; 180136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; 180236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; 180336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32; 180436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32; 180536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32; 180636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32; 180736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32; 1808b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32; 1809b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64; 1810b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32; 1811b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; 1812b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; 1813b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; 1814dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; 1815dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; 1816dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; 1817dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; 18184c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AMDGPU::S_BFM_B32: return AMDGPU::V_BFM_B32_e64; 1819c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; 1820dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; 1821c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; 1822dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; 1823dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; 1824dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; 1825dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; 1826dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; 1827dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; 1828de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32; 1829de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32; 1830de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32; 1831de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32; 1832de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32; 1833de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32; 1834ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; 1835c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; 1836c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; 18374c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar case AMDGPU::S_FLBIT_I32: return AMDGPU::V_FFBH_I32_e64; 1838de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_SCC0: return AMDGPU::S_CBRANCH_VCCZ; 1839de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_SCC1: return AMDGPU::S_CBRANCH_VCCNZ; 1840b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 1841b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard} 1842b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 1843b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellardbool SIInstrInfo::isSALUOpSupportedOnVALU(const MachineInstr &MI) const { 1844b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard return getVALUOp(MI) != AMDGPU::INSTRUCTION_LIST_END; 1845b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard} 1846b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 1847b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellardconst TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, 1848b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard unsigned OpNo) const { 1849b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 1850b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard const MCInstrDesc &Desc = get(MI.getOpcode()); 1851b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard if (MI.isVariadic() || OpNo >= Desc.getNumOperands() || 1852ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines Desc.OpInfo[OpNo].RegClass == -1) { 1853ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines unsigned Reg = MI.getOperand(OpNo).getReg(); 1854ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1855ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (TargetRegisterInfo::isVirtualRegister(Reg)) 1856ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return MRI.getRegClass(Reg); 1857ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return RI.getPhysRegClass(Reg); 1858ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 1859b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 1860b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard unsigned RCID = Desc.OpInfo[OpNo].RegClass; 1861b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard return RI.getRegClass(RCID); 1862b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard} 1863b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 1864b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellardbool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { 1865b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard switch (MI.getOpcode()) { 1866b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::COPY: 1867b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard case AMDGPU::REG_SEQUENCE: 1868dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::PHI: 1869dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::INSERT_SUBREG: 1870b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard return RI.hasVGPRs(getOpRegClass(MI, 0)); 1871b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard default: 1872b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard return RI.hasVGPRs(getOpRegClass(MI, OpNo)); 1873b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 1874b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard} 1875b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 1876de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const { 1877b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard MachineBasicBlock::iterator I = MI; 1878de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *MBB = MI.getParent(); 1879de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &MO = MI.getOperand(OpIdx); 188037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 1881de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned RCID = get(MI.getOpcode()).OpInfo[OpIdx].RegClass; 1882b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard const TargetRegisterClass *RC = RI.getRegClass(RCID); 1883b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard unsigned Opcode = AMDGPU::V_MOV_B32_e32; 188437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MO.isReg()) 1885b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard Opcode = AMDGPU::COPY; 188637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines else if (RI.isSGPRClass(RC)) 188729a651af8a4b522daf1f9102c93e4c8ecc2ef3c2Matt Arsenault Opcode = AMDGPU::S_MOV_B32; 188837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 18893560dd2dcd67d42eeb8e59975581d598d71669dfMatt Arsenault const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC); 189037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (RI.getCommonSubClass(&AMDGPU::VReg_64RegClass, VRC)) 189137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines VRC = &AMDGPU::VReg_64RegClass; 189237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines else 1893ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines VRC = &AMDGPU::VGPR_32RegClass; 189437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 18953560dd2dcd67d42eeb8e59975581d598d71669dfMatt Arsenault unsigned Reg = MRI.createVirtualRegister(VRC); 189637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DebugLoc DL = MBB->findDebugLoc(I); 1897de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).addOperand(MO); 1898b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard MO.ChangeToRegister(Reg, false); 1899b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard} 1900b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 190136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesunsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, 190236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineRegisterInfo &MRI, 190336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand &SuperReg, 190436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *SuperRC, 190536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubIdx, 190636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *SubRC) 190736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const { 1908f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineBasicBlock *MBB = MI->getParent(); 1909f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar DebugLoc DL = MI->getDebugLoc(); 191036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubReg = MRI.createVirtualRegister(SubRC); 191136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1912f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { 1913f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 1914f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(SuperReg.getReg(), 0, SubIdx); 1915f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return SubReg; 1916f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 1917f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 191836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Just in case the super register is itself a sub-register, copy it to a new 1919c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // value so we don't need to worry about merging its subreg index with the 1920c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // SubIdx passed to this function. The register coalescer should be able to 192136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // eliminate this extra copy. 1922f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); 192337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 192437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) 192537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); 192637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 192737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) 192837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(NewSuperReg, 0, SubIdx); 192936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 193036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return SubReg; 193136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 193236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 193336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesMachineOperand SIInstrInfo::buildExtractSubRegOrImm( 193436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MII, 193536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineRegisterInfo &MRI, 193636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand &Op, 193736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *SuperRC, 193836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubIdx, 193936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *SubRC) const { 194036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Op.isImm()) { 194136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // XXX - Is there a better way to do this? 194236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (SubIdx == AMDGPU::sub0) 194336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF); 194436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (SubIdx == AMDGPU::sub1) 194536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MachineOperand::CreateImm(Op.getImm() >> 32); 194636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 194736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Unhandled register index for immediate"); 194836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 194936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 195036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubReg = buildExtractSubReg(MII, MRI, Op, SuperRC, 195136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SubIdx, SubRC); 195236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return MachineOperand::CreateReg(SubReg, false); 195336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 195436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 1955ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines// Change the order of operands from (0, 1, 2) to (0, 2, 1) 1956de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::swapOperands(MachineInstr &Inst) const { 1957de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(Inst.getNumExplicitOperands() == 3); 1958de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand Op1 = Inst.getOperand(1); 1959de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.RemoveOperand(1); 1960de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addOperand(Op1); 1961ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 1962ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 1963f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarbool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, 1964f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const MCOperandInfo &OpInfo, 1965f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const MachineOperand &MO) const { 1966f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!MO.isReg()) 1967f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return false; 1968f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1969f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Reg = MO.getReg(); 1970f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *RC = 1971f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar TargetRegisterInfo::isVirtualRegister(Reg) ? 1972f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MRI.getRegClass(Reg) : 1973f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar RI.getPhysRegClass(Reg); 1974f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1975de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const SIRegisterInfo *TRI = 1976de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); 1977de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar RC = TRI->getSubRegClass(RC, MO.getSubReg()); 1978de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 1979f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // In order to be legal, the common sub-class must be equal to the 1980f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // class of the current operand. For example: 1981f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // 1982f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // v_mov_b32 s0 ; Operand defined as vsrc_32 1983f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL 1984f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // 1985f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // s_sendmsg 0, s0 ; Operand defined as m0reg 1986f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL 1987f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1988f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; 1989f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 1990f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1991f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarbool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, 1992f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const MCOperandInfo &OpInfo, 1993f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const MachineOperand &MO) const { 1994f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (MO.isReg()) 1995f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return isLegalRegOperand(MRI, OpInfo, MO); 1996f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 1997f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Handle non-register types that are treated like immediates. 1998f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); 1999f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return true; 2000f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 2001f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2002de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, 200337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const MachineOperand *MO) const { 2004de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2005de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MCInstrDesc &InstDesc = MI.getDesc(); 200637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx]; 200737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const TargetRegisterClass *DefinedRC = 200837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr; 200937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!MO) 2010de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MO = &MI.getOperand(OpIdx); 2011de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2012de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isVALU(MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) { 201337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2014de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar RegSubRegPair SGPRUsed; 2015de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MO->isReg()) 2016de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg()); 2017de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2018de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 201937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (i == OpIdx) 202037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 2021de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &Op = MI.getOperand(i); 2022de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Op.isReg()) { 2023de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) && 2024de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar usesConstantBus(MRI, Op, getOpSize(MI, i))) { 2025de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return false; 2026de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2027de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) { 202837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 202937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 203037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 203137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 203237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 203337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (MO->isReg()) { 203437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(DefinedRC); 2035f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return isLegalRegOperand(MRI, OpInfo, *MO); 203637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 203737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 203837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Handle non-register types that are treated like immediates. 2039ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines assert(MO->isImm() || MO->isTargetIndex() || MO->isFI()); 204037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 204137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!DefinedRC) { 204237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // This operand expects an immediate. 204337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 204437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 204537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 204637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return isImmOperandLegal(MI, OpIdx, *MO); 204737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 204837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2049f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarvoid SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, 2050de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &MI) const { 2051de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = MI.getOpcode(); 2052f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const MCInstrDesc &InstrDesc = get(Opc); 205337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2054f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 2055de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src1 = MI.getOperand(Src1Idx); 2056b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2057f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 2058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // we need to only have one constant bus use. 2059f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // 2060f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Note we do not need to worry about literal constants here. They are 2061f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // disabled for the operand type for instructions because they will always 2062f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // violate the one constant bus use rule. 2063de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister; 2064f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (HasImplicitSGPR) { 2065f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2066de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src0 = MI.getOperand(Src0Idx); 2067f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2068f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) 20699bc4b2c0dae143e72624984dfd5e3a4ff2e95eb2Matt Arsenault legalizeOpWithMove(MI, Src0Idx); 2070f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 20719bc4b2c0dae143e72624984dfd5e3a4ff2e95eb2Matt Arsenault 2072f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // VOP2 src0 instructions support all operand types, so we don't need to check 2073f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // their legality. If src1 is already legal, we don't need to do anything. 2074f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) 2075f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 20769bc4b2c0dae143e72624984dfd5e3a4ff2e95eb2Matt Arsenault 2077f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We do not use commuteInstruction here because it is too aggressive and will 2078f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // commute if it is possible. We only want to commute here if it improves 2079f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // legality. This can be called a fairly large number of times so don't waste 2080f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // compile time pointlessly swapping and checking legality again. 2081de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (HasImplicitSGPR || !MI.isCommutable()) { 2082f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOpWithMove(MI, Src1Idx); 2083f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 2084f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2085f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2086f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 2087de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src0 = MI.getOperand(Src0Idx); 208837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2089f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If src0 can be used as src1, commuting will make the operands legal. 2090f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Otherwise we have to give up and insert a move. 2091f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // 2092f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // TODO: Other immediate-like operand kinds could be commuted if there was a 2093f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // MachineOperand::ChangeTo* for them. 2094f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if ((!Src1.isImm() && !Src1.isReg()) || 2095f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { 209637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines legalizeOpWithMove(MI, Src1Idx); 209737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 2098b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2099b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2100de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int CommutedOpc = commuteOpcode(MI); 2101f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (CommutedOpc == -1) { 2102f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOpWithMove(MI, Src1Idx); 2103f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 2104f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 210537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2106de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.setDesc(get(CommutedOpc)); 210737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2108f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Src0Reg = Src0.getReg(); 2109f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Src0SubReg = Src0.getSubReg(); 2110f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool Src0Kill = Src0.isKill(); 2111b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2112f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Src1.isImm()) 2113f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Src0.ChangeToImmediate(Src1.getImm()); 2114f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar else if (Src1.isReg()) { 2115f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); 2116f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Src0.setSubReg(Src1.getSubReg()); 2117f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else 2118f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar llvm_unreachable("Should only have register or immediate operands"); 2119b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2120f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); 2121f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Src1.setSubReg(Src0SubReg); 2122f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 2123836c5133c66edecedeaa79448964b4c103f99271Matt Arsenault 2124f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// Legalize VOP3 operands. Because all operand types are supported for any 2125f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// operand, and since literal constants are not allowed and should never be 2126f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// seen, we only need to worry about inserting copies if we use multiple SGPR 2127f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// operands. 2128de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, 2129de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &MI) const { 2130de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = MI.getOpcode(); 2131f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2132f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int VOP3Idx[3] = { 2133f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), 2134f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), 2135f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) 2136f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar }; 2137f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2138f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Find the one SGPR operand we are allowed to use. 2139f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); 2140f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2141f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (unsigned i = 0; i < 3; ++i) { 2142f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar int Idx = VOP3Idx[i]; 2143f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (Idx == -1) 2144f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar break; 2145de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &MO = MI.getOperand(Idx); 2146f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2147f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We should never see a VOP3 instruction with an illegal immediate operand. 2148f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!MO.isReg()) 2149f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 2150f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2151f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) 2152f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; // VGPRs are legal 2153f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2154f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { 2155f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SGPRReg = MO.getReg(); 2156f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We can use one SGPR in each VOP3 instruction. 2157f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 2158b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2159f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2160f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If we make it this far, then the operand is not legal and we must 2161f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // legalize it. 2162f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOpWithMove(MI, Idx); 2163f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2164f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 2165f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2166de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::readlaneVGPRToSGPR(unsigned SrcReg, MachineInstr &UseMI, 2167de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineRegisterInfo &MRI) const { 2168de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const TargetRegisterClass *VRC = MRI.getRegClass(SrcReg); 2169de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const TargetRegisterClass *SRC = RI.getEquivalentSGPRClass(VRC); 2170de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned DstReg = MRI.createVirtualRegister(SRC); 2171de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SubRegs = VRC->getSize() / 4; 2172de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2173de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVector<unsigned, 8> SRegs; 2174de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = 0; i < SubRegs; ++i) { 2175de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SGPR = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 2176de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), 2177de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar get(AMDGPU::V_READFIRSTLANE_B32), SGPR) 2178de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); 2179de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SRegs.push_back(SGPR); 2180de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2181de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2182de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstrBuilder MIB = 2183de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), 2184de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar get(AMDGPU::REG_SEQUENCE), DstReg); 2185de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = 0; i < SubRegs; ++i) { 2186de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MIB.addReg(SRegs[i]); 2187de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MIB.addImm(RI.getSubRegFromChannel(i)); 2188de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2189de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return DstReg; 2190de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 2191de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2192de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI, 2193de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &MI) const { 2194de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2195de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If the pointer is store in VGPRs, then we need to move them to 2196de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // SGPRs using v_readfirstlane. This is safe because we only select 2197de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // loads with uniform pointers to SMRD instruction so we know the 2198de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // pointer value is uniform. 2199de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *SBase = getNamedOperand(MI, AMDGPU::OpName::sbase); 2200de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (SBase && !RI.isSGPRClass(MRI.getRegClass(SBase->getReg()))) { 2201de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SGPR = readlaneVGPRToSGPR(SBase->getReg(), MI, MRI); 2202de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SBase->setReg(SGPR); 2203de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2204de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 2205de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2206de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::legalizeOperands(MachineInstr &MI) const { 2207de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 2208f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2209f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Legalize VOP2 2210de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isVOP2(MI) || isVOPC(MI)) { 2211f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOperandsVOP2(MRI, MI); 2212f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 2213f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2214f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2215f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Legalize VOP3 2216de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isVOP3(MI)) { 2217f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOperandsVOP3(MRI, MI); 2218f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 2219b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2220b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2221de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Legalize SMRD 2222de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isSMRD(MI)) { 2223de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar legalizeOperandsSMRD(MRI, MI); 2224de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 2225de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2226de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2227dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Legalize REG_SEQUENCE and PHI 2228b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // The register class of the operands much be the same type as the register 2229b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // class of the output. 2230de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.getOpcode() == AMDGPU::PHI) { 2231dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; 2232de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 2233de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!MI.getOperand(i).isReg() || 2234de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) 2235b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard continue; 2236b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard const TargetRegisterClass *OpRC = 2237de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MRI.getRegClass(MI.getOperand(i).getReg()); 2238b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard if (RI.hasVGPRs(OpRC)) { 2239b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard VRC = OpRC; 2240b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } else { 2241b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard SRC = OpRC; 2242b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2243b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2244b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2245b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // If any of the operands are VGPR registers, then they all most be 2246b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // otherwise we will create illegal VGPR->SGPR copies when legalizing 2247b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // them. 2248de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (VRC || !RI.isSGPRClass(getOpRegClass(MI, 0))) { 2249b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard if (!VRC) { 2250b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard assert(SRC); 2251b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard VRC = RI.getEquivalentVGPRClass(SRC); 2252b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2253b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard RC = VRC; 2254b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } else { 2255b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard RC = SRC; 2256b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2257b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2258b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // Update all the operands so they have the same type. 2259de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 2260de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Op = MI.getOperand(I); 2261f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) 2262b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard continue; 2263b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard unsigned DstReg = MRI.createVirtualRegister(RC); 2264f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2265f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // MI is a PHI instruction. 2266de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB(); 2267f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); 2268f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2269de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg) 2270de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(Op); 2271f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Op.setReg(DstReg); 2272f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2273f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2274f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2275f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // REG_SEQUENCE doesn't really require operand legalization, but if one has a 2276f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // VGPR dest type and SGPR sources, insert copies so all operands are 2277f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // VGPRs. This seems to help operand folding / the register coalescer. 2278de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.getOpcode() == AMDGPU::REG_SEQUENCE) { 2279de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *MBB = MI.getParent(); 2280de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const TargetRegisterClass *DstRC = getOpRegClass(MI, 0); 2281f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (RI.hasVGPRs(DstRC)) { 2282f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Update all the operands so they are VGPR register classes. These may 2283f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // not be the same register class because REG_SEQUENCE supports mixing 2284f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // subregister index types e.g. sub0_sub1 + sub2 + sub3 2285de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { 2286de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Op = MI.getOperand(I); 2287f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) 2288f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 2289f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2290f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); 2291f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); 2292f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (VRC == OpRC) 2293f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 2294f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2295f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned DstReg = MRI.createVirtualRegister(VRC); 2296f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2297de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg) 2298de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(Op); 2299f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2300f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Op.setReg(DstReg); 2301f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Op.setIsKill(); 2302dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2303b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2304f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2305f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return; 2306b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 230736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Legalize INSERT_SUBREG 2309dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // src0 must have the same register class as dst 2310de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.getOpcode() == AMDGPU::INSERT_SUBREG) { 2311de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Dst = MI.getOperand(0).getReg(); 2312de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Src0 = MI.getOperand(1).getReg(); 2313dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 2314dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); 2315dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (DstRC != Src0RC) { 2316de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *MI.getParent(); 2317dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned NewSrc0 = MRI.createVirtualRegister(DstRC); 2318de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0) 2319de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Src0); 2320de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.getOperand(1).setReg(NewSrc0); 2321de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2322de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 2323de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2324de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2325de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Legalize MIMG 2326de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isMIMG(MI)) { 2327de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc); 2328de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg()))) { 2329de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SGPR = readlaneVGPRToSGPR(SRsrc->getReg(), MI, MRI); 2330de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SRsrc->setReg(SGPR); 2331de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2332de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2333de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp); 2334de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg()))) { 2335de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SGPR = readlaneVGPRToSGPR(SSamp->getReg(), MI, MRI); 2336de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SSamp->setReg(SGPR); 2337dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2338dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return; 2339dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2340dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 234136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Legalize MUBUF* instructions 234236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // FIXME: If we start using the non-addr64 instructions for compute, we 234336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // may need to legalize them here. 234437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int SRsrcIdx = 2345de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc); 234637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SRsrcIdx != -1) { 234737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // We have an MUBUF instruction 2348de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *SRsrc = &MI.getOperand(SRsrcIdx); 2349de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SRsrcRC = get(MI.getOpcode()).OpInfo[SRsrcIdx].RegClass; 235037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (RI.getCommonSubClass(MRI.getRegClass(SRsrc->getReg()), 235137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines RI.getRegClass(SRsrcRC))) { 235237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The operands are legal. 235337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // FIXME: We may need to legalize operands besided srsrc. 235437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 235537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 235636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2357de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *MI.getParent(); 235836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2359f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Extract the ptr from the resource descriptor. 2360f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc, 2361f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); 236237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 236337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Create an empty resource descriptor 236437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 236537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 236637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 236737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); 2368ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines uint64_t RsrcDataFormat = getDefaultRsrcDataFormat(); 236937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 237037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Zero64 = 0 2371de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B64), Zero64) 2372de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0); 237337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 237437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0} 2375de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatLo) 2376de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(RsrcDataFormat & 0xFFFFFFFF); 237737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 237837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32} 2379de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::S_MOV_B32), SRsrcFormatHi) 2380de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(RsrcDataFormat >> 32); 238137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 238237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // NewSRsrc = {Zero64, SRsrcFormat} 2383de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) 2384de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Zero64) 2385de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub0_sub1) 2386de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SRsrcFormatLo) 2387de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub2) 2388de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SRsrcFormatHi) 2389de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub3); 2390de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2391de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr); 239237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 239337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (VAddr) { 239437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // This is already an ADDR64 instruction so we need to add the pointer 239537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // extracted from the resource descriptor to the current value of VAddr. 2396f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2397f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2398f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2399f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0 2400de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = MI.getDebugLoc(); 2401f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo) 2402f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(SRsrcPtr, 0, AMDGPU::sub0) 2403f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(VAddr->getReg(), 0, AMDGPU::sub0); 2404f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2405f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1 2406f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi) 2407f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(SRsrcPtr, 0, AMDGPU::sub1) 2408f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(VAddr->getReg(), 0, AMDGPU::sub1); 2409f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2410f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // NewVaddr = {NewVaddrHi, NewVaddrLo} 2411de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) 2412de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(NewVAddrLo) 2413de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub0) 2414de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(NewVAddrHi) 2415de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub1); 241637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 241737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // This instructions is the _OFFSET variant, so we need to convert it to 241837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // ADDR64. 2419de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(MBB.getParent()->getSubtarget<SISubtarget>().getGeneration() 2420de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar < SISubtarget::VOLCANIC_ISLANDS && 2421f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar "FIXME: Need to emit flat atomics here"); 2422f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2423de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *VData = getNamedOperand(MI, AMDGPU::OpName::vdata); 2424de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *Offset = getNamedOperand(MI, AMDGPU::OpName::offset); 2425de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *SOffset = getNamedOperand(MI, AMDGPU::OpName::soffset); 2426de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI.getOpcode()); 2427f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2428f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Atomics rith return have have an additional tied operand and are 2429f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // missing some of the special bits. 2430de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand *VDataIn = getNamedOperand(MI, AMDGPU::OpName::vdata_in); 2431f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineInstr *Addr64; 2432f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2433f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!VDataIn) { 2434f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Regular buffer load / store. 2435de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstrBuilder MIB = 2436de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) 2437de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*VData) 2438de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. 2439de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // This will be replaced later 2440de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // with the new value of vaddr. 2441de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*SRsrc) 2442de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*SOffset) 2443de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*Offset); 2444f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2445f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Atomics do not have this operand. 2446de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (const MachineOperand *GLC = 2447de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(MI, AMDGPU::OpName::glc)) { 2448f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MIB.addImm(GLC->getImm()); 2449f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2450f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2451de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)); 2452f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2453de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (const MachineOperand *TFE = 2454de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar getNamedOperand(MI, AMDGPU::OpName::tfe)) { 2455f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MIB.addImm(TFE->getImm()); 2456f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 2457f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2458de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 2459f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Addr64 = MIB; 2460f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } else { 2461f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Atomics with return. 2462de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Addr64 = BuildMI(MBB, MI, MI.getDebugLoc(), get(Addr64Opcode)) 2463de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*VData) 2464de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*VDataIn) 2465de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. 2466de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // This will be replaced later 2467de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // with the new value of vaddr. 2468de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*SRsrc) 2469de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*SOffset) 2470de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(*Offset) 2471de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(getNamedImmOperand(MI, AMDGPU::OpName::slc)) 2472de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); 2473f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 247437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2475de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MI.removeFromParent(); 247637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2477f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // NewVaddr = {NewVaddrHi, NewVaddrLo} 2478de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(MBB, Addr64, Addr64->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), 2479de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NewVAddr) 2480de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SRsrcPtr, 0, AMDGPU::sub0) 2481de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub0) 2482de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(SRsrcPtr, 0, AMDGPU::sub1) 2483de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(AMDGPU::sub1); 2484de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2485de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VAddr = getNamedOperand(*Addr64, AMDGPU::OpName::vaddr); 2486de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SRsrc = getNamedOperand(*Addr64, AMDGPU::OpName::srsrc); 248737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 248836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 248937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Update the instruction to use NewVaddr 249037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines VAddr->setReg(NewVAddr); 249137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Update the instruction to use NewSRsrc 249237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SRsrc->setReg(NewSRsrc); 249337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 249437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 249536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2496b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellardvoid SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { 2497b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard SmallVector<MachineInstr *, 128> Worklist; 2498b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard Worklist.push_back(&TopInst); 2499b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2500b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard while (!Worklist.empty()) { 2501de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &Inst = *Worklist.pop_back_val(); 2502de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock *MBB = Inst.getParent(); 250336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); 250436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2505de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opcode = Inst.getOpcode(); 2506de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned NewOpcode = getVALUOp(Inst); 2507dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 250836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Handle some special cases 2509dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines switch (Opcode) { 2510dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: 2511dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 251236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_AND_B64: 2513f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); 2514de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 2515b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard continue; 2516b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 251736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_OR_B64: 2518f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); 2519de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 252036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 252136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 252236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_XOR_B64: 2523f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); 2524de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 252536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 252636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 252736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_NOT_B64: 2528f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); 2529de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 2530c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines continue; 2531c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2532c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines case AMDGPU::S_BCNT1_I32_B64: 2533c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines splitScalar64BitBCNT(Worklist, Inst); 2534de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 253536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 253636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 253737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AMDGPU::S_BFE_I64: { 253837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines splitScalar64BitBFE(Worklist, Inst); 2539de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 254037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 254137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 254237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2543ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_LSHL_B32: 2544de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2545ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpcode = AMDGPU::V_LSHLREV_B32_e64; 2546ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines swapOperands(Inst); 2547ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2548ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2549ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_ASHR_I32: 2550de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2551ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpcode = AMDGPU::V_ASHRREV_I32_e64; 2552ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines swapOperands(Inst); 2553ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2554ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2555ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_LSHR_B32: 2556de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2557ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpcode = AMDGPU::V_LSHRREV_B32_e64; 2558ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines swapOperands(Inst); 2559ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2560ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2561ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_LSHL_B64: 2562de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2563ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpcode = AMDGPU::V_LSHLREV_B64; 2564ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines swapOperands(Inst); 2565ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2566ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2567ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_ASHR_I64: 2568de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2569ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpcode = AMDGPU::V_ASHRREV_I64; 2570ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines swapOperands(Inst); 2571ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2572ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2573ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPU::S_LSHR_B64: 2574de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { 2575ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines NewOpcode = AMDGPU::V_LSHRREV_B64; 2576ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines swapOperands(Inst); 2577ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 2578ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines break; 2579ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 2580f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::S_ABS_I32: 2581f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar lowerScalarAbs(Worklist, Inst); 2582de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.eraseFromParent(); 2583f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 2584f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2585de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_SCC0: 2586de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPU::S_CBRANCH_SCC1: 2587de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Clear unused bits of vcc 2588de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64), 2589de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar AMDGPU::VCC) 2590de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(AMDGPU::EXEC) 2591de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(AMDGPU::VCC); 2592de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar break; 2593de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 259436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_BFE_U64: 259536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case AMDGPU::S_BFM_B64: 259636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines llvm_unreachable("Moving this op to VALU not implemented"); 259736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 259836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 259936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { 260036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We cannot move this instruction to the VALU, so we should try to 260136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // legalize its operands instead. 260236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines legalizeOperands(Inst); 260336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines continue; 260436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 2605b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2606b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard // Use the new VALU Opcode. 2607b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard const MCInstrDesc &NewDesc = get(NewOpcode); 2608de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.setDesc(NewDesc); 2609b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2610836c5133c66edecedeaa79448964b4c103f99271Matt Arsenault // Remove any references to SCC. Vector instructions can't read from it, and 2611836c5133c66edecedeaa79448964b4c103f99271Matt Arsenault // We're just about to add the implicit use / defs of VCC, and we don't want 2612836c5133c66edecedeaa79448964b4c103f99271Matt Arsenault // both. 2613de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (unsigned i = Inst.getNumOperands() - 1; i > 0; --i) { 2614de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Op = Inst.getOperand(i); 2615de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Op.isReg() && Op.getReg() == AMDGPU::SCC) { 2616de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.RemoveOperand(i); 2617de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar addSCCDefUsersToVALUWorklist(Inst, Worklist); 2618de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2619836c5133c66edecedeaa79448964b4c103f99271Matt Arsenault } 2620836c5133c66edecedeaa79448964b4c103f99271Matt Arsenault 2621dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { 2622dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // We are converting these to a BFE, so we need to add the missing 2623dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // operands for the size and offset. 2624dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; 2625de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addOperand(MachineOperand::CreateImm(0)); 2626de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addOperand(MachineOperand::CreateImm(Size)); 2627dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2628c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { 2629c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // The VALU version adds the second operand to the result, so insert an 2630c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines // extra 0 operand. 2631de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addOperand(MachineOperand::CreateImm(0)); 2632b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2633b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2634de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addImplicitDefUseOperands(*Inst.getParent()->getParent()); 2635dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2636dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { 2637de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &OffsetWidthOp = Inst.getOperand(2); 2638dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // If we need to move this to VGPRs, we need to unpack the second operand 2639dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // back into the 2 separate ones for bit offset and width. 2640dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines assert(OffsetWidthOp.isImm() && 2641dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines "Scalar BFE is only implemented for constant width and offset"); 2642dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines uint32_t Imm = OffsetWidthOp.getImm(); 2643dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2644dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 2645dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 2646de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.RemoveOperand(2); // Remove old immediate. 2647de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addOperand(MachineOperand::CreateImm(Offset)); 2648de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Inst.addOperand(MachineOperand::CreateImm(BitWidth)); 2649b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2650b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2651de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar bool HasDst = Inst.getOperand(0).isReg() && Inst.getOperand(0).isDef(); 2652de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned NewDstReg = AMDGPU::NoRegister; 2653de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (HasDst) { 2654de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Update the destination register class. 2655de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst); 2656de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (!NewDstRC) 2657de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar continue; 2658b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2659de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned DstReg = Inst.getOperand(0).getReg(); 2660de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar NewDstReg = MRI.createVirtualRegister(NewDstRC); 2661de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MRI.replaceRegWith(DstReg, NewDstReg); 2662de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2663b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2664dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Legalize the operands 2665dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines legalizeOperands(Inst); 2666dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2667de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (HasDst) 2668de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); 2669b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard } 2670b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard} 2671b52bf6a3b31596a309f4b12884522e9b4a344654Tom Stellard 2672f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarvoid SIInstrInfo::lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, 2673de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &Inst) const { 2674de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *Inst.getParent(); 2675f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2676f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineBasicBlock::iterator MII = Inst; 2677de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = Inst.getDebugLoc(); 2678f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2679de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Dest = Inst.getOperand(0); 2680de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src = Inst.getOperand(1); 2681f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2682f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2683f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2684f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) 2685f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addImm(0) 2686f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(Src.getReg()); 2687f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2688f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) 2689f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(Src.getReg()) 2690f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar .addReg(TmpReg); 2691f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2692f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MRI.replaceRegWith(Dest.getReg(), ResultReg); 2693f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2694f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 2695f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2696c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesvoid SIInstrInfo::splitScalar64BitUnaryOp( 2697de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst, 2698de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opcode) const { 2699de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *Inst.getParent(); 2700c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2701c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2702de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Dest = Inst.getOperand(0); 2703de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src0 = Inst.getOperand(1); 2704de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = Inst.getDebugLoc(); 2705c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2706c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineBasicBlock::iterator MII = Inst; 2707c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2708c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const MCInstrDesc &InstDesc = get(Opcode); 2709c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const TargetRegisterClass *Src0RC = Src0.isReg() ? 2710c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MRI.getRegClass(Src0.getReg()) : 2711c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines &AMDGPU::SGPR_32RegClass; 2712c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2713c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 2714c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2715c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2716c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines AMDGPU::sub0, Src0SubRC); 2717c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2718c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2719f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 2720f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); 2721c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2722f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 2723f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2724c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addOperand(SrcReg0Sub0); 2725c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2726c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 2727c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines AMDGPU::sub1, Src0SubRC); 2728c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2729f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 2730f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2731c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addOperand(SrcReg0Sub1); 2732c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2733f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 2734c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 2735c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addReg(DestSub0) 2736c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addImm(AMDGPU::sub0) 2737c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addReg(DestSub1) 2738c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addImm(AMDGPU::sub1); 2739c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2740c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MRI.replaceRegWith(Dest.getReg(), FullDestReg); 2741c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2742f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We don't need to legalizeOperands here because for a single operand, src0 2743f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // will support any kind of input. 2744f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2745f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Move all users of this moved value. 2746f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 2747c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 2748c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2749c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hinesvoid SIInstrInfo::splitScalar64BitBinaryOp( 2750de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst, 2751de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opcode) const { 2752de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *Inst.getParent(); 275336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 275436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2755de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Dest = Inst.getOperand(0); 2756de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src0 = Inst.getOperand(1); 2757de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src1 = Inst.getOperand(2); 2758de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = Inst.getDebugLoc(); 275936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 276036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineBasicBlock::iterator MII = Inst; 276136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 276236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const MCInstrDesc &InstDesc = get(Opcode); 276336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *Src0RC = Src0.isReg() ? 276436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MRI.getRegClass(Src0.getReg()) : 276536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines &AMDGPU::SGPR_32RegClass; 276636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 276736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); 276836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *Src1RC = Src1.isReg() ? 276936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MRI.getRegClass(Src1.getReg()) : 277036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines &AMDGPU::SGPR_32RegClass; 277136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 277236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); 277336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 277436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 277536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AMDGPU::sub0, Src0SubRC); 277636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand SrcReg1Sub0 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 277736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AMDGPU::sub0, Src1SubRC); 277836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 277936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); 2780f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); 2781f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); 278236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2783f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); 2784de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) 2785de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(SrcReg0Sub0) 2786de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(SrcReg1Sub0); 278736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 278836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, 278936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AMDGPU::sub1, Src0SubRC); 279036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, 279136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AMDGPU::sub1, Src1SubRC); 279236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2793f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); 2794de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &HiHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub1) 2795de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(SrcReg0Sub1) 2796de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addOperand(SrcReg1Sub1); 279736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2798f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); 279936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) 280036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addReg(DestSub0) 280136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(AMDGPU::sub0) 280236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addReg(DestSub1) 280336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(AMDGPU::sub1); 280436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 280536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MRI.replaceRegWith(Dest.getReg(), FullDestReg); 280636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 280736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Try to legalize the operands in case we need to swap the order to keep it 280836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // valid. 2809f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOperands(LoHalf); 2810f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar legalizeOperands(HiHalf); 2811f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2812f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Move all users of this moved vlaue. 2813f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); 281436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 281536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 2816de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::splitScalar64BitBCNT( 2817de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr &Inst) const { 2818de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *Inst.getParent(); 2819c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 2820c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2821c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineBasicBlock::iterator MII = Inst; 2822de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = Inst.getDebugLoc(); 2823c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2824de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Dest = Inst.getOperand(0); 2825de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src = Inst.getOperand(1); 2826c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2827ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e64); 2828c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const TargetRegisterClass *SrcRC = Src.isReg() ? 2829c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MRI.getRegClass(Src.getReg()) : 2830c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines &AMDGPU::SGPR_32RegClass; 2831c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2832c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2833c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 2834c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2835c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); 2836c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2837c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 2838c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines AMDGPU::sub0, SrcSubRC); 2839c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, 2840c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines AMDGPU::sub1, SrcSubRC); 2841c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2842f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MII, DL, InstDesc, MidReg) 2843c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addOperand(SrcRegSub0) 2844c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addImm(0); 2845c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2846f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar BuildMI(MBB, MII, DL, InstDesc, ResultReg) 2847c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addOperand(SrcRegSub1) 2848c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines .addReg(MidReg); 2849c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2850c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines MRI.replaceRegWith(Dest.getReg(), ResultReg); 2851c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 2852f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // We don't need to legalize operands here. src0 for etiher instruction can be 2853f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // an SGPR, and the second input is unused or determined here. 2854f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 2855c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines} 2856c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines 285737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, 2858de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &Inst) const { 2859de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineBasicBlock &MBB = *Inst.getParent(); 286037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); 286137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MachineBasicBlock::iterator MII = Inst; 2862de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar DebugLoc DL = Inst.getDebugLoc(); 286337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2864de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Dest = Inst.getOperand(0); 2865de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar uint32_t Imm = Inst.getOperand(2).getImm(); 286637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. 286737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. 286837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 286937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines (void) Offset; 287037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Only sext_inreg cases handled. 2872de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(Inst.getOpcode() == AMDGPU::S_BFE_I64 && BitWidth <= 32 && 2873de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Offset == 0 && "Not implemented"); 287437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 287537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (BitWidth < 32) { 287637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned MidRegLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 287737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned MidRegHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 287837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 287937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 288037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(MBB, MII, DL, get(AMDGPU::V_BFE_I32), MidRegLo) 2881de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addReg(Inst.getOperand(1).getReg(), 0, AMDGPU::sub0) 2882de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(0) 2883de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar .addImm(BitWidth); 288437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 288537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e32), MidRegHi) 288637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(31) 288737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(MidRegLo); 288837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 288937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 289037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(MidRegLo) 289137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(AMDGPU::sub0) 289237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(MidRegHi) 289337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(AMDGPU::sub1); 289437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 289537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MRI.replaceRegWith(Dest.getReg(), ResultReg); 2896f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 289737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return; 289837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 289937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2900de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineOperand &Src = Inst.getOperand(1); 290137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 290237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); 290337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 290437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(MBB, MII, DL, get(AMDGPU::V_ASHRREV_I32_e64), TmpReg) 290537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(31) 290637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(Src.getReg(), 0, AMDGPU::sub0); 290737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 290837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), ResultReg) 290937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(Src.getReg(), 0, AMDGPU::sub0) 291037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(AMDGPU::sub0) 291137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addReg(TmpReg) 291237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines .addImm(AMDGPU::sub1); 291337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 291437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MRI.replaceRegWith(Dest.getReg(), ResultReg); 2915f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); 291637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 291737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2918f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarvoid SIInstrInfo::addUsersToMoveToVALUWorklist( 2919f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned DstReg, 2920f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineRegisterInfo &MRI, 2921f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar SmallVectorImpl<MachineInstr *> &Worklist) const { 2922f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), 2923f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar E = MRI.use_end(); I != E; ++I) { 2924f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar MachineInstr &UseMI = *I->getParent(); 2925f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!canReadVGPR(UseMI, I.getOperandNo())) { 2926f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Worklist.push_back(&UseMI); 2927dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2928dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2929f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 2930dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2931de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarvoid SIInstrInfo::addSCCDefUsersToVALUWorklist( 2932de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar MachineInstr &SCCDefInst, SmallVectorImpl<MachineInstr *> &Worklist) const { 2933de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // This assumes that all the users of SCC are in the same block 2934de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // as the SCC def. 2935de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar for (MachineInstr &MI : 2936de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar llvm::make_range(MachineBasicBlock::iterator(SCCDefInst), 2937de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SCCDefInst.getParent()->end())) { 2938de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Exit if we find another SCC def. 2939de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.findRegisterDefOperandIdx(AMDGPU::SCC) != -1) 2940de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return; 2941de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2942de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (MI.findRegisterUseOperandIdx(AMDGPU::SCC) != -1) 2943de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Worklist.push_back(&MI); 2944de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 2945de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 2946de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 2947f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainarconst TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( 2948f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const MachineInstr &Inst) const { 2949f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); 2950f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2951f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar switch (Inst.getOpcode()) { 2952f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // For target instructions, getOpRegClass just returns the virtual register 2953f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // class associated with the operand, so we need to find an equivalent VGPR 2954f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // register class in order to move the instruction to the VALU. 2955f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::COPY: 2956f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::PHI: 2957f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::REG_SEQUENCE: 2958f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar case AMDGPU::INSERT_SUBREG: 2959f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (RI.hasVGPRs(NewDstRC)) 2960f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return nullptr; 2961f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 2962f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); 2963f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!NewDstRC) 2964f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return nullptr; 2965f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return NewDstRC; 2966f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar default: 2967f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return NewDstRC; 2968dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 2969dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 2970dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 2971f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar// Find the one SGPR operand we are allowed to use. 2972de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::findUsedSGPR(const MachineInstr &MI, 297337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int OpIndices[3]) const { 2974de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MCInstrDesc &Desc = MI.getDesc(); 297537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 297637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Find the one SGPR operand we are allowed to use. 2977f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // 297837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // First we need to consider the instruction's operand requirements before 297937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // legalizing. Some operands are required to be SGPRs, such as implicit uses 298037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // of VCC, but we are still bound by the constant bus requirement to only use 298137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // one. 298237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // 298337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // If the operand's class is an SGPR, we can never move it. 298437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2985de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned SGPRReg = findImplicitSGPRRead(MI); 2986f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (SGPRReg != AMDGPU::NoRegister) 2987f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return SGPRReg; 298837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 298937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; 2990de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 299137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 299237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0; i < 3; ++i) { 299337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Idx = OpIndices[i]; 299437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Idx == -1) 299537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines break; 299637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2997de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineOperand &MO = MI.getOperand(Idx); 2998f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (!MO.isReg()) 2999f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar continue; 300037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3001f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Is this operand statically required to be an SGPR based on the operand 3002f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // constraints? 3003f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); 3004f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar bool IsRequiredSGPR = RI.isSGPRClass(OpRC); 3005f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (IsRequiredSGPR) 3006f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return MO.getReg(); 3007f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3008f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If this could be a VGPR or an SGPR, Check the dynamic register class. 3009f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar unsigned Reg = MO.getReg(); 3010f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); 3011f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (RI.isSGPRClass(RegRC)) 3012f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar UsedSGPRs[i] = Reg; 301337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 301437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 301537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // We don't have a required SGPR operand, so we have a bit more freedom in 301637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // selecting operands to move. 301737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 301837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Try to select the most used SGPR. If an SGPR is equal to one of the 301937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // others, we choose that. 302037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // 302137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // e.g. 302237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // V_FMA_F32 v0, s0, s0, s0 -> No moves 302337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // V_FMA_F32 v0, s0, s1, s0 -> Move s1 302437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3025f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // TODO: If some of the operands are 64-bit SGPRs and some 32, we should 3026f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // prefer those. 3027f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 302837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UsedSGPRs[0] != AMDGPU::NoRegister) { 302937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) 303037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SGPRReg = UsedSGPRs[0]; 303137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 303237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 303337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) { 303437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (UsedSGPRs[1] == UsedSGPRs[2]) 303537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SGPRReg = UsedSGPRs[1]; 303637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 303737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 303837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return SGPRReg; 303937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 304037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 304137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen HinesMachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI, 304237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned OperandName) const { 304337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName); 304437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Idx == -1) 304537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return nullptr; 304637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 304737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return &MI.getOperand(Idx); 304837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 3049ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3050ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesuint64_t SIInstrInfo::getDefaultRsrcDataFormat() const { 3051ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT; 3052f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar if (ST.isAmdHsaOS()) { 3053ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines RsrcDataFormat |= (1ULL << 56); 3054ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 3055de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) 3056de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // Set MTYPE = 2 3057de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar RsrcDataFormat |= (2ULL << 59); 3058f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar } 3059f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3060ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return RsrcDataFormat; 3061ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 3062f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3063f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainaruint64_t SIInstrInfo::getScratchRsrcWords23() const { 3064f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar uint64_t Rsrc23 = getDefaultRsrcDataFormat() | 3065f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar AMDGPU::RSRC_TID_ENABLE | 3066f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 0xffffffff; // Size; 3067f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3068de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1; 3069de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3070de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) | 3071de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // IndexStride = 64 3072de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar (UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT); 3073de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3074f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17]. 3075f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar // Clear them unless we want a huge stride. 3076de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) 3077f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar Rsrc23 &= ~AMDGPU::RSRC_DATA_FORMAT; 3078f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar 3079f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar return Rsrc23; 3080f3ef5332fa3f4d5ec72c178a2b19dac363a19383Pirama Arumuga Nainar} 3081de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3082de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::isLowLatencyInstruction(const MachineInstr &MI) const { 3083de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = MI.getOpcode(); 3084de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3085de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return isSMRD(Opc); 3086de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3087de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3088de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarbool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const { 3089de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = MI.getOpcode(); 3090de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3091de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc); 3092de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3093de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3094de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarunsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { 3095de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned Opc = MI.getOpcode(); 3096de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); 3097de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar unsigned DescSize = Desc.getSize(); 3098de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3099de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // If we have a definitive size, we can use it. Otherwise we need to inspect 3100de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // the operands to know the size. 3101de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (DescSize == 8 || DescSize == 4) 3102de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return DescSize; 3103de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3104de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar assert(DescSize == 0); 3105de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3106de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // 4-byte instructions may have a 32-bit literal encoded after them. Check 3107de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // operands that coud ever be literals. 3108de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isVALU(MI) || isSALU(MI)) { 3109de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3110de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Src0Idx == -1) 3111de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 4; // No operands. 3112de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3113de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isLiteralConstant(MI.getOperand(Src0Idx), getOpSize(MI, Src0Idx))) 3114de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 8; 3115de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3116de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 3117de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (Src1Idx == -1) 3118de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 4; 3119de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3120de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar if (isLiteralConstant(MI.getOperand(Src1Idx), getOpSize(MI, Src1Idx))) 3121de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 8; 3122de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3123de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 4; 3124de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3125de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3126de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (Opc) { 3127de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case TargetOpcode::IMPLICIT_DEF: 3128de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case TargetOpcode::KILL: 3129de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case TargetOpcode::DBG_VALUE: 3130de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case TargetOpcode::BUNDLE: 3131de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case TargetOpcode::EH_LABEL: 3132de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return 0; 3133de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case TargetOpcode::INLINEASM: { 3134de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const MachineFunction *MF = MI.getParent()->getParent(); 3135de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const char *AsmStr = MI.getOperand(0).getSymbolName(); 3136de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo()); 3137de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3138de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar default: 3139de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar llvm_unreachable("unable to find instruction size"); 3140de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar } 3141de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3142de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3143de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarArrayRef<std::pair<int, const char *>> 3144de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarSIInstrInfo::getSerializableTargetIndices() const { 3145de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar static const std::pair<int, const char *> TargetIndices[] = { 3146de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"}, 3147de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"}, 3148de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"}, 3149de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"}, 3150de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}}; 3151de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return makeArrayRef(TargetIndices); 3152de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3153de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3154de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// This is used by the post-RA scheduler (SchedulePostRAList.cpp). The 3155de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// post-RA version of misched uses CreateTargetMIHazardRecognizer. 3156de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarScheduleHazardRecognizer * 3157de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarSIInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, 3158de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar const ScheduleDAG *DAG) const { 3159de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return new GCNHazardRecognizer(DAG->MF); 3160de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3161de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 3162de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// This is the hazard recognizer used at -O0 by the PostRAHazardRecognizer 3163de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar/// pass. 3164de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarScheduleHazardRecognizer * 3165de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarSIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const { 3166de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return new GCNHazardRecognizer(MF); 3167de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar} 3168