AMDGPUInstrInfo.cpp revision de2d8694e25a814696358e95141f4b1aa4d8847e
1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===// 2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// The LLVM Compiler Infrastructure 4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source 6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details. 7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file 11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \brief Implementation of the TargetInstrInfo class that is common to all 12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// AMD GPUs. 13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUInstrInfo.h" 17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPURegisterInfo.h" 18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUTargetMachine.h" 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineFrameInfo.h" 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 22f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 23dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesusing namespace llvm; 24dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 25354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzka#define GET_INSTRINFO_CTOR_DTOR 265e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard#define GET_INSTRINFO_NAMED_OPS 27f767018b1048f228b0c2a71d7e4008750aff0ef5Christian Konig#define GET_INSTRMAP_INFO 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUGenInstrInfo.inc" 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 30354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzka// Pin the vtable to this file. 31354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzkavoid AMDGPUInstrInfo::anchor() {} 32354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzka 33de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarAMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) 34de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar : AMDGPUGenInstrInfo(-1, -1), ST(ST) {} 35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 3637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool AMDGPUInstrInfo::enableClusterLoads() const { 3737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 3837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 3937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 4037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// FIXME: This behaves strangely. If, for example, you have 32 load + stores, 4137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// the first 16 loads will be interleaved with the stores, and the next 16 will 4237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// be clustered as expected. It should really split into 2 16 store batches. 4337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// 4437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// Loads are clustered until this returns false, rather than trying to schedule 4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// groups of stores. This also means we have to deal with saying different 4637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// address space loads should be clustered, and ones which might cause bank 4737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// conflicts. 4837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// 4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This might be deprecated so it might not be worth that much effort to fix. 5037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, 5137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines int64_t Offset0, int64_t Offset1, 5237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned NumLoads) const { 5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(Offset1 > Offset0 && 5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "Second offset should be larger than first offset!"); 5537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // If we have less than 16 loads in a row, and the offsets are within 64 5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // bytes, then schedule together. 5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 5837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // A cacheline is 64 bytes (for global memory). 5937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return (NumLoads <= 16 && (Offset1 - Offset0) < 64); 60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 620f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellardint AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { 630f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard switch (Channels) { 640f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard default: return Opcode; 650f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1); 660f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2); 670f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3); 680f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard } 690f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard} 70dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 71de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td 72de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarenum SIEncodingFamily { 73de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar SI = 0, 74de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar VI = 1 75de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar}; 76de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// Wrapper for Tablegen'd function. enum Subtarget is not defined in any 7837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// header files, so we need to wrap it in a function that takes unsigned 79dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// instead. 80dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace llvm { 81dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace AMDGPU { 82ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic int getMCOpcode(uint16_t Opcode, unsigned Gen) { 83de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen)); 84dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 85dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines} 87ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 88de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) { 89de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar switch (ST.getGeneration()) { 90de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPUSubtarget::SOUTHERN_ISLANDS: 91de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPUSubtarget::SEA_ISLANDS: 92de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return SIEncodingFamily::SI; 93ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines case AMDGPUSubtarget::VOLCANIC_ISLANDS: 94de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return SIEncodingFamily::VI; 95de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 96de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar // FIXME: This should never be called for r600 GPUs. 97de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPUSubtarget::R600: 98de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPUSubtarget::R700: 99de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPUSubtarget::EVERGREEN: 100de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar case AMDGPUSubtarget::NORTHERN_ISLANDS: 101de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar return SIEncodingFamily::SI; 102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines } 103de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar 104de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar llvm_unreachable("Unknown subtarget generation!"); 105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesint AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const { 108de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST)); 109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // -1 means that Opcode is already a native instruction. 111ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MCOp == -1) 112ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return Opcode; 113ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 114ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // (uint16_t)-1 means that Opcode is a pseudo instruction that has 115ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines // no encoding in the given subtarget generation. 116ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines if (MCOp == (uint16_t)-1) 117ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return -1; 118ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines 119ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines return MCOp; 120ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines} 121