//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
16f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUInstrInfo.h"
17f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPURegisterInfo.h"
18f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUTargetMachine.h"
19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineFrameInfo.h"
20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h"
21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h"
22f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
23dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesusing namespace llvm;
24dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
25354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzka#define GET_INSTRINFO_CTOR_DTOR
265e48a0e9ae2365a130dd1ec2e0b4beb337ab79e0Tom Stellard#define GET_INSTRINFO_NAMED_OPS
27f767018b1048f228b0c2a71d7e4008750aff0ef5Christian Konig#define GET_INSTRMAP_INFO
28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "AMDGPUGenInstrInfo.inc"
29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
30354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzka// Pin the vtable to this file.
31354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzkavoid AMDGPUInstrInfo::anchor() {}
32354362524a72b3fa43a6c09380b7ae3b2380cbbaJuergen Ributzka
33de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga NainarAMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
34de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
35f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
3637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool AMDGPUInstrInfo::enableClusterLoads() const {
3737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return true;
3837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
3937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
4037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
4137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// the first 16 loads will be interleaved with the stores, and the next 16 will
4237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// be clustered as expected. It should really split into 2 16 store batches.
4337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//
4437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// Loads are clustered until this returns false, rather than trying to schedule
4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// groups of stores. This also means we have to deal with saying different
4637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// address space loads should be clustered, and ones which might cause bank
4737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// conflicts.
4837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//
4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This might be deprecated so it might not be worth that much effort to fix.
5037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
5137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                              int64_t Offset0, int64_t Offset1,
5237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                              unsigned NumLoads) const {
5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(Offset1 > Offset0 &&
5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         "Second offset should be larger than first offset!");
5537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // If we have less than 16 loads in a row, and the offsets are within 64
5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // bytes, then schedule together.
5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
5837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // A cacheline is 64 bytes (for global memory).
5937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
60f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
61f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
620f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellardint AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
630f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  switch (Channels) {
640f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  default: return Opcode;
650f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
660f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
670f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
680f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  }
690f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard}
70dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
// Encoding families. The numeric values are spelled out explicitly because
// this must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily { SI = 0, VI = 1 };
76de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
7837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// header files, so we need to wrap it in a function that takes unsigned
79dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// instead.
80dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace llvm {
81dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace AMDGPU {
82ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesstatic int getMCOpcode(uint16_t Opcode, unsigned Gen) {
83de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
84dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
85dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
87ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
88de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainarstatic SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
89de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  switch (ST.getGeneration()) {
90de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
91de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case AMDGPUSubtarget::SEA_ISLANDS:
92de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return SIEncodingFamily::SI;
93ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
94de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return SIEncodingFamily::VI;
95de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
96de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  // FIXME: This should never be called for r600 GPUs.
97de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case AMDGPUSubtarget::R600:
98de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case AMDGPUSubtarget::R700:
99de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case AMDGPUSubtarget::EVERGREEN:
100de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  case AMDGPUSubtarget::NORTHERN_ISLANDS:
101de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar    return SIEncodingFamily::SI;
102ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  }
103de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar
104de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  llvm_unreachable("Unknown subtarget generation!");
105ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
106ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
107ebe69fe11e48d322045d5949c83283927a0d790bStephen Hinesint AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
108de2d8694e25a814696358e95141f4b1aa4d8847ePirama Arumuga Nainar  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));
109ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
110ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // -1 means that Opcode is already a native instruction.
111ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (MCOp == -1)
112ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return Opcode;
113ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
114ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
115ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  // no encoding in the given subtarget generation.
116ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  if (MCOp == (uint16_t)-1)
117ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines    return -1;
118ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines
119ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  return MCOp;
120ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines}
121