//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
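/// Fold immediates and register sources of mov/copy-like instructions
/// directly into their uses, where that is legal, so the intermediate mov or
/// copy can later become dead.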
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

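// A deferred fold: rewrite operand UseOpNo of UseMI with either the immediate
// ImmToFold or the register operand OpToFold (OpToFold is null when the
// candidate is an immediate).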
struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo) {
    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

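// Returns true for the mov/copy-like opcodes whose single source operand this
// pass knows how to propagate into the uses of the destination register.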
static bool isSafeToFold(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

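// Rewrite the use operand recorded in Fold: either turn it into an immediate
// or substitute the source virtual register. Returns false if the rewrite is
// not handled (e.g. physical register sources).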
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

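// Try to record a fold of OpToFold into operand OpNo of MI. If the operand is
// not legal in that position, attempt to commute MI first. Returns true if a
// FoldCandidate was added to FoldList.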
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

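// Visit each foldable def in the function, gather the uses that can legally
// take its source operand (immediate or register) directly, and rewrite them.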
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

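      // Collect the foldable uses first and rewrite them afterwards; rewriting
      // operands while walking the use list would invalidate the iterator.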
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getPhysRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

          // Split 64-bit constants into 32-bit pieces for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getPhysRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes.  Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities.  The shrink operands pass
        // already does this.
      }

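      // Apply the folds collected above.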
      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}