//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy; the resulting
/// code will then look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// that define a <vsrc> register and constrains their definition class to
/// <vgpr> when any user of the PHI's result is a vector instruction. With the
/// PHI's definition class constrained to <vgpr>, the coalescer is unable to
/// perform the COPY removal from the example above, which ultimately led to
/// the creation of the illegal COPY.
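///
/// In addition to PHIs, this pass also rewrites REG_SEQUENCE and
/// INSERT_SUBREG instructions that define SGPRs but consume VGPR operands so
/// that they execute on the VALU instead.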
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "si-fix-sgpr-copies"

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fix SGPR copies";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
                "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

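// Return true if \p MI has at least one operand that is a virtual register of
// a register class that contains VGPRs.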
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MO.getReg())))
      return true;
  }
  return false;
}

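// Return the register classes of \p Copy's source and destination operands,
// looking up virtual registers in \p MRI and physical registers via \p TRI.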
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

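// A VGPR -> SGPR copy is illegal on the hardware: no SALU instruction can
// read a VGPR, so such copies have to be rewritten onto the VALU (typically
// by moving the defining computation to VGPRs via moveToVALU).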
static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

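// An SGPR -> VGPR copy is legal (VALU instructions such as v_mov_b32 can read
// SGPR sources); it is only detected here as the anchor point for the
// REG_SEQUENCE distribution in foldVGPRCopyIntoRegSequence below.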
static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
// VGPRz = COPY SGPRy
//
// ==>
//
// VGPRx = COPY SGPRx
// VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
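//
// For example (register names illustrative), a 64-bit immediate built as
//
//   sreg0 = S_MOV_B32 0
//   sreg1 = S_MOV_B32 0x3ff00000
//   sreg2 = REG_SEQUENCE sreg0, sub0, sreg1, sub1
//   vregz = COPY sreg2
//
// becomes a VGPR REG_SEQUENCE of per-half copies, each of which can later be
// folded into a V_MOV_B32 of the corresponding immediate half.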
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy

  // =>
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
      .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

238
239bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
240  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
241  MachineRegisterInfo &MRI = MF.getRegInfo();
242  const SIRegisterInfo *TRI = ST.getRegisterInfo();
243  const SIInstrInfo *TII = ST.getInstrInfo();
244
245  SmallVector<MachineInstr *, 16> Worklist;
246
247  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
248                                                  BI != BE; ++BI) {
249
250    MachineBasicBlock &MBB = *BI;
251    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
252         I != E; ++I) {
253      MachineInstr &MI = *I;
254
255      switch (MI.getOpcode()) {
256      default:
257        continue;
      case AMDGPU::COPY: {
        // If the destination register is a physical register, there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        DEBUG(dbgs() << "Fixing PHI: " << MI);

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // will become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction.  In this case, we know the program will never enter
        // the second block (the loop body) without entering the first block
        // (where the condition is computed), so there is no chance for
        // values to be overwritten.
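        //
        // For example (register names illustrative), in the usual lowering
        // of a loop exit mask:
        //
        //   sreg0 = PHI sreg_entry, <BB#0>, sreg1, <BB#1>
        //   ...
        //   sreg1 = SI_IF_BREAK vcc, sreg0
        //
        // the PHI of break masks can safely stay on the SALU.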

        bool HasBreakDef = false;
        bool MovedToVALU = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          unsigned OpReg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(OpReg))) {
            // A VGPR input forces the whole PHI onto the VALU; record that
            // so we do not call moveToVALU a second time below.
            TII->moveToVALU(MI);
            MovedToVALU = true;
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(OpReg);
          assert(DefInstr);
          switch (DefInstr->getOpcode()) {
          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have
          // a *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        if (!MovedToVALU && !SGPRBranch && !HasBreakDef)
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << "Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}