//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// that define a <vsrc> register and constrains their definition class to
/// <vgpr> when a user of the PHI's result is a vector instruction. If the
/// PHI's definition class is constrained to <vgpr>, then the coalescer will
/// be unable to perform the COPY removal from the above example, which
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "sgpr-copies"

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) { }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fix SGPR copies";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
                "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}
static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg() ||
        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
      return true;
  }
  return false;
}

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}
// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
// SGPRx = ...
// SGPRy = REG_SEQUENCE SGPRx, sub0 ...
// VGPRz = COPY SGPRy
//
// ==>
//
// VGPRx = COPY SGPRx
// VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
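//
// For example (an illustrative sketch only; the exact opcodes depend on what
// instruction selection produced):
//
//   SGPRx = S_MOV_B32 <imm_lo>
//   SGPRy = S_MOV_B32 <imm_hi>
//   SGPRz = REG_SEQUENCE SGPRx, sub0, SGPRy, sub1
//   VGPRw = COPY SGPRz
//
// After the rewrite, each REG_SEQUENCE input is a COPY of an S_MOV_B32
// result, which later folding can turn into V_MOV_B32 immediates.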
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy
  //
  // =>
  //
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
        .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        DEBUG(dbgs() << "Fixing PHI: " << MI);
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // will become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction. In this case, we know the program will never enter
        // the second block (the loop) without entering the first block
        // (where the condition is computed), so there is no chance for
        // values to be overwritten.
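        //
        // An illustrative sketch (the exact MIR produced for the loop
        // intrinsics may differ):
        //
        //   bb.loop:
        //     sgpr0 = PHI sgpr_init, <bb.entry>, sgpr1, <bb.loop>
        //     ...
        //     sgpr1 = SI_IF_BREAK cond, sgpr0
        //     ...
        //
        // Here sgpr1 is only defined after the block containing the PHI has
        // executed, so the two definitions cannot clobber each other and the
        // PHI can safely stay on the SALU.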

        bool HasBreakDef = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          unsigned OpReg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(OpReg))) {
            TII->moveToVALU(MI);
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(OpReg);
          assert(DefInstr);
          switch (DefInstr->getOpcode()) {
          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have
          // a *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        if (!SGPRBranch && !HasBreakDef)
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << "Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}