1//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===---------------------------------------------------------------------===//
9//
10// This pass performs peephole optimizations to clean up ugly code
11// sequences at the MachineInstruction layer.  It runs at the end of
12// the SSA phases, following VSX swap removal.  A pass of dead code
13// elimination follows this one for quick clean-up of any dead
14// instructions introduced here.  Although we could do this as callbacks
15// from the generic peephole pass, this would have a couple of bad
16// effects:  it might remove optimization opportunities for VSX swap
17// removal, and it would miss cleanups made possible following VSX
18// swap removal.
19//
20//===---------------------------------------------------------------------===//
21
22#include "PPCInstrInfo.h"
23#include "PPC.h"
24#include "PPCInstrBuilder.h"
25#include "PPCTargetMachine.h"
26#include "llvm/CodeGen/MachineFunctionPass.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/MachineRegisterInfo.h"
29#include "llvm/Support/Debug.h"
30
31using namespace llvm;
32
33#define DEBUG_TYPE "ppc-mi-peepholes"
34
35namespace llvm {
36  void initializePPCMIPeepholePass(PassRegistry&);
37}
38
39namespace {
40
41struct PPCMIPeephole : public MachineFunctionPass {
42
43  static char ID;
44  const PPCInstrInfo *TII;
45  MachineFunction *MF;
46  MachineRegisterInfo *MRI;
47
48  PPCMIPeephole() : MachineFunctionPass(ID) {
49    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
50  }
51
52private:
53  // Initialize class variables.
54  void initialize(MachineFunction &MFParm);
55
56  // Perform peepholes.
57  bool simplifyCode(void);
58
59  // Find the "true" register represented by SrcReg (following chains
60  // of copies and subreg_to_reg operations).
61  unsigned lookThruCopyLike(unsigned SrcReg);
62
63public:
64  // Main entry point for this pass.
65  bool runOnMachineFunction(MachineFunction &MF) override {
66    if (skipFunction(*MF.getFunction()))
67      return false;
68    initialize(MF);
69    return simplifyCode();
70  }
71};
72
73// Initialize class variables.
74void PPCMIPeephole::initialize(MachineFunction &MFParm) {
75  MF = &MFParm;
76  MRI = &MF->getRegInfo();
77  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
78  DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
79  DEBUG(MF->dump());
80}
81
82// Perform peephole optimizations.
83bool PPCMIPeephole::simplifyCode(void) {
84  bool Simplified = false;
85  MachineInstr* ToErase = nullptr;
86
87  for (MachineBasicBlock &MBB : *MF) {
88    for (MachineInstr &MI : MBB) {
89
90      // If the previous instruction was marked for elimination,
91      // remove it now.
92      if (ToErase) {
93        ToErase->eraseFromParent();
94        ToErase = nullptr;
95      }
96
97      // Ignore debug instructions.
98      if (MI.isDebugValue())
99        continue;
100
101      // Per-opcode peepholes.
102      switch (MI.getOpcode()) {
103
104      default:
105        break;
106
107      case PPC::XXPERMDI: {
108        // Perform simplifications of 2x64 vector swaps and splats.
109        // A swap is identified by an immediate value of 2, and a splat
110        // is identified by an immediate value of 0 or 3.
111        int Immed = MI.getOperand(3).getImm();
112
113        if (Immed != 1) {
114
115          // For each of these simplifications, we need the two source
116          // regs to match.  Unfortunately, MachineCSE ignores COPY and
117          // SUBREG_TO_REG, so for example we can see
118          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
119          // We have to look through chains of COPY and SUBREG_TO_REG
120          // to find the real source values for comparison.
121          unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg());
122          unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg());
123
124          if (TrueReg1 == TrueReg2
125              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
126            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
127
128            // If this is a splat or a swap fed by another splat, we
129            // can replace it with a copy.
130            if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
131              unsigned FeedImmed = DefMI->getOperand(3).getImm();
132              unsigned FeedReg1
133                = lookThruCopyLike(DefMI->getOperand(1).getReg());
134              unsigned FeedReg2
135                = lookThruCopyLike(DefMI->getOperand(2).getReg());
136
137              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
138                DEBUG(dbgs()
139                      << "Optimizing splat/swap or splat/splat "
140                      "to splat/copy: ");
141                DEBUG(MI.dump());
142                BuildMI(MBB, &MI, MI.getDebugLoc(),
143                        TII->get(PPC::COPY), MI.getOperand(0).getReg())
144                  .addOperand(MI.getOperand(1));
145                ToErase = &MI;
146                Simplified = true;
147              }
148
149              // If this is a splat fed by a swap, we can simplify modify
150              // the splat to splat the other value from the swap's input
151              // parameter.
152              else if ((Immed == 0 || Immed == 3)
153                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
154                DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
155                DEBUG(MI.dump());
156                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
157                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
158                MI.getOperand(3).setImm(3 - Immed);
159                Simplified = true;
160              }
161
162              // If this is a swap fed by a swap, we can replace it
163              // with a copy from the first swap's input.
164              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
165                DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
166                DEBUG(MI.dump());
167                BuildMI(MBB, &MI, MI.getDebugLoc(),
168                        TII->get(PPC::COPY), MI.getOperand(0).getReg())
169                  .addOperand(DefMI->getOperand(1));
170                ToErase = &MI;
171                Simplified = true;
172              }
173            }
174          }
175        }
176        break;
177      }
178      }
179    }
180
181    // If the last instruction was marked for elimination,
182    // remove it now.
183    if (ToErase) {
184      ToErase->eraseFromParent();
185      ToErase = nullptr;
186    }
187  }
188
189  return Simplified;
190}
191
192// This is used to find the "true" source register for an
193// XXPERMDI instruction, since MachineCSE does not handle the
194// "copy-like" operations (Copy and SubregToReg).  Returns
195// the original SrcReg unless it is the target of a copy-like
196// operation, in which case we chain backwards through all
197// such operations to the ultimate source register.  If a
198// physical register is encountered, we stop the search.
199unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) {
200
201  while (true) {
202
203    MachineInstr *MI = MRI->getVRegDef(SrcReg);
204    if (!MI->isCopyLike())
205      return SrcReg;
206
207    unsigned CopySrcReg;
208    if (MI->isCopy())
209      CopySrcReg = MI->getOperand(1).getReg();
210    else {
211      assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike");
212      CopySrcReg = MI->getOperand(2).getReg();
213    }
214
215    if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg))
216      return CopySrcReg;
217
218    SrcReg = CopySrcReg;
219  }
220}
221
222} // end default namespace
223
224INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
225                      "PowerPC MI Peephole Optimization", false, false)
226INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
227                    "PowerPC MI Peephole Optimization", false, false)
228
229char PPCMIPeephole::ID = 0;
230FunctionPass*
231llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
232
233