1//===----- PPCQPXLoadSplat.cpp - QPX Load Splat Simplification ------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// The QPX vector registers overlay the scalar floating-point registers, and
11// any scalar floating-point loads splat their value across all vector lanes.
12// Thus, if we have a scalar load followed by a splat, we can remove the splat
13// (i.e. replace the load with a load-and-splat pseudo instruction).
14//
15// This pass must run after anything that might do store-to-load forwarding.
16//
17//===----------------------------------------------------------------------===//
18
19#include "PPC.h"
20#include "PPCInstrBuilder.h"
21#include "PPCInstrInfo.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/CodeGen/MachineFunctionPass.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Target/TargetMachine.h"
27#include "llvm/Target/TargetSubtargetInfo.h"
28using namespace llvm;
29
30#define DEBUG_TYPE "ppc-qpx-load-splat"
31
32STATISTIC(NumSimplified, "Number of QPX load splats simplified");
33
34namespace llvm {
35  void initializePPCQPXLoadSplatPass(PassRegistry&);
36}
37
38namespace {
39  struct PPCQPXLoadSplat : public MachineFunctionPass {
40    static char ID;
41    PPCQPXLoadSplat() : MachineFunctionPass(ID) {
42      initializePPCQPXLoadSplatPass(*PassRegistry::getPassRegistry());
43    }
44
45    bool runOnMachineFunction(MachineFunction &Fn) override;
46
47    const char *getPassName() const override {
48      return "PowerPC QPX Load Splat Simplification";
49    }
50  };
51  char PPCQPXLoadSplat::ID = 0;
52}
53
54INITIALIZE_PASS(PPCQPXLoadSplat, "ppc-qpx-load-splat",
55                "PowerPC QPX Load Splat Simplification",
56                false, false)
57
58FunctionPass *llvm::createPPCQPXLoadSplatPass() {
59  return new PPCQPXLoadSplat();
60}
61
62bool PPCQPXLoadSplat::runOnMachineFunction(MachineFunction &MF) {
63  if (skipFunction(*MF.getFunction()))
64    return false;
65
66  bool MadeChange = false;
67  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
68
69  for (auto MFI = MF.begin(), MFIE = MF.end(); MFI != MFIE; ++MFI) {
70    MachineBasicBlock *MBB = &*MFI;
71    SmallVector<MachineInstr *, 4> Splats;
72
73    for (auto MBBI = MBB->rbegin(); MBBI != MBB->rend(); ++MBBI) {
74      MachineInstr *MI = &*MBBI;
75
76      if (MI->hasUnmodeledSideEffects() || MI->isCall()) {
77        Splats.clear();
78        continue;
79      }
80
81      // We're looking for a sequence like this:
82      // %F0<def> = LFD 0, %X3<kill>, %QF0<imp-def>; mem:LD8[%a](tbaa=!2)
83      // %QF1<def> = QVESPLATI %QF0<kill>, 0, %RM<imp-use>
84
85      for (auto SI = Splats.begin(); SI != Splats.end();) {
86        MachineInstr *SMI = *SI;
87        unsigned SplatReg = SMI->getOperand(0).getReg();
88        unsigned SrcReg = SMI->getOperand(1).getReg();
89
90        if (MI->modifiesRegister(SrcReg, TRI)) {
91          switch (MI->getOpcode()) {
92          default:
93            SI = Splats.erase(SI);
94            continue;
95          case PPC::LFS:
96          case PPC::LFD:
97          case PPC::LFSU:
98          case PPC::LFDU:
99          case PPC::LFSUX:
100          case PPC::LFDUX:
101          case PPC::LFSX:
102          case PPC::LFDX:
103          case PPC::LFIWAX:
104          case PPC::LFIWZX:
105            if (SplatReg != SrcReg) {
106              // We need to change the load to define the scalar subregister of
107              // the QPX splat source register.
108              unsigned SubRegIndex =
109                TRI->getSubRegIndex(SrcReg, MI->getOperand(0).getReg());
110              unsigned SplatSubReg = TRI->getSubReg(SplatReg, SubRegIndex);
111
112              // Substitute both the explicit defined register, and also the
113              // implicit def of the containing QPX register.
114              MI->getOperand(0).setReg(SplatSubReg);
115              MI->substituteRegister(SrcReg, SplatReg, 0, *TRI);
116            }
117
118            SI = Splats.erase(SI);
119
120            // If SMI is directly after MI, then MBBI's base iterator is
121            // pointing at SMI.  Adjust MBBI around the call to erase SMI to
122            // avoid invalidating MBBI.
123            ++MBBI;
124            SMI->eraseFromParent();
125            --MBBI;
126
127            ++NumSimplified;
128            MadeChange = true;
129            continue;
130          }
131        }
132
133        // If this instruction defines the splat register, then we cannot move
134        // the previous definition above it. If it reads from the splat
135        // register, then it must already be alive from some previous
136        // definition, and if the splat register is different from the source
137        // register, then this definition must not be the load for which we're
138        // searching.
139        if (MI->modifiesRegister(SplatReg, TRI) ||
140            (SrcReg != SplatReg &&
141             MI->readsRegister(SplatReg, TRI))) {
142          SI = Splats.erase(SI);
143          continue;
144        }
145
146        ++SI;
147      }
148
149      if (MI->getOpcode() != PPC::QVESPLATI &&
150          MI->getOpcode() != PPC::QVESPLATIs &&
151          MI->getOpcode() != PPC::QVESPLATIb)
152        continue;
153      if (MI->getOperand(2).getImm() != 0)
154        continue;
155
156      // If there are other uses of the scalar value after this, replacing
157      // those uses might be non-trivial.
158      if (!MI->getOperand(1).isKill())
159        continue;
160
161      Splats.push_back(MI);
162    }
163  }
164
165  return MadeChange;
166}
167