1bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==//
2bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
3bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//                     The LLVM Compiler Infrastructure
4bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
5bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// This file is distributed under the University of Illinois Open Source
6bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// License. See LICENSE.TXT for details.
7bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
8bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//===----------------------------------------------------------------------===//
9bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
10bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// The Cortex-A15 processor employs a tracking scheme in its register renaming
11bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// in order to process each instruction's micro-ops speculatively and
12bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// out-of-order with appropriate forwarding. The ARM architecture allows VFP
13bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// instructions to read and write 32-bit S-registers.  Each S-register
14bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// corresponds to one half (upper or lower) of an overlaid 64-bit D-register.
15bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
// Several instruction patterns can provide this capability, and some of them
// offer higher performance than other, potentially more direct patterns. This
// matters specifically when one micro-op reads a D-register operand that has
// recently been written as one or more S-register results.
20bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
// This file defines a pre-regalloc pass which looks for SPR producers whose
// results are going to be used by DPR (or QPR) consumers and creates the more
// optimized access pattern.
24bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//
25bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//===----------------------------------------------------------------------===//
26bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
27bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#define DEBUG_TYPE "a15-sd-optimizer"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
#include "ARMTargetMachine.h"

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"

#include <map>
#include <set>
46bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
47bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangausing namespace llvm;
48bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
49bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranganamespace {
50bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  struct A15SDOptimizer : public MachineFunctionPass {
51bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    static char ID;
52bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    A15SDOptimizer() : MachineFunctionPass(ID) {}
53bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
54bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    virtual bool runOnMachineFunction(MachineFunction &Fn);
55bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
56bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    virtual const char *getPassName() const {
57bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      return "ARM A15 S->D optimizer";
58bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
59bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
60bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  private:
61bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    const ARMBaseInstrInfo *TII;
62bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    const TargetRegisterInfo *TRI;
63bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    MachineRegisterInfo *MRI;
64bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
65bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    bool runOnInstruction(MachineInstr *MI);
66bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
67bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
68bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // Instruction builder helpers
69bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
70bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned createDupLane(MachineBasicBlock &MBB,
71bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           MachineBasicBlock::iterator InsertBefore,
72bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           DebugLoc DL,
73bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           unsigned Reg, unsigned Lane,
74bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           bool QPR=false);
75bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
76bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned createExtractSubreg(MachineBasicBlock &MBB,
77bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                 MachineBasicBlock::iterator InsertBefore,
78bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                 DebugLoc DL,
79bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                 unsigned DReg, unsigned Lane,
80bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                 const TargetRegisterClass *TRC);
81bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
82bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned createVExt(MachineBasicBlock &MBB,
83bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                        MachineBasicBlock::iterator InsertBefore,
84bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                        DebugLoc DL,
85bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                        unsigned Ssub0, unsigned Ssub1);
86bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
87bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned createRegSequence(MachineBasicBlock &MBB,
88bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                               MachineBasicBlock::iterator InsertBefore,
89bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                               DebugLoc DL,
90bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                               unsigned Reg1, unsigned Reg2);
91bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
92bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned createInsertSubreg(MachineBasicBlock &MBB,
93bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                MachineBasicBlock::iterator InsertBefore,
94bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                DebugLoc DL, unsigned DReg, unsigned Lane,
95bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                unsigned ToInsert);
96bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
97bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned createImplicitDef(MachineBasicBlock &MBB,
98bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                               MachineBasicBlock::iterator InsertBefore,
99bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                               DebugLoc DL);
100bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
101bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
102bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // Various property checkers
103bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
104bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC);
105bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    bool hasPartialWrite(MachineInstr *MI);
106bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI);
107bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned getDPRLaneFromSPR(unsigned SReg);
108bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
109bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
110bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // Methods used for getting the definitions of partial registers
111bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
112bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
113bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    MachineInstr *elideCopies(MachineInstr *MI);
114bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    void elideCopiesAndPHIs(MachineInstr *MI,
115bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                            SmallVectorImpl<MachineInstr*> &Outs);
116bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
117bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
118bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // Pattern optimization methods
119bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
120bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg);
121bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned optimizeSDPattern(MachineInstr *MI);
122bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned getPrefSPRLane(unsigned SReg);
123bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
124bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
125bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // Sanitizing method - used to make sure if don't leave dead code around.
126bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
127bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    void eraseInstrWithNoUses(MachineInstr *MI);
128bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
129bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
130bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // A map used to track the changes done by this pass.
131bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    //
132bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    std::map<MachineInstr*, unsigned> Replacements;
133bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    std::set<MachineInstr *> DeadInstr;
134bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  };
135bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  char A15SDOptimizer::ID = 0;
136bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} // end anonymous namespace
137bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
138bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Returns true if this is a use of a SPR register.
139bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::usesRegClass(MachineOperand &MO,
140bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                  const TargetRegisterClass *TRC) {
141bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!MO.isReg())
142bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return false;
143bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Reg = MO.getReg();
144bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
145bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (TargetRegisterInfo::isVirtualRegister(Reg))
146bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return MRI->getRegClass(Reg)->hasSuperClassEq(TRC);
147bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  else
148bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return TRC->contains(Reg);
149bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
150bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
151bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) {
152bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1,
153bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                           &ARM::DPRRegClass);
154bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (DReg != ARM::NoRegister) return ARM::ssub_1;
155bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return ARM::ssub_0;
156bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
157bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
158bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Get the subreg type that is most likely to be coalesced
159bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// for an SPR register that will be used in VDUP32d pseudo.
160bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) {
161bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!TRI->isVirtualRegister(SReg))
162bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return getDPRLaneFromSPR(SReg);
163bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
164bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  MachineInstr *MI = MRI->getVRegDef(SReg);
165bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!MI) return ARM::ssub_0;
166bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  MachineOperand *MO = MI->findRegisterDefOperand(SReg);
167bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
168bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  assert(MO->isReg() && "Non register operand found!");
169bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!MO) return ARM::ssub_0;
170bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
171bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isCopy() && usesRegClass(MI->getOperand(1),
172bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                    &ARM::SPRRegClass)) {
173bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    SReg = MI->getOperand(1).getReg();
174bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
175bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
176bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (TargetRegisterInfo::isVirtualRegister(SReg)) {
177bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1;
178bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return ARM::ssub_0;
179bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
180bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return getDPRLaneFromSPR(SReg);
181bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
182bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
183bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// MI is known to be dead. Figure out what instructions
184bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// are also made dead by this and mark them for removal.
185bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangavoid A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
186bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  SmallVector<MachineInstr *, 8> Front;
187bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  DeadInstr.insert(MI);
188bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
189bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n");
190bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  Front.push_back(MI);
191bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
192bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  while (Front.size() != 0) {
193bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    MI = Front.back();
194bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Front.pop_back();
195bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
196bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // MI is already known to be dead. We need to see
197bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // if other instructions can also be removed.
198bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
199bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineOperand &MO = MI->getOperand(i);
200bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if ((!MO.isReg()) || (!MO.isUse()))
201bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
202bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      unsigned Reg = MO.getReg();
203bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!TRI->isVirtualRegister(Reg))
204bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
205bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineOperand *Op = MI->findRegisterDefOperand(Reg);
206bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
207bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!Op)
208bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
209bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
210bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineInstr *Def = Op->getParent();
211bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
212bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // We don't need to do anything if we have already marked
213bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // this instruction as being dead.
214bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (DeadInstr.find(Def) != DeadInstr.end())
215bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
216bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
217bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // Check if all the uses of this instruction are marked as
218bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // dead. If so, we can also mark this instruction as being
219bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // dead.
220bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      bool IsDead = true;
221bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      for (unsigned int j = 0; j < Def->getNumOperands(); ++j) {
222bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        MachineOperand &MODef = Def->getOperand(j);
223bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        if ((!MODef.isReg()) || (!MODef.isDef()))
224bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          continue;
225bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        unsigned DefReg = MODef.getReg();
226bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        if (!TRI->isVirtualRegister(DefReg)) {
227bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          IsDead = false;
228bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          break;
229bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        }
230bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg),
231bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                            EE = MRI->use_end();
232bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                            II != EE; ++II) {
233bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          // We don't care about self references.
234bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          if (&*II == Def)
235bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            continue;
236bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          if (DeadInstr.find(&*II) == DeadInstr.end()) {
237bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            IsDead = false;
238bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            break;
239bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          }
240bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        }
241bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      }
242bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
243bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!IsDead) continue;
244bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
245bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      DEBUG(dbgs() << "Deleting instruction " << *Def << "\n");
246bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      DeadInstr.insert(Def);
247bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
248bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
249bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
250bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
251bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Creates the more optimized patterns and generally does all the code
252bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// transformations in this pass.
253bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
254bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isCopy()) {
255bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg());
256bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
257bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
258bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isInsertSubreg()) {
259bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned DPRReg = MI->getOperand(1).getReg();
260bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned SPRReg = MI->getOperand(2).getReg();
261bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
262bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) {
263bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg());
264bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg());
265bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
266bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (DPRMI && SPRMI) {
267bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        // See if the first operand of this insert_subreg is IMPLICIT_DEF
268bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        MachineInstr *ECDef = elideCopies(DPRMI);
269bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        if (ECDef != 0 && ECDef->isImplicitDef()) {
270bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          // Another corner case - if we're inserting something that is purely
271bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          // a subreg copy of a DPR, just use that DPR.
272bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
273bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          MachineInstr *EC = elideCopies(SPRMI);
274bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          // Is it a subreg copy of ssub_0?
275bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          if (EC && EC->isCopy() &&
276bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga              EC->getOperand(1).getSubReg() == ARM::ssub_0) {
277bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI);
278bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
279bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            // Find the thing we're subreg copying out of - is it of the same
280bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            // regclass as DPRMI? (i.e. a DPR or QPR).
281bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            unsigned FullReg = SPRMI->getOperand(1).getReg();
282bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            const TargetRegisterClass *TRC =
283bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga              MRI->getRegClass(MI->getOperand(1).getReg());
284bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) {
285bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga              DEBUG(dbgs() << "Subreg copy is compatible - returning ");
286bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga              DEBUG(dbgs() << PrintReg(FullReg) << "\n");
287bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga              eraseInstrWithNoUses(MI);
288bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga              return FullReg;
289bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga            }
290bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          }
291bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
292bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg());
293bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        }
294bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      }
295bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
296bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
297bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
298bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
299bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isRegSequence() && usesRegClass(MI->getOperand(1),
300bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                          &ARM::SPRRegClass)) {
301bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // See if all bar one of the operands are IMPLICIT_DEF and insert the
302bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // optimizer pattern accordingly.
303bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned NumImplicit = 0, NumTotal = 0;
304bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned NonImplicitReg = ~0U;
305bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
306bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) {
307bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!MI->getOperand(I).isReg())
308bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
309bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      ++NumTotal;
310bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      unsigned OpReg = MI->getOperand(I).getReg();
311bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
312bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!TRI->isVirtualRegister(OpReg))
313bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        break;
314bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
315bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineInstr *Def = MRI->getVRegDef(OpReg);
316bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!Def)
317bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        break;
318bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (Def->isImplicitDef())
319bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        ++NumImplicit;
320bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      else
321bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        NonImplicitReg = MI->getOperand(I).getReg();
322bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
323bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
324bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (NumImplicit == NumTotal - 1)
325bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      return optimizeAllLanesPattern(MI, NonImplicitReg);
326bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    else
327bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
328bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
329bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
330bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  assert(0 && "Unhandled update pattern!");
331bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return 0;
332bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
333bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
334bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Return true if this MachineInstr inserts a scalar (SPR) value into
335bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// a D or Q register.
336bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) {
337bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // The only way we can do a partial register update is through a COPY,
338bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // INSERT_SUBREG or REG_SEQUENCE.
339bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
340bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return true;
341bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
342bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2),
343bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                           &ARM::SPRRegClass))
344bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return true;
345bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
346bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass))
347bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return true;
348bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
349bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return false;
350bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
351bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
352bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Looks through full copies to get the instruction that defines the input
353bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// operand for MI.
354bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaMachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
355bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!MI->isFullCopy())
356bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return MI;
357bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
358bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return NULL;
359bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
360bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (!Def)
361bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return NULL;
362bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return elideCopies(Def);
363bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
364bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
365bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Look through full copies and PHIs to get the set of non-copy MachineInstrs
366bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// that can produce MI.
367bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangavoid A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI,
368bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                        SmallVectorImpl<MachineInstr*> &Outs) {
369bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   // Looking through PHIs may create loops so we need to track what
370bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   // instructions we have visited before.
371bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   std::set<MachineInstr *> Reached;
372bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   SmallVector<MachineInstr *, 8> Front;
373bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   Front.push_back(MI);
374bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   while (Front.size() != 0) {
375bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     MI = Front.back();
376bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     Front.pop_back();
377bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
378bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     // If we have already explored this MachineInstr, ignore it.
379bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     if (Reached.find(MI) != Reached.end())
380bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       continue;
381bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     Reached.insert(MI);
382bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     if (MI->isPHI()) {
383bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) {
384bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         unsigned Reg = MI->getOperand(I).getReg();
385bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         if (!TRI->isVirtualRegister(Reg)) {
386bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga           continue;
387bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         }
388bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         MachineInstr *NewMI = MRI->getVRegDef(Reg);
389bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         if (!NewMI)
390bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga           continue;
391bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         Front.push_back(NewMI);
392bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       }
393bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     } else if (MI->isFullCopy()) {
394bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
395bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         continue;
396bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg());
397bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       if (!NewMI)
398bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga         continue;
399bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       Front.push_back(NewMI);
400bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     } else {
401bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       DEBUG(dbgs() << "Found partial copy" << *MI <<"\n");
402bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       Outs.push_back(MI);
403bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     }
404bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga   }
405bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
406bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
407bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Return the DPR virtual registers that are read by this machine instruction
408bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// (if any).
409bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaSmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) {
410bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() ||
411bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MI->isKill())
412bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    return SmallVector<unsigned, 8>();
413bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
414bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  SmallVector<unsigned, 8> Defs;
415bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  for (unsigned i = 0; i < MI->getNumOperands(); ++i) {
416bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    MachineOperand &MO = MI->getOperand(i);
417bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
418bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (!MO.isReg() || !MO.isUse())
419bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      continue;
420bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (!usesRegClass(MO, &ARM::DPRRegClass) &&
421bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        !usesRegClass(MO, &ARM::QPRRegClass))
422bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      continue;
423bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
424bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Defs.push_back(MO.getReg());
425bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
426bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Defs;
427bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
428bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
429bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Creates a DPR register from an SPR one by using a VDUP.
430bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
431bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
432bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                              MachineBasicBlock::iterator InsertBefore,
433bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                              DebugLoc DL,
434bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                              unsigned Reg, unsigned Lane, bool QPR) {
435bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass :
436bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                                  &ARM::DPRRegClass);
437bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  AddDefaultPred(BuildMI(MBB,
438bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         InsertBefore,
439bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         DL,
440bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d),
441bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         Out)
442bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                   .addReg(Reg)
443bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                   .addImm(Lane));
444bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
445bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
446bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
447bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
448bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Creates a SPR register from a DPR by copying the value in lane 0.
449bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
450bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB,
451bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                    MachineBasicBlock::iterator InsertBefore,
452bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                    DebugLoc DL,
453bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                    unsigned DReg, unsigned Lane,
454bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                    const TargetRegisterClass *TRC) {
455bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out = MRI->createVirtualRegister(TRC);
456bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  BuildMI(MBB,
457bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          InsertBefore,
458bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          DL,
459bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          TII->get(TargetOpcode::COPY), Out)
460bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addReg(DReg, 0, Lane);
461bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
462bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
463bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
464bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
465bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE.
466bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
467bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createRegSequence(MachineBasicBlock &MBB,
468bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                  MachineBasicBlock::iterator InsertBefore,
469bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                  DebugLoc DL,
470bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                  unsigned Reg1, unsigned Reg2) {
471bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass);
472bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  BuildMI(MBB,
473bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          InsertBefore,
474bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          DL,
475bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          TII->get(TargetOpcode::REG_SEQUENCE), Out)
476bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addReg(Reg1)
477bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addImm(ARM::dsub_0)
478bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addReg(Reg2)
479bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addImm(ARM::dsub_1);
480bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
481bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
482bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
483bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1)
484bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// and merges them into one DPR register.
485bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
486bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createVExt(MachineBasicBlock &MBB,
487bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           MachineBasicBlock::iterator InsertBefore,
488bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           DebugLoc DL,
489bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                           unsigned Ssub0, unsigned Ssub1) {
490bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
491bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  AddDefaultPred(BuildMI(MBB,
492bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         InsertBefore,
493bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         DL,
494bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                         TII->get(ARM::VEXTd32), Out)
495bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                   .addReg(Ssub0)
496bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                   .addReg(Ssub1)
497bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                   .addImm(1));
498bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
499bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
500bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
501bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
502bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB,
503bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                   MachineBasicBlock::iterator InsertBefore,
504bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                   DebugLoc DL, unsigned DReg, unsigned Lane,
505bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                   unsigned ToInsert) {
506bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass);
507bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  BuildMI(MBB,
508bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          InsertBefore,
509bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          DL,
510bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          TII->get(TargetOpcode::INSERT_SUBREG), Out)
511bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addReg(DReg)
512bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addReg(ToInsert)
513bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    .addImm(Lane);
514bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
515bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
516bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
517bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
518bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
519bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB,
520bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                  MachineBasicBlock::iterator InsertBefore,
521bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                  DebugLoc DL) {
522bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass);
523bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  BuildMI(MBB,
524bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          InsertBefore,
525bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          DL,
526bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          TII->get(TargetOpcode::IMPLICIT_DEF), Out);
527bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
528bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
529bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
530bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// This function inserts instructions in order to optimize interactions between
531bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all
532bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// lanes, and the using VEXT instructions to recompose the result.
533bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned
534bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) {
535bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  MachineBasicBlock::iterator InsertPt(MI);
536bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  DebugLoc DL = MI->getDebugLoc();
537bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  MachineBasicBlock &MBB = *MI->getParent();
538bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  InsertPt++;
539bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  unsigned Out;
540bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
541bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) {
542bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg,
543bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                         ARM::dsub_0, &ARM::DPRRegClass);
544bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg,
545bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                         ARM::dsub_1, &ARM::DPRRegClass);
546bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
547bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0);
548bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1);
549bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
550bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
551bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0);
552bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1);
553bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4);
554bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
555bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out = createRegSequence(MBB, InsertPt, DL, Out, Out2);
556bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
557bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) {
558bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0);
559bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1);
560bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out = createVExt(MBB, InsertPt, DL, Out1, Out2);
561bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
562bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  } else {
563bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) &&
564bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga           "Found unexpected regclass!");
565bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
566bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned PrefLane = getPrefSPRLane(Reg);
567bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    unsigned Lane;
568bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    switch (PrefLane) {
569bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      case ARM::ssub_0: Lane = 0; break;
570bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      case ARM::ssub_1: Lane = 1; break;
571bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      default: llvm_unreachable("Unknown preferred lane!");
572bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
573bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
574bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass);
575bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
576bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out = createImplicitDef(MBB, InsertPt, DL);
577bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg);
578bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR);
579bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    eraseInstrWithNoUses(MI);
580bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
581bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Out;
582bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
583bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
584bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
585bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // We look for instructions that write S registers that are then read as
586bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and
587bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or
588bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // merge two SPR values to form a DPR register.  In order avoid false
589bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // positives we make sure that there is an SPR producer so we look past
590bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // COPY and PHI nodes to find it.
591bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //
592bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // The best code pattern for when an SPR producer is going to be used by a
593bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // DPR or QPR consumer depends on whether the other lanes of the
594bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // corresponding DPR/QPR are currently defined.
595bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //
596bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // We can handle these efficiently, depending on the type of
597bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // pseudo-instruction that is producing the pattern
598bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //
599bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //   * COPY:          * VDUP all lanes and merge the results together
600bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                      using VEXTs.
601bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //
602bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //   * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
603bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                      lane, and the other lane(s) of the DPR/QPR register
604bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                      that we are inserting in are undefined, use the
605bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                      original DPR/QPR value.
606bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                    * Otherwise, fall back on the same stategy as COPY.
607bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //
608bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //   * REG_SEQUENCE:  * If all except one of the input operands are
609bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                      IMPLICIT_DEFs, insert the VDUP pattern for just the
610bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                      defined input operand
611bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //                    * Otherwise, fall back on the same stategy as COPY.
612bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  //
613bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
614bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  // First, get all the reads of D-registers done by this instruction.
615bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  SmallVector<unsigned, 8> Defs = getReadDPRs(MI);
616bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  bool Modified = false;
617bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
618365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper  for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end();
619bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga     I != E; ++I) {
620bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // Follow the def-use chain for this DPR through COPYs, and also through
621bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // PHIs (which are essentially multi-way COPYs). It is because of PHIs that
622bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    // we can end up with multiple defs of this DPR.
623bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
624bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    SmallVector<MachineInstr *, 8> DefSrcs;
625bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (!TRI->isVirtualRegister(*I))
626bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      continue;
627bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    MachineInstr *Def = MRI->getVRegDef(*I);
628bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    if (!Def)
629bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      continue;
630bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
631bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    elideCopiesAndPHIs(Def, DefSrcs);
632bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
633365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper    for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(),
634bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      EE = DefSrcs.end(); II != EE; ++II) {
635bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MachineInstr *MI = *II;
636bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
637bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // If we've already analyzed and replaced this operand, don't do
638bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // anything.
639bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (Replacements.find(MI) != Replacements.end())
640bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
641bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
642bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // Now, work out if the instruction causes a SPR->DPR dependency.
643bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (!hasPartialWrite(MI))
644bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        continue;
645bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
646bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // Collect all the uses of this MI's DPR def for updating later.
647bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      SmallVector<MachineOperand*, 8> Uses;
648bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      unsigned DPRDefReg = MI->getOperand(0).getReg();
649bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
650bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga             E = MRI->use_end(); I != E; ++I)
651bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        Uses.push_back(&I.getOperand());
652bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
653bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      // We can optimize this.
654bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      unsigned NewReg = optimizeSDPattern(MI);
655bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
656bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      if (NewReg != 0) {
657bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        Modified = true;
658365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper        for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(),
659bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga               E = Uses.end(); I != E; ++I) {
660bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          DEBUG(dbgs() << "Replacing operand "
661bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                       << **I << " with "
662bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                       << PrintReg(NewReg) << "\n");
663bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga          (*I)->substVirtReg(NewReg, 0, *TRI);
664bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga        }
665bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      }
666bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      Replacements[MI] = NewReg;
667bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
668bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
669bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Modified;
670bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
671bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
672bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
673bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo());
674bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  TRI = Fn.getTarget().getRegisterInfo();
675bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  MRI = &Fn.getRegInfo();
676bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  bool Modified = false;
677bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
678bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n");
679bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
680bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  DeadInstr.clear();
681bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  Replacements.clear();
682bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
683bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
684bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga       ++MFI) {
685bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
686bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end();
687bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      MI != ME;) {
688bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga      Modified |= runOnInstruction(MI++);
689bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    }
690bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
691bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
692bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
693bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
694bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                            E = DeadInstr.end();
695bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga                                            I != E; ++I) {
696bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga    (*I)->eraseFromParent();
697bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  }
698bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
699bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return Modified;
700bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
701bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga
702bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaFunctionPass *llvm::createA15SDOptimizerPass() {
703bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga  return new A15SDOptimizer();
704bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga}
705