1bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// 2bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 3bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// The LLVM Compiler Infrastructure 4bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 5bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// This file is distributed under the University of Illinois Open Source 6bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// License. See LICENSE.TXT for details. 7bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 8bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//===----------------------------------------------------------------------===// 9bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 10bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// The Cortex-A15 processor employs a tracking scheme in its register renaming 11bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// in order to process each instruction's micro-ops speculatively and 12bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// out-of-order with appropriate forwarding. The ARM architecture allows VFP 13bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// instructions to read and write 32-bit S-registers. Each S-register 14bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. 15bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 16bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// There are several instruction patterns which can be used to provide this 17bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// capability which can provide higher performance than other, potentially more 18bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// direct patterns, specifically around when one micro-op reads a D-register 19bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// operand that has recently been written as one or more S-register results. 20bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 21bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// This file defines a pre-regalloc pass which looks for SPR producers which 22bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// are going to be used by a DPR (or QPR) consumers and creates the more 23bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// optimized access pattern. 24bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// 25bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga//===----------------------------------------------------------------------===// 26bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 27bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#define DEBUG_TYPE "a15-sd-optimizer" 28bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "ARM.h" 29bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "ARMBaseInstrInfo.h" 30bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "ARMSubtarget.h" 31bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "ARMISelLowering.h" 32bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "ARMTargetMachine.h" 33bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 34bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/ADT/SmallPtrSet.h" 35bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/ADT/Statistic.h" 36bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/CodeGen/MachineFunctionPass.h" 37bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/CodeGen/MachineInstr.h" 38bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/CodeGen/MachineInstrBuilder.h" 39bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/CodeGen/MachineRegisterInfo.h" 40bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/Support/CommandLine.h" 41bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/Support/Debug.h" 42bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/Support/raw_ostream.h" 43bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include "llvm/Target/TargetRegisterInfo.h" 44bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 45bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga#include <set> 46bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 47bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangausing namespace llvm; 48bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 49bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranganamespace { 50bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga struct A15SDOptimizer : public MachineFunctionPass { 51bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga static char ID; 52bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga A15SDOptimizer() : MachineFunctionPass(ID) {} 53bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 54bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga virtual bool runOnMachineFunction(MachineFunction &Fn); 55bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 56bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga virtual const char *getPassName() const { 57bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return "ARM A15 S->D optimizer"; 58bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 59bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 60bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga private: 61bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga const ARMBaseInstrInfo *TII; 62bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga const TargetRegisterInfo *TRI; 63bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineRegisterInfo *MRI; 64bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 65bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool runOnInstruction(MachineInstr *MI); 66bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 67bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 68bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Instruction builder helpers 69bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 70bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned createDupLane(MachineBasicBlock &MBB, 71bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 72bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 73bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg, unsigned Lane, 74bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool QPR=false); 75bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 76bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned createExtractSubreg(MachineBasicBlock &MBB, 77bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 78bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 79bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DReg, unsigned Lane, 80bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga const TargetRegisterClass *TRC); 81bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 82bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned createVExt(MachineBasicBlock &MBB, 83bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 84bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 85bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Ssub0, unsigned Ssub1); 86bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 87bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned createRegSequence(MachineBasicBlock &MBB, 88bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 89bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 90bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg1, unsigned Reg2); 91bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 92bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned createInsertSubreg(MachineBasicBlock &MBB, 93bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 94bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, unsigned DReg, unsigned Lane, 95bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned ToInsert); 96bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 97bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned createImplicitDef(MachineBasicBlock &MBB, 98bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 99bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL); 100bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 101bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 102bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Various property checkers 103bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 104bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); 105bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool hasPartialWrite(MachineInstr *MI); 106bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); 107bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned getDPRLaneFromSPR(unsigned SReg); 108bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 109bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 110bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Methods used for getting the definitions of partial registers 111bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 112bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 113bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *elideCopies(MachineInstr *MI); 114bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga void elideCopiesAndPHIs(MachineInstr *MI, 115bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVectorImpl<MachineInstr*> &Outs); 116bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 117bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 118bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Pattern optimization methods 119bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 120bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); 121bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned optimizeSDPattern(MachineInstr *MI); 122bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned getPrefSPRLane(unsigned SReg); 123bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 124bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 125bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Sanitizing method - used to make sure if don't leave dead code around. 126bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 127bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga void eraseInstrWithNoUses(MachineInstr *MI); 128bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 129bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 130bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // A map used to track the changes done by this pass. 131bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 132bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga std::map<MachineInstr*, unsigned> Replacements; 133bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga std::set<MachineInstr *> DeadInstr; 134bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga }; 135bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga char A15SDOptimizer::ID = 0; 136bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} // end anonymous namespace 137bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 138bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Returns true if this is a use of a SPR register. 139bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::usesRegClass(MachineOperand &MO, 140bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga const TargetRegisterClass *TRC) { 141bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!MO.isReg()) 142bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return false; 143bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg = MO.getReg(); 144bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 145bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (TargetRegisterInfo::isVirtualRegister(Reg)) 146bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); 147bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga else 148bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return TRC->contains(Reg); 149bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 150bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 151bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { 152bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, 153bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga &ARM::DPRRegClass); 154bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (DReg != ARM::NoRegister) return ARM::ssub_1; 155bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return ARM::ssub_0; 156bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 157bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 158bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Get the subreg type that is most likely to be coalesced 159bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// for an SPR register that will be used in VDUP32d pseudo. 160bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { 161bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(SReg)) 162bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return getDPRLaneFromSPR(SReg); 163bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 164bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *MI = MRI->getVRegDef(SReg); 165bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!MI) return ARM::ssub_0; 166bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineOperand *MO = MI->findRegisterDefOperand(SReg); 167bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 168bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga assert(MO->isReg() && "Non register operand found!"); 169bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!MO) return ARM::ssub_0; 170bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 171bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isCopy() && usesRegClass(MI->getOperand(1), 172bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga &ARM::SPRRegClass)) { 173bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SReg = MI->getOperand(1).getReg(); 174bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 175bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 176bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (TargetRegisterInfo::isVirtualRegister(SReg)) { 177bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; 178bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return ARM::ssub_0; 179bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 180bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return getDPRLaneFromSPR(SReg); 181bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 182bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 183bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// MI is known to be dead. Figure out what instructions 184bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// are also made dead by this and mark them for removal. 185bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangavoid A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { 186bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<MachineInstr *, 8> Front; 187bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DeadInstr.insert(MI); 188bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 189bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); 190bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Front.push_back(MI); 191bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 192bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga while (Front.size() != 0) { 193bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MI = Front.back(); 194bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Front.pop_back(); 195bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 196bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // MI is already known to be dead. We need to see 197bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // if other instructions can also be removed. 198bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { 199bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineOperand &MO = MI->getOperand(i); 200bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if ((!MO.isReg()) || (!MO.isUse())) 201bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 202bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg = MO.getReg(); 203bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(Reg)) 204bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 205bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineOperand *Op = MI->findRegisterDefOperand(Reg); 206bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 207bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!Op) 208bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 209bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 210bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *Def = Op->getParent(); 211bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 212bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // We don't need to do anything if we have already marked 213bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // this instruction as being dead. 214bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (DeadInstr.find(Def) != DeadInstr.end()) 215bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 216bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 217bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Check if all the uses of this instruction are marked as 218bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // dead. If so, we can also mark this instruction as being 219bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // dead. 220bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool IsDead = true; 221bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { 222bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineOperand &MODef = Def->getOperand(j); 223bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if ((!MODef.isReg()) || (!MODef.isDef())) 224bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 225bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DefReg = MODef.getReg(); 226bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(DefReg)) { 227bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga IsDead = false; 228bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga break; 229bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 230bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), 231bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga EE = MRI->use_end(); 232bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga II != EE; ++II) { 233bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // We don't care about self references. 234bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (&*II == Def) 235bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 236bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (DeadInstr.find(&*II) == DeadInstr.end()) { 237bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga IsDead = false; 238bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga break; 239bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 240bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 241bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 242bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 243bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!IsDead) continue; 244bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 245bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); 246bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DeadInstr.insert(Def); 247bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 248bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 249bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 250bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 251bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Creates the more optimized patterns and generally does all the code 252bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// transformations in this pass. 253bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { 254bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isCopy()) { 255bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); 256bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 257bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 258bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isInsertSubreg()) { 259bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DPRReg = MI->getOperand(1).getReg(); 260bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned SPRReg = MI->getOperand(2).getReg(); 261bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 262bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { 263bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); 264bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); 265bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 266bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (DPRMI && SPRMI) { 267bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // See if the first operand of this insert_subreg is IMPLICIT_DEF 268bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *ECDef = elideCopies(DPRMI); 269bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (ECDef != 0 && ECDef->isImplicitDef()) { 270bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Another corner case - if we're inserting something that is purely 271bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // a subreg copy of a DPR, just use that DPR. 272bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 273bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *EC = elideCopies(SPRMI); 274bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Is it a subreg copy of ssub_0? 275bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (EC && EC->isCopy() && 276bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga EC->getOperand(1).getSubReg() == ARM::ssub_0) { 277bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); 278bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 279bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Find the thing we're subreg copying out of - is it of the same 280bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // regclass as DPRMI? (i.e. a DPR or QPR). 281bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned FullReg = SPRMI->getOperand(1).getReg(); 282bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga const TargetRegisterClass *TRC = 283bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MRI->getRegClass(MI->getOperand(1).getReg()); 284bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { 285bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Subreg copy is compatible - returning "); 286bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << PrintReg(FullReg) << "\n"); 287bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga eraseInstrWithNoUses(MI); 288bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return FullReg; 289bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 290bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 291bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 292bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); 293bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 294bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 295bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 296bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); 297bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 298bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 299bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), 300bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga &ARM::SPRRegClass)) { 301bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // See if all bar one of the operands are IMPLICIT_DEF and insert the 302bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // optimizer pattern accordingly. 303bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned NumImplicit = 0, NumTotal = 0; 304bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned NonImplicitReg = ~0U; 305bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 306bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { 307bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!MI->getOperand(I).isReg()) 308bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 309bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga ++NumTotal; 310bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned OpReg = MI->getOperand(I).getReg(); 311bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 312bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(OpReg)) 313bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga break; 314bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 315bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *Def = MRI->getVRegDef(OpReg); 316bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!Def) 317bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga break; 318bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (Def->isImplicitDef()) 319bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga ++NumImplicit; 320bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga else 321bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga NonImplicitReg = MI->getOperand(I).getReg(); 322bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 323bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 324bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (NumImplicit == NumTotal - 1) 325bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return optimizeAllLanesPattern(MI, NonImplicitReg); 326bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga else 327bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); 328bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 329bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 330bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga assert(0 && "Unhandled update pattern!"); 331bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return 0; 332bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 333bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 334bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Return true if this MachineInstr inserts a scalar (SPR) value into 335bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// a D or Q register. 336bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { 337bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // The only way we can do a partial register update is through a COPY, 338bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // INSERT_SUBREG or REG_SEQUENCE. 339bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) 340bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return true; 341bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 342bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), 343bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga &ARM::SPRRegClass)) 344bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return true; 345bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 346bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) 347bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return true; 348bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 349bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return false; 350bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 351bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 352bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Looks through full copies to get the instruction that defines the input 353bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// operand for MI. 354bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaMachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { 355bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!MI->isFullCopy()) 356bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return MI; 357bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) 358bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return NULL; 359bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); 360bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!Def) 361bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return NULL; 362bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return elideCopies(Def); 363bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 364bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 365bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Look through full copies and PHIs to get the set of non-copy MachineInstrs 366bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// that can produce MI. 367bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangavoid A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, 368bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVectorImpl<MachineInstr*> &Outs) { 369bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Looking through PHIs may create loops so we need to track what 370bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // instructions we have visited before. 371bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga std::set<MachineInstr *> Reached; 372bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<MachineInstr *, 8> Front; 373bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Front.push_back(MI); 374bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga while (Front.size() != 0) { 375bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MI = Front.back(); 376bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Front.pop_back(); 377bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 378bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // If we have already explored this MachineInstr, ignore it. 379bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (Reached.find(MI) != Reached.end()) 380bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 381bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Reached.insert(MI); 382bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isPHI()) { 383bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { 384bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg = MI->getOperand(I).getReg(); 385bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(Reg)) { 386bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 387bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 388bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *NewMI = MRI->getVRegDef(Reg); 389bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!NewMI) 390bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 391bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Front.push_back(NewMI); 392bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 393bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } else if (MI->isFullCopy()) { 394bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) 395bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 396bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); 397bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!NewMI) 398bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 399bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Front.push_back(NewMI); 400bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } else { 401bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); 402bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Outs.push_back(MI); 403bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 404bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 405bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 406bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 407bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Return the DPR virtual registers that are read by this machine instruction 408bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// (if any). 409bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaSmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { 410bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || 411bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MI->isKill()) 412bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return SmallVector<unsigned, 8>(); 413bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 414bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<unsigned, 8> Defs; 415bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (unsigned i = 0; i < MI->getNumOperands(); ++i) { 416bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineOperand &MO = MI->getOperand(i); 417bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 418bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!MO.isReg() || !MO.isUse()) 419bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 420bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!usesRegClass(MO, &ARM::DPRRegClass) && 421bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga !usesRegClass(MO, &ARM::QPRRegClass)) 422bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 423bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 424bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Defs.push_back(MO.getReg()); 425bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 426bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Defs; 427bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 428bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 429bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Creates a DPR register from an SPR one by using a VDUP. 430bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 431bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createDupLane(MachineBasicBlock &MBB, 432bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 433bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 434bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg, unsigned Lane, bool QPR) { 435bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : 436bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga &ARM::DPRRegClass); 437bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga AddDefaultPred(BuildMI(MBB, 438bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertBefore, 439bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DL, 440bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), 441bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out) 442bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(Reg) 443bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addImm(Lane)); 444bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 445bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 446bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 447bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 448bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Creates a SPR register from a DPR by copying the value in lane 0. 449bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 450bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, 451bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 452bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 453bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DReg, unsigned Lane, 454bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga const TargetRegisterClass *TRC) { 455bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out = MRI->createVirtualRegister(TRC); 456bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga BuildMI(MBB, 457bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertBefore, 458bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DL, 459bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII->get(TargetOpcode::COPY), Out) 460bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(DReg, 0, Lane); 461bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 462bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 463bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 464bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 465bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. 466bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 467bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, 468bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 469bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 470bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Reg1, unsigned Reg2) { 471bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); 472bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga BuildMI(MBB, 473bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertBefore, 474bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DL, 475bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII->get(TargetOpcode::REG_SEQUENCE), Out) 476bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(Reg1) 477bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addImm(ARM::dsub_0) 478bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(Reg2) 479bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addImm(ARM::dsub_1); 480bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 481bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 482bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 483bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) 484bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// and merges them into one DPR register. 485bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 486bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createVExt(MachineBasicBlock &MBB, 487bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 488bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, 489bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Ssub0, unsigned Ssub1) { 490bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); 491bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga AddDefaultPred(BuildMI(MBB, 492bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertBefore, 493bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DL, 494bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII->get(ARM::VEXTd32), Out) 495bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(Ssub0) 496bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(Ssub1) 497bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addImm(1)); 498bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 499bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 500bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 501bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 502bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, 503bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 504bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL, unsigned DReg, unsigned Lane, 505bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned ToInsert) { 506bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); 507bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga BuildMI(MBB, 508bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertBefore, 509bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DL, 510bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII->get(TargetOpcode::INSERT_SUBREG), Out) 511bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(DReg) 512bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addReg(ToInsert) 513bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga .addImm(Lane); 514bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 515bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 516bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 517bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 518bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 519bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, 520bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertBefore, 521bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL) { 522bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); 523bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga BuildMI(MBB, 524bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertBefore, 525bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DL, 526bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII->get(TargetOpcode::IMPLICIT_DEF), Out); 527bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 528bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 529bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 530bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// This function inserts instructions in order to optimize interactions between 531bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all 532bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga// lanes, and the using VEXT instructions to recompose the result. 533bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangaunsigned 534bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaA15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { 535bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock::iterator InsertPt(MI); 536bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DebugLoc DL = MI->getDebugLoc(); 537bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineBasicBlock &MBB = *MI->getParent(); 538bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga InsertPt++; 539bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out; 540bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 541bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { 542bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, 543bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga ARM::dsub_0, &ARM::DPRRegClass); 544bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, 545bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga ARM::dsub_1, &ARM::DPRRegClass); 546bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 547bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); 548bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); 549bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out = createVExt(MBB, InsertPt, DL, Out1, Out2); 550bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 551bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); 552bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); 553bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); 554bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 555bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); 556bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 557bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { 558bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); 559bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); 560bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out = createVExt(MBB, InsertPt, DL, Out1, Out2); 561bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 562bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } else { 563bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && 564bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga "Found unexpected regclass!"); 565bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 566bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned PrefLane = getPrefSPRLane(Reg); 567bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned Lane; 568bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga switch (PrefLane) { 569bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga case ARM::ssub_0: Lane = 0; break; 570bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga case ARM::ssub_1: Lane = 1; break; 571bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga default: llvm_unreachable("Unknown preferred lane!"); 572bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 573bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 574bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); 575bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 576bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out = createImplicitDef(MBB, InsertPt, DL); 577bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); 578bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); 579bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga eraseInstrWithNoUses(MI); 580bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 581bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Out; 582bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 583bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 584bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { 585bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // We look for instructions that write S registers that are then read as 586bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and 587bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or 588bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // merge two SPR values to form a DPR register. In order avoid false 589bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // positives we make sure that there is an SPR producer so we look past 590bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // COPY and PHI nodes to find it. 591bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 592bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // The best code pattern for when an SPR producer is going to be used by a 593bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // DPR or QPR consumer depends on whether the other lanes of the 594bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // corresponding DPR/QPR are currently defined. 595bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 596bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // We can handle these efficiently, depending on the type of 597bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // pseudo-instruction that is producing the pattern 598bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 599bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // * COPY: * VDUP all lanes and merge the results together 600bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // using VEXTs. 601bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 602bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR 603bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // lane, and the other lane(s) of the DPR/QPR register 604bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // that we are inserting in are undefined, use the 605bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // original DPR/QPR value. 606bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // * Otherwise, fall back on the same stategy as COPY. 607bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 608bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // * REG_SEQUENCE: * If all except one of the input operands are 609bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // IMPLICIT_DEFs, insert the VDUP pattern for just the 610bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // defined input operand 611bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // * Otherwise, fall back on the same stategy as COPY. 612bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // 613bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 614bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // First, get all the reads of D-registers done by this instruction. 615bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<unsigned, 8> Defs = getReadDPRs(MI); 616bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool Modified = false; 617bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 618365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper for (SmallVectorImpl<unsigned>::iterator I = Defs.begin(), E = Defs.end(); 619bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga I != E; ++I) { 620bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Follow the def-use chain for this DPR through COPYs, and also through 621bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // PHIs (which are essentially multi-way COPYs). It is because of PHIs that 622bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // we can end up with multiple defs of this DPR. 623bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 624bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<MachineInstr *, 8> DefSrcs; 625bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!TRI->isVirtualRegister(*I)) 626bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 627bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *Def = MRI->getVRegDef(*I); 628bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!Def) 629bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 630bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 631bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga elideCopiesAndPHIs(Def, DefSrcs); 632bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 633365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(), 634bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga EE = DefSrcs.end(); II != EE; ++II) { 635bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MachineInstr *MI = *II; 636bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 637bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // If we've already analyzed and replaced this operand, don't do 638bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // anything. 639bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (Replacements.find(MI) != Replacements.end()) 640bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 641bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 642bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Now, work out if the instruction causes a SPR->DPR dependency. 643bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (!hasPartialWrite(MI)) 644bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga continue; 645bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 646bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // Collect all the uses of this MI's DPR def for updating later. 647bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga SmallVector<MachineOperand*, 8> Uses; 648bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned DPRDefReg = MI->getOperand(0).getReg(); 649bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), 650bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga E = MRI->use_end(); I != E; ++I) 651bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Uses.push_back(&I.getOperand()); 652bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 653bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga // We can optimize this. 654bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga unsigned NewReg = optimizeSDPattern(MI); 655bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 656bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga if (NewReg != 0) { 657bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Modified = true; 658365ef0b197d7c841f8e501da64296df65be4ca23Craig Topper for (SmallVectorImpl<MachineOperand *>::const_iterator I = Uses.begin(), 659bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga E = Uses.end(); I != E; ++I) { 660bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Replacing operand " 661bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga << **I << " with " 662bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga << PrintReg(NewReg) << "\n"); 663bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga (*I)->substVirtReg(NewReg, 0, *TRI); 664bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 665bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 666bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Replacements[MI] = NewReg; 667bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 668bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 669bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Modified; 670bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 671bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 672bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Barangabool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { 673bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); 674bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga TRI = Fn.getTarget().getRegisterInfo(); 675bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MRI = &Fn.getRegInfo(); 676bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga bool Modified = false; 677bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 678bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); 679bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 680bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga DeadInstr.clear(); 681bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Replacements.clear(); 682bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 683bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; 684bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga ++MFI) { 685bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 686bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); 687bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga MI != ME;) { 688bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga Modified |= runOnInstruction(MI++); 689bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 690bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 691bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 692bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 693bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), 694bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga E = DeadInstr.end(); 695bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga I != E; ++I) { 696bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga (*I)->eraseFromParent(); 697bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga } 698bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 699bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return Modified; 700bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 701bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga 702bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu BarangaFunctionPass *llvm::createA15SDOptimizerPass() { 703bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga return new A15SDOptimizer(); 704bcbf3fddef46f1f6e2f2408064c4b75e4b6c90f5Silviu Baranga} 705