1//==- llvm/CodeGen/ExecutionDepsFix.h - Execution Dependency Fix -*- C++ -*-==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10/// \file Execution Dependency Fix pass. 11/// 12/// Some X86 SSE instructions like mov, and, or, xor are available in different 13/// variants for different operand types. These variant instructions are 14/// equivalent, but on Nehalem and newer cpus there is extra latency 15/// transferring data between integer and floating point domains. ARM cores 16/// have similar issues when they are configured with both VFP and NEON 17/// pipelines. 18/// 19/// This pass changes the variant instructions to minimize domain crossings. 20// 21//===----------------------------------------------------------------------===// 22 23#ifndef LLVM_CODEGEN_EXECUTIONDEPSFIX_H 24#define LLVM_CODEGEN_EXECUTIONDEPSFIX_H 25 26#include "llvm/ADT/DenseMap.h" 27#include "llvm/ADT/iterator_range.h" 28#include "llvm/ADT/SmallVector.h" 29#include "llvm/CodeGen/LivePhysRegs.h" 30#include "llvm/CodeGen/MachineFunction.h" 31#include "llvm/CodeGen/MachineFunctionPass.h" 32#include "llvm/CodeGen/RegisterClassInfo.h" 33#include "llvm/Pass.h" 34#include "llvm/Support/Allocator.h" 35#include "llvm/Support/MathExtras.h" 36#include <cassert> 37#include <limits> 38#include <utility> 39#include <vector> 40 41namespace llvm { 42 43class MachineBasicBlock; 44class MachineInstr; 45class TargetInstrInfo; 46 47/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track 48/// of execution domains. 49/// 50/// An open DomainValue represents a set of instructions that can still switch 51/// execution domain. Multiple registers may refer to the same open 52/// DomainValue - they will eventually be collapsed to the same execution 53/// domain. 54/// 55/// A collapsed DomainValue represents a single register that has been forced 56/// into one of more execution domains. There is a separate collapsed 57/// DomainValue for each register, but it may contain multiple execution 58/// domains. A register value is initially created in a single execution 59/// domain, but if we were forced to pay the penalty of a domain crossing, we 60/// keep track of the fact that the register is now available in multiple 61/// domains. 62struct DomainValue { 63 // Basic reference counting. 64 unsigned Refs = 0; 65 66 // Bitmask of available domains. For an open DomainValue, it is the still 67 // possible domains for collapsing. For a collapsed DomainValue it is the 68 // domains where the register is available for free. 69 unsigned AvailableDomains; 70 71 // Pointer to the next DomainValue in a chain. When two DomainValues are 72 // merged, Victim.Next is set to point to Victor, so old DomainValue 73 // references can be updated by following the chain. 74 DomainValue *Next; 75 76 // Twiddleable instructions using or defining these registers. 77 SmallVector<MachineInstr*, 8> Instrs; 78 79 DomainValue() { clear(); } 80 81 // A collapsed DomainValue has no instructions to twiddle - it simply keeps 82 // track of the domains where the registers are already available. 83 bool isCollapsed() const { return Instrs.empty(); } 84 85 // Is domain available? 86 bool hasDomain(unsigned domain) const { 87 assert(domain < 88 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && 89 "undefined behavior"); 90 return AvailableDomains & (1u << domain); 91 } 92 93 // Mark domain as available. 94 void addDomain(unsigned domain) { 95 AvailableDomains |= 1u << domain; 96 } 97 98 // Restrict to a single domain available. 99 void setSingleDomain(unsigned domain) { 100 AvailableDomains = 1u << domain; 101 } 102 103 // Return bitmask of domains that are available and in mask. 104 unsigned getCommonDomains(unsigned mask) const { 105 return AvailableDomains & mask; 106 } 107 108 // First domain available. 109 unsigned getFirstDomain() const { 110 return countTrailingZeros(AvailableDomains); 111 } 112 113 // Clear this DomainValue and point to next which has all its data. 114 void clear() { 115 AvailableDomains = 0; 116 Next = nullptr; 117 Instrs.clear(); 118 } 119}; 120 121/// Information about a live register. 122struct LiveReg { 123 /// Value currently in this register, or NULL when no value is being tracked. 124 /// This counts as a DomainValue reference. 125 DomainValue *Value; 126 127 /// Instruction that defined this register, relative to the beginning of the 128 /// current basic block. When a LiveReg is used to represent a live-out 129 /// register, this value is relative to the end of the basic block, so it 130 /// will be a negative number. 131 int Def; 132}; 133 134class ExecutionDepsFix : public MachineFunctionPass { 135 SpecificBumpPtrAllocator<DomainValue> Allocator; 136 SmallVector<DomainValue*,16> Avail; 137 138 const TargetRegisterClass *const RC; 139 MachineFunction *MF; 140 const TargetInstrInfo *TII; 141 const TargetRegisterInfo *TRI; 142 RegisterClassInfo RegClassInfo; 143 std::vector<SmallVector<int, 1>> AliasMap; 144 const unsigned NumRegs; 145 LiveReg *LiveRegs; 146 struct MBBInfo { 147 // Keeps clearance and domain information for all registers. Note that this 148 // is different from the usual definition notion of liveness. The CPU 149 // doesn't care whether or not we consider a register killed. 150 LiveReg *OutRegs = nullptr; 151 152 // Whether we have gotten to this block in primary processing yet. 153 bool PrimaryCompleted = false; 154 155 // The number of predecessors for which primary processing has completed 156 unsigned IncomingProcessed = 0; 157 158 // The value of `IncomingProcessed` at the start of primary processing 159 unsigned PrimaryIncoming = 0; 160 161 // The number of predecessors for which all processing steps are done. 162 unsigned IncomingCompleted = 0; 163 164 MBBInfo() = default; 165 }; 166 using MBBInfoMap = DenseMap<MachineBasicBlock *, MBBInfo>; 167 MBBInfoMap MBBInfos; 168 169 /// List of undefined register reads in this block in forward order. 170 std::vector<std::pair<MachineInstr *, unsigned>> UndefReads; 171 172 /// Storage for register unit liveness. 173 LivePhysRegs LiveRegSet; 174 175 /// Current instruction number. 176 /// The first instruction in each basic block is 0. 177 int CurInstr; 178 179public: 180 ExecutionDepsFix(char &PassID, const TargetRegisterClass &RC) 181 : MachineFunctionPass(PassID), RC(&RC), NumRegs(RC.getNumRegs()) {} 182 183 void getAnalysisUsage(AnalysisUsage &AU) const override { 184 AU.setPreservesAll(); 185 MachineFunctionPass::getAnalysisUsage(AU); 186 } 187 188 bool runOnMachineFunction(MachineFunction &MF) override; 189 190 MachineFunctionProperties getRequiredProperties() const override { 191 return MachineFunctionProperties().set( 192 MachineFunctionProperties::Property::NoVRegs); 193 } 194 195private: 196 iterator_range<SmallVectorImpl<int>::const_iterator> 197 regIndices(unsigned Reg) const; 198 // DomainValue allocation. 199 DomainValue *alloc(int domain = -1); 200 DomainValue *retain(DomainValue *DV) { 201 if (DV) ++DV->Refs; 202 return DV; 203 } 204 void release(DomainValue*); 205 DomainValue *resolve(DomainValue*&); 206 207 // LiveRegs manipulations. 208 void setLiveReg(int rx, DomainValue *DV); 209 void kill(int rx); 210 void force(int rx, unsigned domain); 211 void collapse(DomainValue *dv, unsigned domain); 212 bool merge(DomainValue *A, DomainValue *B); 213 214 void enterBasicBlock(MachineBasicBlock*); 215 void leaveBasicBlock(MachineBasicBlock*); 216 bool isBlockDone(MachineBasicBlock *); 217 void processBasicBlock(MachineBasicBlock *MBB, bool PrimaryPass); 218 bool visitInstr(MachineInstr *); 219 void processDefs(MachineInstr *, bool breakDependency, bool Kill); 220 void visitSoftInstr(MachineInstr*, unsigned mask); 221 void visitHardInstr(MachineInstr*, unsigned domain); 222 bool pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx, 223 unsigned Pref); 224 bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); 225 void processUndefReads(MachineBasicBlock*); 226}; 227 228} // end namepsace llvm 229 230#endif // LLVM_CODEGEN_EXECUTIONDEPSFIX_H 231