137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===// 237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// 337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// The LLVM Compiler Infrastructure 437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// 537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This file is distributed under the University of Illinois Open Source 637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// License. See LICENSE.TXT for details. 737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// 837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//===----------------------------------------------------------------------===// 937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This file contains the AArch64 / Cortex-A57 specific register allocation 1037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// constraints for use by the PBQP register allocator. 1137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// 1237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// It is essentially a transcription of what is contained in 1337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// AArch64A57FPLoadBalancing, which tries to use a balanced 1437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// mix of odd and even D-registers when performing a critical sequence of 1537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// independent, non-quadword FP/ASIMD floating-point multiply-accumulates. 1637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//===----------------------------------------------------------------------===// 1737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 1837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#define DEBUG_TYPE "aarch64-pbqp" 1937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 2037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64.h" 2137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64PBQPRegAlloc.h" 2237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64RegisterInfo.h" 2337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/LiveIntervalAnalysis.h" 2437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineBasicBlock.h" 2537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineFunction.h" 2637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineRegisterInfo.h" 2737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/RegAllocPBQP.h" 2837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/Debug.h" 2937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/ErrorHandling.h" 3037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/raw_ostream.h" 3137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesusing namespace llvm; 3337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesnamespace { 3537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 3637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG 3737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool isFPReg(unsigned reg) { 3837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return AArch64::FPR32RegClass.contains(reg) || 3937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AArch64::FPR64RegClass.contains(reg) || 4037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines AArch64::FPR128RegClass.contains(reg); 4137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 4237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif 4337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 4437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool isOdd(unsigned reg) { 4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines switch (reg) { 4637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines default: 4737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines llvm_unreachable("Register is not from the expected class !"); 4837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S1: 4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S3: 5037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S5: 5137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S7: 5237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S9: 5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S11: 5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S13: 5537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S15: 5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S17: 5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S19: 5837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S21: 5937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S23: 6037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S25: 6137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S27: 6237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S29: 6337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S31: 6437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D1: 6537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D3: 6637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D5: 6737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D7: 6837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D9: 6937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D11: 7037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D13: 7137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D15: 7237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D17: 7337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D19: 7437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D21: 7537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D23: 7637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D25: 7737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D27: 7837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D29: 7937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D31: 8037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q1: 8137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q3: 8237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q5: 8337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q7: 8437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q9: 8537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q11: 8637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q13: 8737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q15: 8837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q17: 8937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q19: 9037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q21: 9137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q23: 9237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q25: 9337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q27: 9437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q29: 9537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q31: 9637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 9737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S0: 9837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S2: 9937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S4: 10037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S6: 10137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S8: 10237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S10: 10337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S12: 10437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S14: 10537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S16: 10637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S18: 10737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S20: 10837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S22: 10937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S24: 11037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S26: 11137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S28: 11237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::S30: 11337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D0: 11437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D2: 11537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D4: 11637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D6: 11737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D8: 11837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D10: 11937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D12: 12037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D14: 12137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D16: 12237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D18: 12337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D20: 12437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D22: 12537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D24: 12637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D26: 12737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D28: 12837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::D30: 12937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q0: 13037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q2: 13137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q4: 13237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q6: 13337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q8: 13437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q10: 13537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q12: 13637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q14: 13737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q16: 13837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q18: 13937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q20: 14037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q22: 14137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q24: 14237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q26: 14337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q28: 14437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::Q30: 14537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 14637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 14737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 14837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 14937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 15037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool haveSameParity(unsigned reg1, unsigned reg2) { 15137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isFPReg(reg1) && "Expecting an FP register for reg1"); 15237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(isFPReg(reg2) && "Expecting an FP register for reg2"); 15337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 15437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return isOdd(reg1) == isOdd(reg2); 15537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 15637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 15737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 15837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 15937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd, 16037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Ra) { 16137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Rd == Ra) 16237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 16337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 16437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LiveIntervals &LIs = G.getMetadata().LIS; 16537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 16637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) { 16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd) 16837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << '\n'); 16937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra) 17037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << '\n'); 17137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return false; 17237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 17337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 17437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd); 17537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra); 17637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 17737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed = 17837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines &G.getNodeMetadata(node1).getAllowedRegs(); 17937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed = 18037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines &G.getNodeMetadata(node2).getAllowedRegs(); 18137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 18237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2); 18337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 18437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // The edge does not exist. Create one with the appropriate interference 18537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // costs. 18637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (edge == G.invalidEdgeId()) { 18737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const LiveInterval &ld = LIs.getInterval(Rd); 18837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const LiveInterval &la = LIs.getInterval(Ra); 18937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines bool livesOverlap = ld.overlaps(la); 19037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 19137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::RawMatrix costs(vRdAllowed->size() + 1, 19237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines vRaAllowed->size() + 1, 0); 19337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) { 19437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRd = (*vRdAllowed)[i]; 19537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { 19637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRa = (*vRaAllowed)[j]; 19737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (livesOverlap && TRI->regsOverlap(pRd, pRa)) 19837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); 19937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines else 20037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0; 20137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines G.addEdge(node1, node2, std::move(costs)); 20437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 20537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 20637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 20737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (G.getEdgeNode1Id(edge) == node2) { 20837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::swap(node1, node2); 20937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::swap(vRdAllowed, vRaAllowed); 21037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 21137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 21237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass)) 21337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::RawMatrix costs(G.getEdgeCosts(edge)); 21437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) { 21537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRd = (*vRdAllowed)[i]; 21637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 21737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Get the maximum cost (excluding unallocatable reg) for same parity 21837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // registers 21937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min(); 22037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { 22137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRa = (*vRaAllowed)[j]; 22237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (haveSameParity(pRd, pRa)) 22337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (costs[i + 1][j + 1] != 22437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::numeric_limits<PBQP::PBQPNum>::infinity() && 22537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines costs[i + 1][j + 1] > sameParityMax) 22637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines sameParityMax = costs[i + 1][j + 1]; 22737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 22837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 22937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Ensure all registers with a different parity have a higher cost 23037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // than sameParityMax 23137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) { 23237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRa = (*vRaAllowed)[j]; 23337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!haveSameParity(pRd, pRa)) 23437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (sameParityMax > costs[i + 1][j + 1]) 23537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines costs[i + 1][j + 1] = sameParityMax + 1.0; 23637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 23737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 238ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines G.updateEdgeCosts(edge, std::move(costs)); 23937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 24037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return true; 24137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 24237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 24337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, 24437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Ra) { 24537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LiveIntervals &LIs = G.getMetadata().LIS; 24637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 24737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Do some Chain management 24837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Chains.count(Ra)) { 24937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (Rd != Ra) { 25037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to " 25137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << PrintReg(Rd, TRI) << '\n';); 25237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Chains.remove(Ra); 25337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Chains.insert(Rd); 25437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 25537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } else { 25637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI) 25737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines << '\n';); 25837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Chains.insert(Rd); 25937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 26037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 26137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd); 26237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 26337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const LiveInterval &ld = LIs.getInterval(Rd); 26437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto r : Chains) { 26537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Skip self 26637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (r == Rd) 26737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines continue; 26837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 26937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const LiveInterval &lr = LIs.getInterval(r); 27037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (ld.overlaps(lr)) { 27137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed = 27237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines &G.getNodeMetadata(node1).getAllowedRegs(); 27337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 27437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r); 27537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed = 27637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines &G.getNodeMetadata(node2).getAllowedRegs(); 27737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 27837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2); 27937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines assert(edge != G.invalidEdgeId() && 28037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines "PBQP error ! The edge should exist !"); 28137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 28237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "Refining constraint !\n";); 28337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 28437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (G.getEdgeNode1Id(edge) == node2) { 28537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::swap(node1, node2); 28637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::swap(vRdAllowed, vRrAllowed); 28737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 28837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 28937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Enforce that cost is higher with all other Chains of the same parity 29037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQP::Matrix costs(G.getEdgeCosts(edge)); 29137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) { 29237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRd = (*vRdAllowed)[i]; 29337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 29437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Get the maximum cost (excluding unallocatable reg) for all other 29537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // parity registers 29637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min(); 29737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) { 29837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRa = (*vRrAllowed)[j]; 29937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (!haveSameParity(pRd, pRa)) 30037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (costs[i + 1][j + 1] != 30137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines std::numeric_limits<PBQP::PBQPNum>::infinity() && 30237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines costs[i + 1][j + 1] > sameParityMax) 30337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines sameParityMax = costs[i + 1][j + 1]; 30437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 30537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 30637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Ensure all registers with same parity have a higher cost 30737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // than sameParityMax 30837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) { 30937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned pRa = (*vRrAllowed)[j]; 31037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (haveSameParity(pRd, pRa)) 31137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (sameParityMax > costs[i + 1][j + 1]) 31237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines costs[i + 1][j + 1] = sameParityMax + 1.0; 31337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 31437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 315ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines G.updateEdgeCosts(edge, std::move(costs)); 31637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 31737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 31837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 31937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 32037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg, 32137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const MachineInstr &MI) { 3224c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar const LiveInterval &LI = LIs.getInterval(reg); 32337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SlotIndex SI = LIs.getInstructionIndex(&MI); 32437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines return LI.expiredAt(SI); 32537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 32637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 32737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid A57ChainingConstraint::apply(PBQPRAGraph &G) { 32837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines const MachineFunction &MF = G.getMetadata().MF; 32937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines LiveIntervals &LIs = G.getMetadata().LIS; 33037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 331ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines TRI = MF.getSubtarget().getRegisterInfo(); 33237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(MF.dump()); 33337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 33437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (const auto &MBB: MF) { 33537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Chains.clear(); // FIXME: really needed ? Could not work at MF level ? 33637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 33737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (const auto &MI: MBB) { 33837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 33937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines // Forget Chains which have expired 34037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines for (auto r : Chains) { 34137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines SmallVector<unsigned, 8> toDel; 34237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if(regJustKilledBefore(LIs, r, MI)) { 34337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at "; 34437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines MI.print(dbgs());); 34537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines toDel.push_back(r); 34637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 34737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 34837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines while (!toDel.empty()) { 34937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines Chains.remove(toDel.back()); 35037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines toDel.pop_back(); 35137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 35237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 35337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 35437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines switch (MI.getOpcode()) { 35537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FMSUBSrrr: 35637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FMADDSrrr: 35737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FNMSUBSrrr: 35837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FNMADDSrrr: 35937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FMSUBDrrr: 36037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FMADDDrrr: 36137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FNMSUBDrrr: 36237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FNMADDDrrr: { 36337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Rd = MI.getOperand(0).getReg(); 36437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Ra = MI.getOperand(3).getReg(); 36537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 36637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines if (addIntraChainConstraint(G, Rd, Ra)) 36737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines addInterChainConstraint(G, Rd, Ra); 36837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines break; 36937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 37037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 37137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FMLAv2f32: 37237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines case AArch64::FMLSv2f32: { 37337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines unsigned Rd = MI.getOperand(0).getReg(); 37437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines addInterChainConstraint(G, Rd, Rd); 37537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines break; 37637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 37737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines 37837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines default: 37937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines break; 38037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 38137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 38237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines } 38337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines} 384