137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//===-- AArch64PBQPRegAlloc.cpp - AArch64 specific PBQP constraints -------===//
237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//
337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//                     The LLVM Compiler Infrastructure
437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//
537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This file is distributed under the University of Illinois Open Source
637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// License. See LICENSE.TXT for details.
737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//
837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//===----------------------------------------------------------------------===//
937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// This file contains the AArch64 / Cortex-A57 specific register allocation
1037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// constraints for use by the PBQP register allocator.
1137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//
1237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// It is essentially a transcription of what is contained in
1337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// AArch64A57FPLoadBalancing, which tries to use a balanced
1437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// mix of odd and even D-registers when performing a critical sequence of
1537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines// independent, non-quadword FP/ASIMD floating-point multiply-accumulates.
1637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines//===----------------------------------------------------------------------===//
1737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
1837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#define DEBUG_TYPE "aarch64-pbqp"
1937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
2037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64.h"
2137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64PBQPRegAlloc.h"
2237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "AArch64RegisterInfo.h"
2337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/LiveIntervalAnalysis.h"
2437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineBasicBlock.h"
2537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineFunction.h"
2637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/MachineRegisterInfo.h"
2737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/CodeGen/RegAllocPBQP.h"
2837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/Debug.h"
2937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/ErrorHandling.h"
3037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#include "llvm/Support/raw_ostream.h"
3137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
3237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesusing namespace llvm;
3337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
3437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesnamespace {
3537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
3637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#ifndef NDEBUG
3737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool isFPReg(unsigned reg) {
3837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return AArch64::FPR32RegClass.contains(reg) ||
3937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         AArch64::FPR64RegClass.contains(reg) ||
4037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines         AArch64::FPR128RegClass.contains(reg);
4137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
4237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines#endif
4337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
4437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool isOdd(unsigned reg) {
4537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  switch (reg) {
4637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  default:
4737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    llvm_unreachable("Register is not from the expected class !");
4837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S1:
4937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S3:
5037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S5:
5137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S7:
5237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S9:
5337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S11:
5437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S13:
5537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S15:
5637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S17:
5737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S19:
5837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S21:
5937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S23:
6037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S25:
6137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S27:
6237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S29:
6337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S31:
6437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D1:
6537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D3:
6637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D5:
6737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D7:
6837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D9:
6937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D11:
7037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D13:
7137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D15:
7237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D17:
7337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D19:
7437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D21:
7537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D23:
7637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D25:
7737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D27:
7837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D29:
7937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D31:
8037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q1:
8137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q3:
8237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q5:
8337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q7:
8437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q9:
8537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q11:
8637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q13:
8737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q15:
8837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q17:
8937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q19:
9037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q21:
9137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q23:
9237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q25:
9337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q27:
9437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q29:
9537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q31:
9637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return true;
9737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S0:
9837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S2:
9937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S4:
10037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S6:
10137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S8:
10237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S10:
10337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S12:
10437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S14:
10537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S16:
10637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S18:
10737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S20:
10837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S22:
10937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S24:
11037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S26:
11137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S28:
11237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::S30:
11337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D0:
11437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D2:
11537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D4:
11637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D6:
11737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D8:
11837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D10:
11937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D12:
12037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D14:
12137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D16:
12237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D18:
12337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D20:
12437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D22:
12537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D24:
12637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D26:
12737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D28:
12837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::D30:
12937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q0:
13037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q2:
13137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q4:
13237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q6:
13337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q8:
13437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q10:
13537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q12:
13637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q14:
13737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q16:
13837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q18:
13937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q20:
14037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q22:
14137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q24:
14237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q26:
14337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q28:
14437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  case AArch64::Q30:
14537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return false;
14637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
14737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
14837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
14937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
15037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool haveSameParity(unsigned reg1, unsigned reg2) {
15137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(isFPReg(reg1) && "Expecting an FP register for reg1");
15237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  assert(isFPReg(reg2) && "Expecting an FP register for reg2");
15337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
15437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return isOdd(reg1) == isOdd(reg2);
15537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
15637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
15737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
15837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
15937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesbool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
16037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                 unsigned Ra) {
16137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (Rd == Ra)
16237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return false;
16337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
16437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  LiveIntervals &LIs = G.getMetadata().LIS;
16537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
16637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (TRI->isPhysicalRegister(Rd) || TRI->isPhysicalRegister(Ra)) {
16737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "Rd is a physical reg:" << TRI->isPhysicalRegister(Rd)
16837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          << '\n');
16937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "Ra is a physical reg:" << TRI->isPhysicalRegister(Ra)
17037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          << '\n');
17137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return false;
17237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
17337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
17437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
17537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra);
17637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
17737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
17837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    &G.getNodeMetadata(node1).getAllowedRegs();
17937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed =
18037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    &G.getNodeMetadata(node2).getAllowedRegs();
18137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
18237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
18337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
18437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // The edge does not exist. Create one with the appropriate interference
18537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // costs.
18637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (edge == G.invalidEdgeId()) {
18737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    const LiveInterval &ld = LIs.getInterval(Rd);
18837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    const LiveInterval &la = LIs.getInterval(Ra);
18937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    bool livesOverlap = ld.overlaps(la);
19037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
19137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    PBQPRAGraph::RawMatrix costs(vRdAllowed->size() + 1,
19237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                 vRaAllowed->size() + 1, 0);
19337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
19437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      unsigned pRd = (*vRdAllowed)[i];
19537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
19637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        unsigned pRa = (*vRaAllowed)[j];
19737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (livesOverlap && TRI->regsOverlap(pRd, pRa))
19837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          costs[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
19937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        else
20037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          costs[i + 1][j + 1] = haveSameParity(pRd, pRa) ? 0.0 : 1.0;
20137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
20237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
20337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    G.addEdge(node1, node2, std::move(costs));
20437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    return true;
20537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
20637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
20737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (G.getEdgeNode1Id(edge) == node2) {
20837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    std::swap(node1, node2);
20937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    std::swap(vRdAllowed, vRaAllowed);
21037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
21137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
21237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Enforce minCost(sameParity(RaClass)) > maxCost(otherParity(RdClass))
21337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  PBQPRAGraph::RawMatrix costs(G.getEdgeCosts(edge));
21437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
21537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    unsigned pRd = (*vRdAllowed)[i];
21637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
21737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Get the maximum cost (excluding unallocatable reg) for same parity
21837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // registers
21937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
22037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
22137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      unsigned pRa = (*vRaAllowed)[j];
22237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (haveSameParity(pRd, pRa))
22337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (costs[i + 1][j + 1] !=
22437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                std::numeric_limits<PBQP::PBQPNum>::infinity() &&
22537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            costs[i + 1][j + 1] > sameParityMax)
22637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          sameParityMax = costs[i + 1][j + 1];
22737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
22837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
22937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Ensure all registers with a different parity have a higher cost
23037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // than sameParityMax
23137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (unsigned j = 0, je = vRaAllowed->size(); j != je; ++j) {
23237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      unsigned pRa = (*vRaAllowed)[j];
23337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (!haveSameParity(pRd, pRa))
23437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (sameParityMax > costs[i + 1][j + 1])
23537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          costs[i + 1][j + 1] = sameParityMax + 1.0;
23637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
23737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
238ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  G.updateEdgeCosts(edge, std::move(costs));
23937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
24037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return true;
24137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
24237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
24337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
24437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                                 unsigned Ra) {
24537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  LiveIntervals &LIs = G.getMetadata().LIS;
24637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
24737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  // Do some Chain management
24837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  if (Chains.count(Ra)) {
24937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (Rd != Ra) {
25037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "Moving acc chain from " << PrintReg(Ra, TRI) << " to "
25137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                   << PrintReg(Rd, TRI) << '\n';);
25237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Chains.remove(Ra);
25337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      Chains.insert(Rd);
25437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
25537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  } else {
25637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    DEBUG(dbgs() << "Creating new acc chain for " << PrintReg(Rd, TRI)
25737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                 << '\n';);
25837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Chains.insert(Rd);
25937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
26037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
26137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
26237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
26337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  const LiveInterval &ld = LIs.getInterval(Rd);
26437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (auto r : Chains) {
26537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    // Skip self
26637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (r == Rd)
26737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      continue;
26837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
26937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    const LiveInterval &lr = LIs.getInterval(r);
27037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    if (ld.overlaps(lr)) {
27137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
27237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        &G.getNodeMetadata(node1).getAllowedRegs();
27337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
27437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r);
27537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed =
27637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        &G.getNodeMetadata(node2).getAllowedRegs();
27737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
27837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
27937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      assert(edge != G.invalidEdgeId() &&
28037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines             "PBQP error ! The edge should exist !");
28137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
28237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      DEBUG(dbgs() << "Refining constraint !\n";);
28337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
28437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      if (G.getEdgeNode1Id(edge) == node2) {
28537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        std::swap(node1, node2);
28637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        std::swap(vRdAllowed, vRrAllowed);
28737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
28837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
28937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Enforce that cost is higher with all other Chains of the same parity
29037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      PBQP::Matrix costs(G.getEdgeCosts(edge));
29137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (unsigned i = 0, ie = vRdAllowed->size(); i != ie; ++i) {
29237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        unsigned pRd = (*vRdAllowed)[i];
29337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
29437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Get the maximum cost (excluding unallocatable reg) for all other
29537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // parity registers
29637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        PBQP::PBQPNum sameParityMax = std::numeric_limits<PBQP::PBQPNum>::min();
29737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
29837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          unsigned pRa = (*vRrAllowed)[j];
29937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (!haveSameParity(pRd, pRa))
30037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            if (costs[i + 1][j + 1] !=
30137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                    std::numeric_limits<PBQP::PBQPNum>::infinity() &&
30237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                costs[i + 1][j + 1] > sameParityMax)
30337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              sameParityMax = costs[i + 1][j + 1];
30437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
30537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
30637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // Ensure all registers with same parity have a higher cost
30737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        // than sameParityMax
30837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        for (unsigned j = 0, je = vRrAllowed->size(); j != je; ++j) {
30937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          unsigned pRa = (*vRrAllowed)[j];
31037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          if (haveSameParity(pRd, pRa))
31137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines            if (sameParityMax > costs[i + 1][j + 1])
31237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines              costs[i + 1][j + 1] = sameParityMax + 1.0;
31337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
31437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
315ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines      G.updateEdgeCosts(edge, std::move(costs));
31637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
31737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
31837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
31937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
32037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesstatic bool regJustKilledBefore(const LiveIntervals &LIs, unsigned reg,
32137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                                const MachineInstr &MI) {
3224c5e43da7792f75567b693105cc53e3f1992ad98Pirama Arumuga Nainar  const LiveInterval &LI = LIs.getInterval(reg);
32337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  SlotIndex SI = LIs.getInstructionIndex(&MI);
32437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  return LI.expiredAt(SI);
32537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
32637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
32737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hinesvoid A57ChainingConstraint::apply(PBQPRAGraph &G) {
32837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  const MachineFunction &MF = G.getMetadata().MF;
32937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  LiveIntervals &LIs = G.getMetadata().LIS;
33037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
331ebe69fe11e48d322045d5949c83283927a0d790bStephen Hines  TRI = MF.getSubtarget().getRegisterInfo();
33237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  DEBUG(MF.dump());
33337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
33437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  for (const auto &MBB: MF) {
33537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    Chains.clear(); // FIXME: really needed ? Could not work at MF level ?
33637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
33737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    for (const auto &MI: MBB) {
33837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
33937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      // Forget Chains which have expired
34037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      for (auto r : Chains) {
34137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        SmallVector<unsigned, 8> toDel;
34237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if(regJustKilledBefore(LIs, r, MI)) {
34337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          DEBUG(dbgs() << "Killing chain " << PrintReg(r, TRI) << " at ";
34437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines                MI.print(dbgs()););
34537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          toDel.push_back(r);
34637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
34737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
34837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        while (!toDel.empty()) {
34937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          Chains.remove(toDel.back());
35037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          toDel.pop_back();
35137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        }
35237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
35337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
35437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      switch (MI.getOpcode()) {
35537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FMSUBSrrr:
35637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FMADDSrrr:
35737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FNMSUBSrrr:
35837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FNMADDSrrr:
35937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FMSUBDrrr:
36037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FMADDDrrr:
36137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FNMSUBDrrr:
36237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FNMADDDrrr: {
36337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        unsigned Rd = MI.getOperand(0).getReg();
36437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        unsigned Ra = MI.getOperand(3).getReg();
36537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
36637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        if (addIntraChainConstraint(G, Rd, Ra))
36737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines          addInterChainConstraint(G, Rd, Ra);
36837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        break;
36937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
37037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
37137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FMLAv2f32:
37237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      case AArch64::FMLSv2f32: {
37337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        unsigned Rd = MI.getOperand(0).getReg();
37437ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        addInterChainConstraint(G, Rd, Rd);
37537ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        break;
37637ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
37737ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines
37837ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      default:
37937ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines        break;
38037ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines      }
38137ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines    }
38237ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines  }
38337ed9c199ca639565f6ce88105f9e39e898d82d0Stephen Hines}
384