1//===--- HexagonGenInsert.cpp ---------------------------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10#define DEBUG_TYPE "hexinsert" 11 12#include "llvm/Pass.h" 13#include "llvm/PassRegistry.h" 14#include "llvm/ADT/BitVector.h" 15#include "llvm/ADT/DenseMap.h" 16#include "llvm/ADT/DenseSet.h" 17#include "llvm/ADT/PostOrderIterator.h" 18#include "llvm/CodeGen/MachineDominators.h" 19#include "llvm/CodeGen/MachineFunction.h" 20#include "llvm/CodeGen/MachineFunctionPass.h" 21#include "llvm/CodeGen/MachineInstrBuilder.h" 22#include "llvm/CodeGen/MachineRegisterInfo.h" 23#include "llvm/IR/Constants.h" 24#include "llvm/Support/CommandLine.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/raw_ostream.h" 27#include "llvm/Support/Timer.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/Target/TargetRegisterInfo.h" 30 31#include "Hexagon.h" 32#include "HexagonRegisterInfo.h" 33#include "HexagonTargetMachine.h" 34#include "HexagonBitTracker.h" 35 36#include <map> 37#include <vector> 38 39using namespace llvm; 40 41static cl::opt<unsigned> VRegIndexCutoff("insert-vreg-cutoff", cl::init(~0U), 42 cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg# cutoff for insert generation.")); 43// The distance cutoff is selected based on the precheckin-perf results: 44// cutoffs 20, 25, 35, and 40 are worse than 30. 45static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), 46 cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " 47 "generation.")); 48 49static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, 50 cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); 51static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), 52 cl::Hidden, cl::ZeroOrMore, cl::desc("Enable detailed timing of insert " 53 "generation")); 54 55static cl::opt<bool> OptSelectAll0("insert-all0", cl::init(false), cl::Hidden, 56 cl::ZeroOrMore); 57static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden, 58 cl::ZeroOrMore); 59// Whether to construct constant values via "insert". Could eliminate constant 60// extenders, but often not practical. 61static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden, 62 cl::ZeroOrMore); 63 64namespace { 65 // The preprocessor gets confused when the DEBUG macro is passed larger 66 // chunks of code. Use this function to detect debugging. 67 inline bool isDebug() { 68#ifndef NDEBUG 69 return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE); 70#else 71 return false; 72#endif 73 } 74} 75 76 77namespace { 78 // Set of virtual registers, based on BitVector. 79 struct RegisterSet : private BitVector { 80 RegisterSet() = default; 81 explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} 82 83 using BitVector::clear; 84 85 unsigned find_first() const { 86 int First = BitVector::find_first(); 87 if (First < 0) 88 return 0; 89 return x2v(First); 90 } 91 92 unsigned find_next(unsigned Prev) const { 93 int Next = BitVector::find_next(v2x(Prev)); 94 if (Next < 0) 95 return 0; 96 return x2v(Next); 97 } 98 99 RegisterSet &insert(unsigned R) { 100 unsigned Idx = v2x(R); 101 ensure(Idx); 102 return static_cast<RegisterSet&>(BitVector::set(Idx)); 103 } 104 RegisterSet &remove(unsigned R) { 105 unsigned Idx = v2x(R); 106 if (Idx >= size()) 107 return *this; 108 return static_cast<RegisterSet&>(BitVector::reset(Idx)); 109 } 110 111 RegisterSet &insert(const RegisterSet &Rs) { 112 return static_cast<RegisterSet&>(BitVector::operator|=(Rs)); 113 } 114 RegisterSet &remove(const RegisterSet &Rs) { 115 return static_cast<RegisterSet&>(BitVector::reset(Rs)); 116 } 117 118 reference operator[](unsigned R) { 119 unsigned Idx = v2x(R); 120 ensure(Idx); 121 return BitVector::operator[](Idx); 122 } 123 bool operator[](unsigned R) const { 124 unsigned Idx = v2x(R); 125 assert(Idx < size()); 126 return BitVector::operator[](Idx); 127 } 128 bool has(unsigned R) const { 129 unsigned Idx = v2x(R); 130 if (Idx >= size()) 131 return false; 132 return BitVector::test(Idx); 133 } 134 135 bool empty() const { 136 return !BitVector::any(); 137 } 138 bool includes(const RegisterSet &Rs) const { 139 // A.BitVector::test(B) <=> A-B != {} 140 return !Rs.BitVector::test(*this); 141 } 142 bool intersects(const RegisterSet &Rs) const { 143 return BitVector::anyCommon(Rs); 144 } 145 146 private: 147 void ensure(unsigned Idx) { 148 if (size() <= Idx) 149 resize(std::max(Idx+1, 32U)); 150 } 151 static inline unsigned v2x(unsigned v) { 152 return TargetRegisterInfo::virtReg2Index(v); 153 } 154 static inline unsigned x2v(unsigned x) { 155 return TargetRegisterInfo::index2VirtReg(x); 156 } 157 }; 158 159 160 struct PrintRegSet { 161 PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI) 162 : RS(S), TRI(RI) {} 163 friend raw_ostream &operator<< (raw_ostream &OS, 164 const PrintRegSet &P); 165 private: 166 const RegisterSet &RS; 167 const TargetRegisterInfo *TRI; 168 }; 169 170 raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) { 171 OS << '{'; 172 for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R)) 173 OS << ' ' << PrintReg(R, P.TRI); 174 OS << " }"; 175 return OS; 176 } 177} 178 179 180namespace { 181 // A convenience class to associate unsigned numbers (such as virtual 182 // registers) with unsigned numbers. 183 struct UnsignedMap : public DenseMap<unsigned,unsigned> { 184 UnsignedMap() : BaseType() {} 185 private: 186 typedef DenseMap<unsigned,unsigned> BaseType; 187 }; 188 189 // A utility to establish an ordering between virtual registers: 190 // VRegA < VRegB <=> RegisterOrdering[VRegA] < RegisterOrdering[VRegB] 191 // This is meant as a cache for the ordering of virtual registers defined 192 // by a potentially expensive comparison function, or obtained by a proce- 193 // dure that should not be repeated each time two registers are compared. 194 struct RegisterOrdering : public UnsignedMap { 195 RegisterOrdering() : UnsignedMap() {} 196 unsigned operator[](unsigned VR) const { 197 const_iterator F = find(VR); 198 assert(F != end()); 199 return F->second; 200 } 201 // Add operator(), so that objects of this class can be used as 202 // comparators in std::sort et al. 203 bool operator() (unsigned VR1, unsigned VR2) const { 204 return operator[](VR1) < operator[](VR2); 205 } 206 }; 207} 208 209 210namespace { 211 // Ordering of bit values. This class does not have operator[], but 212 // is supplies a comparison operator() for use in std:: algorithms. 213 // The order is as follows: 214 // - 0 < 1 < ref 215 // - ref1 < ref2, if ord(ref1.Reg) < ord(ref2.Reg), 216 // or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos. 217 struct BitValueOrdering { 218 BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {} 219 bool operator() (const BitTracker::BitValue &V1, 220 const BitTracker::BitValue &V2) const; 221 const RegisterOrdering &BaseOrd; 222 }; 223} 224 225 226bool BitValueOrdering::operator() (const BitTracker::BitValue &V1, 227 const BitTracker::BitValue &V2) const { 228 if (V1 == V2) 229 return false; 230 // V1==0 => true, V2==0 => false 231 if (V1.is(0) || V2.is(0)) 232 return V1.is(0); 233 // Neither of V1,V2 is 0, and V1!=V2. 234 // V2==1 => false, V1==1 => true 235 if (V2.is(1) || V1.is(1)) 236 return !V2.is(1); 237 // Both V1,V2 are refs. 238 unsigned Ind1 = BaseOrd[V1.RefI.Reg], Ind2 = BaseOrd[V2.RefI.Reg]; 239 if (Ind1 != Ind2) 240 return Ind1 < Ind2; 241 // If V1.Pos==V2.Pos 242 assert(V1.RefI.Pos != V2.RefI.Pos && "Bit values should be different"); 243 return V1.RefI.Pos < V2.RefI.Pos; 244} 245 246 247namespace { 248 // Cache for the BitTracker's cell map. Map lookup has a logarithmic 249 // complexity, this class will memoize the lookup results to reduce 250 // the access time for repeated lookups of the same cell. 251 struct CellMapShadow { 252 CellMapShadow(const BitTracker &T) : BT(T) {} 253 const BitTracker::RegisterCell &lookup(unsigned VR) { 254 unsigned RInd = TargetRegisterInfo::virtReg2Index(VR); 255 // Grow the vector to at least 32 elements. 256 if (RInd >= CVect.size()) 257 CVect.resize(std::max(RInd+16, 32U), 0); 258 const BitTracker::RegisterCell *CP = CVect[RInd]; 259 if (CP == 0) 260 CP = CVect[RInd] = &BT.lookup(VR); 261 return *CP; 262 } 263 264 const BitTracker &BT; 265 266 private: 267 typedef std::vector<const BitTracker::RegisterCell*> CellVectType; 268 CellVectType CVect; 269 }; 270} 271 272 273namespace { 274 // Comparator class for lexicographic ordering of virtual registers 275 // according to the corresponding BitTracker::RegisterCell objects. 276 struct RegisterCellLexCompare { 277 RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M) 278 : BitOrd(BO), CM(M) {} 279 bool operator() (unsigned VR1, unsigned VR2) const; 280 private: 281 const BitValueOrdering &BitOrd; 282 CellMapShadow &CM; 283 }; 284 285 // Comparator class for lexicographic ordering of virtual registers 286 // according to the specified bits of the corresponding BitTracker:: 287 // RegisterCell objects. 288 // Specifically, this class will be used to compare bit B of a register 289 // cell for a selected virtual register R with bit N of any register 290 // other than R. 291 struct RegisterCellBitCompareSel { 292 RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N, 293 const BitValueOrdering &BO, CellMapShadow &M) 294 : SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {} 295 bool operator() (unsigned VR1, unsigned VR2) const; 296 private: 297 const unsigned SelR, SelB; 298 const unsigned BitN; 299 const BitValueOrdering &BitOrd; 300 CellMapShadow &CM; 301 }; 302} 303 304 305bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const { 306 // Ordering of registers, made up from two given orderings: 307 // - the ordering of the register numbers, and 308 // - the ordering of register cells. 309 // Def. R1 < R2 if: 310 // - cell(R1) < cell(R2), or 311 // - cell(R1) == cell(R2), and index(R1) < index(R2). 312 // 313 // For register cells, the ordering is lexicographic, with index 0 being 314 // the most significant. 315 if (VR1 == VR2) 316 return false; 317 318 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1), &RC2 = CM.lookup(VR2); 319 uint16_t W1 = RC1.width(), W2 = RC2.width(); 320 for (uint16_t i = 0, w = std::min(W1, W2); i < w; ++i) { 321 const BitTracker::BitValue &V1 = RC1[i], &V2 = RC2[i]; 322 if (V1 != V2) 323 return BitOrd(V1, V2); 324 } 325 // Cells are equal up until the common length. 326 if (W1 != W2) 327 return W1 < W2; 328 329 return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2]; 330} 331 332 333bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const { 334 if (VR1 == VR2) 335 return false; 336 const BitTracker::RegisterCell &RC1 = CM.lookup(VR1); 337 const BitTracker::RegisterCell &RC2 = CM.lookup(VR2); 338 uint16_t W1 = RC1.width(), W2 = RC2.width(); 339 uint16_t Bit1 = (VR1 == SelR) ? SelB : BitN; 340 uint16_t Bit2 = (VR2 == SelR) ? SelB : BitN; 341 // If Bit1 exceeds the width of VR1, then: 342 // - return false, if at the same time Bit2 exceeds VR2, or 343 // - return true, otherwise. 344 // (I.e. "a bit value that does not exist is less than any bit value 345 // that does exist".) 346 if (W1 <= Bit1) 347 return Bit2 < W2; 348 // If Bit1 is within VR1, but Bit2 is not within VR2, return false. 349 if (W2 <= Bit2) 350 return false; 351 352 const BitTracker::BitValue &V1 = RC1[Bit1], V2 = RC2[Bit2]; 353 if (V1 != V2) 354 return BitOrd(V1, V2); 355 return false; 356} 357 358 359namespace { 360 class OrderedRegisterList { 361 typedef std::vector<unsigned> ListType; 362 public: 363 OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} 364 void insert(unsigned VR); 365 void remove(unsigned VR); 366 unsigned operator[](unsigned Idx) const { 367 assert(Idx < Seq.size()); 368 return Seq[Idx]; 369 } 370 unsigned size() const { 371 return Seq.size(); 372 } 373 374 typedef ListType::iterator iterator; 375 typedef ListType::const_iterator const_iterator; 376 iterator begin() { return Seq.begin(); } 377 iterator end() { return Seq.end(); } 378 const_iterator begin() const { return Seq.begin(); } 379 const_iterator end() const { return Seq.end(); } 380 381 // Convenience function to convert an iterator to the corresponding index. 382 unsigned idx(iterator It) const { return It-begin(); } 383 private: 384 ListType Seq; 385 const RegisterOrdering &Ord; 386 }; 387 388 389 struct PrintORL { 390 PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI) 391 : RL(L), TRI(RI) {} 392 friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P); 393 private: 394 const OrderedRegisterList &RL; 395 const TargetRegisterInfo *TRI; 396 }; 397 398 raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P) { 399 OS << '('; 400 OrderedRegisterList::const_iterator B = P.RL.begin(), E = P.RL.end(); 401 for (OrderedRegisterList::const_iterator I = B; I != E; ++I) { 402 if (I != B) 403 OS << ", "; 404 OS << PrintReg(*I, P.TRI); 405 } 406 OS << ')'; 407 return OS; 408 } 409} 410 411 412void OrderedRegisterList::insert(unsigned VR) { 413 iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); 414 if (L == Seq.end()) 415 Seq.push_back(VR); 416 else 417 Seq.insert(L, VR); 418} 419 420 421void OrderedRegisterList::remove(unsigned VR) { 422 iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); 423 assert(L != Seq.end()); 424 Seq.erase(L); 425} 426 427 428namespace { 429 // A record of the insert form. The fields correspond to the operands 430 // of the "insert" instruction: 431 // ... = insert(SrcR, InsR, #Wdh, #Off) 432 struct IFRecord { 433 IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0) 434 : SrcR(SR), InsR(IR), Wdh(W), Off(O) {} 435 unsigned SrcR, InsR; 436 uint16_t Wdh, Off; 437 }; 438 439 struct PrintIFR { 440 PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI) 441 : IFR(R), TRI(RI) {} 442 private: 443 const IFRecord &IFR; 444 const TargetRegisterInfo *TRI; 445 friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P); 446 }; 447 448 raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) { 449 unsigned SrcR = P.IFR.SrcR, InsR = P.IFR.InsR; 450 OS << '(' << PrintReg(SrcR, P.TRI) << ',' << PrintReg(InsR, P.TRI) 451 << ",#" << P.IFR.Wdh << ",#" << P.IFR.Off << ')'; 452 return OS; 453 } 454 455 typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet; 456} 457 458 459namespace llvm { 460 void initializeHexagonGenInsertPass(PassRegistry&); 461 FunctionPass *createHexagonGenInsert(); 462} 463 464 465namespace { 466 class HexagonGenInsert : public MachineFunctionPass { 467 public: 468 static char ID; 469 HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) { 470 initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry()); 471 } 472 virtual const char *getPassName() const { 473 return "Hexagon generate \"insert\" instructions"; 474 } 475 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 476 AU.addRequired<MachineDominatorTree>(); 477 AU.addPreserved<MachineDominatorTree>(); 478 MachineFunctionPass::getAnalysisUsage(AU); 479 } 480 virtual bool runOnMachineFunction(MachineFunction &MF); 481 482 private: 483 typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType; 484 485 void buildOrderingMF(RegisterOrdering &RO) const; 486 void buildOrderingBT(RegisterOrdering &RB, RegisterOrdering &RO) const; 487 bool isIntClass(const TargetRegisterClass *RC) const; 488 bool isConstant(unsigned VR) const; 489 bool isSmallConstant(unsigned VR) const; 490 bool isValidInsertForm(unsigned DstR, unsigned SrcR, unsigned InsR, 491 uint16_t L, uint16_t S) const; 492 bool findSelfReference(unsigned VR) const; 493 bool findNonSelfReference(unsigned VR) const; 494 void getInstrDefs(const MachineInstr *MI, RegisterSet &Defs) const; 495 void getInstrUses(const MachineInstr *MI, RegisterSet &Uses) const; 496 unsigned distance(const MachineBasicBlock *FromB, 497 const MachineBasicBlock *ToB, const UnsignedMap &RPO, 498 PairMapType &M) const; 499 unsigned distance(MachineBasicBlock::const_iterator FromI, 500 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 501 PairMapType &M) const; 502 bool findRecordInsertForms(unsigned VR, OrderedRegisterList &AVs); 503 void collectInBlock(MachineBasicBlock *B, OrderedRegisterList &AVs); 504 void findRemovableRegisters(unsigned VR, IFRecord IF, 505 RegisterSet &RMs) const; 506 void computeRemovableRegisters(); 507 508 void pruneEmptyLists(); 509 void pruneCoveredSets(unsigned VR); 510 void pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, PairMapType &M); 511 void pruneRegCopies(unsigned VR); 512 void pruneCandidates(); 513 void selectCandidates(); 514 bool generateInserts(); 515 516 bool removeDeadCode(MachineDomTreeNode *N); 517 518 // IFRecord coupled with a set of potentially removable registers: 519 typedef std::vector<IFRecordWithRegSet> IFListType; 520 typedef DenseMap<unsigned,IFListType> IFMapType; // vreg -> IFListType 521 522 void dump_map() const; 523 524 const HexagonInstrInfo *HII; 525 const HexagonRegisterInfo *HRI; 526 527 MachineFunction *MFN; 528 MachineRegisterInfo *MRI; 529 MachineDominatorTree *MDT; 530 CellMapShadow *CMS; 531 532 RegisterOrdering BaseOrd; 533 RegisterOrdering CellOrd; 534 IFMapType IFMap; 535 }; 536 537 char HexagonGenInsert::ID = 0; 538} 539 540 541void HexagonGenInsert::dump_map() const { 542 typedef IFMapType::const_iterator iterator; 543 for (iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 544 dbgs() << " " << PrintReg(I->first, HRI) << ":\n"; 545 const IFListType &LL = I->second; 546 for (unsigned i = 0, n = LL.size(); i < n; ++i) 547 dbgs() << " " << PrintIFR(LL[i].first, HRI) << ", " 548 << PrintRegSet(LL[i].second, HRI) << '\n'; 549 } 550} 551 552 553void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const { 554 unsigned Index = 0; 555 typedef MachineFunction::const_iterator mf_iterator; 556 for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) { 557 const MachineBasicBlock &B = *A; 558 if (!CMS->BT.reached(&B)) 559 continue; 560 typedef MachineBasicBlock::const_iterator mb_iterator; 561 for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) { 562 const MachineInstr *MI = &*I; 563 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 564 const MachineOperand &MO = MI->getOperand(i); 565 if (MO.isReg() && MO.isDef()) { 566 unsigned R = MO.getReg(); 567 assert(MO.getSubReg() == 0 && "Unexpected subregister in definition"); 568 if (TargetRegisterInfo::isVirtualRegister(R)) 569 RO.insert(std::make_pair(R, Index++)); 570 } 571 } 572 } 573 } 574 // Since some virtual registers may have had their def and uses eliminated, 575 // they are no longer referenced in the code, and so they will not appear 576 // in the map. 577} 578 579 580void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, 581 RegisterOrdering &RO) const { 582 // Create a vector of all virtual registers (collect them from the base 583 // ordering RB), and then sort it using the RegisterCell comparator. 584 BitValueOrdering BVO(RB); 585 RegisterCellLexCompare LexCmp(BVO, *CMS); 586 typedef std::vector<unsigned> SortableVectorType; 587 SortableVectorType VRs; 588 for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) 589 VRs.push_back(I->first); 590 std::sort(VRs.begin(), VRs.end(), LexCmp); 591 // Transfer the results to the outgoing register ordering. 592 for (unsigned i = 0, n = VRs.size(); i < n; ++i) 593 RO.insert(std::make_pair(VRs[i], i)); 594} 595 596 597inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const { 598 return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass; 599} 600 601 602bool HexagonGenInsert::isConstant(unsigned VR) const { 603 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 604 uint16_t W = RC.width(); 605 for (uint16_t i = 0; i < W; ++i) { 606 const BitTracker::BitValue &BV = RC[i]; 607 if (BV.is(0) || BV.is(1)) 608 continue; 609 return false; 610 } 611 return true; 612} 613 614 615bool HexagonGenInsert::isSmallConstant(unsigned VR) const { 616 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 617 uint16_t W = RC.width(); 618 if (W > 64) 619 return false; 620 uint64_t V = 0, B = 1; 621 for (uint16_t i = 0; i < W; ++i) { 622 const BitTracker::BitValue &BV = RC[i]; 623 if (BV.is(1)) 624 V |= B; 625 else if (!BV.is(0)) 626 return false; 627 B <<= 1; 628 } 629 630 // For 32-bit registers, consider: Rd = #s16. 631 if (W == 32) 632 return isInt<16>(V); 633 634 // For 64-bit registers, it's Rdd = #s8 or Rdd = combine(#s8,#s8) 635 return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V)); 636} 637 638 639bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR, 640 unsigned InsR, uint16_t L, uint16_t S) const { 641 const TargetRegisterClass *DstRC = MRI->getRegClass(DstR); 642 const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcR); 643 const TargetRegisterClass *InsRC = MRI->getRegClass(InsR); 644 // Only integet (32-/64-bit) register classes. 645 if (!isIntClass(DstRC) || !isIntClass(SrcRC) || !isIntClass(InsRC)) 646 return false; 647 // The "source" register must be of the same class as DstR. 648 if (DstRC != SrcRC) 649 return false; 650 if (DstRC == InsRC) 651 return true; 652 // A 64-bit register can only be generated from other 64-bit registers. 653 if (DstRC == &Hexagon::DoubleRegsRegClass) 654 return false; 655 // Otherwise, the L and S cannot span 32-bit word boundary. 656 if (S < 32 && S+L > 32) 657 return false; 658 return true; 659} 660 661 662bool HexagonGenInsert::findSelfReference(unsigned VR) const { 663 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 664 for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 665 const BitTracker::BitValue &V = RC[i]; 666 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg == VR) 667 return true; 668 } 669 return false; 670} 671 672 673bool HexagonGenInsert::findNonSelfReference(unsigned VR) const { 674 BitTracker::RegisterCell RC = CMS->lookup(VR); 675 for (uint16_t i = 0, w = RC.width(); i < w; ++i) { 676 const BitTracker::BitValue &V = RC[i]; 677 if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != VR) 678 return true; 679 } 680 return false; 681} 682 683 684void HexagonGenInsert::getInstrDefs(const MachineInstr *MI, 685 RegisterSet &Defs) const { 686 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 687 const MachineOperand &MO = MI->getOperand(i); 688 if (!MO.isReg() || !MO.isDef()) 689 continue; 690 unsigned R = MO.getReg(); 691 if (!TargetRegisterInfo::isVirtualRegister(R)) 692 continue; 693 Defs.insert(R); 694 } 695} 696 697 698void HexagonGenInsert::getInstrUses(const MachineInstr *MI, 699 RegisterSet &Uses) const { 700 for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) { 701 const MachineOperand &MO = MI->getOperand(i); 702 if (!MO.isReg() || !MO.isUse()) 703 continue; 704 unsigned R = MO.getReg(); 705 if (!TargetRegisterInfo::isVirtualRegister(R)) 706 continue; 707 Uses.insert(R); 708 } 709} 710 711 712unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB, 713 const MachineBasicBlock *ToB, const UnsignedMap &RPO, 714 PairMapType &M) const { 715 // Forward distance from the end of a block to the beginning of it does 716 // not make sense. This function should not be called with FromB == ToB. 717 assert(FromB != ToB); 718 719 unsigned FromN = FromB->getNumber(), ToN = ToB->getNumber(); 720 // If we have already computed it, return the cached result. 721 PairMapType::iterator F = M.find(std::make_pair(FromN, ToN)); 722 if (F != M.end()) 723 return F->second; 724 unsigned ToRPO = RPO.lookup(ToN); 725 726 unsigned MaxD = 0; 727 typedef MachineBasicBlock::const_pred_iterator pred_iterator; 728 for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) { 729 const MachineBasicBlock *PB = *I; 730 // Skip back edges. Also, if FromB is a predecessor of ToB, the distance 731 // along that path will be 0, and we don't need to do any calculations 732 // on it. 733 if (PB == FromB || RPO.lookup(PB->getNumber()) >= ToRPO) 734 continue; 735 unsigned D = PB->size() + distance(FromB, PB, RPO, M); 736 if (D > MaxD) 737 MaxD = D; 738 } 739 740 // Memoize the result for later lookup. 741 M.insert(std::make_pair(std::make_pair(FromN, ToN), MaxD)); 742 return MaxD; 743} 744 745 746unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI, 747 MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO, 748 PairMapType &M) const { 749 const MachineBasicBlock *FB = FromI->getParent(), *TB = ToI->getParent(); 750 if (FB == TB) 751 return std::distance(FromI, ToI); 752 unsigned D1 = std::distance(TB->begin(), ToI); 753 unsigned D2 = distance(FB, TB, RPO, M); 754 unsigned D3 = std::distance(FromI, FB->end()); 755 return D1+D2+D3; 756} 757 758 759bool HexagonGenInsert::findRecordInsertForms(unsigned VR, 760 OrderedRegisterList &AVs) { 761 if (isDebug()) { 762 dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI) 763 << " AVs: " << PrintORL(AVs, HRI) << "\n"; 764 } 765 if (AVs.size() == 0) 766 return false; 767 768 typedef OrderedRegisterList::iterator iterator; 769 BitValueOrdering BVO(BaseOrd); 770 const BitTracker::RegisterCell &RC = CMS->lookup(VR); 771 uint16_t W = RC.width(); 772 773 typedef std::pair<unsigned,uint16_t> RSRecord; // (reg,shift) 774 typedef std::vector<RSRecord> RSListType; 775 // Have a map, with key being the matching prefix length, and the value 776 // being the list of pairs (R,S), where R's prefix matches VR at S. 777 // (DenseMap<uint16_t,RSListType> fails to instantiate.) 778 typedef DenseMap<unsigned,RSListType> LRSMapType; 779 LRSMapType LM; 780 781 // Conceptually, rotate the cell RC right (i.e. towards the LSB) by S, 782 // and find matching prefixes from AVs with the rotated RC. Such a prefix 783 // would match a string of bits (of length L) in RC starting at S. 784 for (uint16_t S = 0; S < W; ++S) { 785 iterator B = AVs.begin(), E = AVs.end(); 786 // The registers in AVs are ordered according to the lexical order of 787 // the corresponding register cells. This means that the range of regis- 788 // ters in AVs that match a prefix of length L+1 will be contained in 789 // the range that matches a prefix of length L. This means that we can 790 // keep narrowing the search space as the prefix length goes up. This 791 // helps reduce the overall complexity of the search. 792 uint16_t L; 793 for (L = 0; L < W-S; ++L) { 794 // Compare against VR's bits starting at S, which emulates rotation 795 // of VR by S. 796 RegisterCellBitCompareSel RCB(VR, S+L, L, BVO, *CMS); 797 iterator NewB = std::lower_bound(B, E, VR, RCB); 798 iterator NewE = std::upper_bound(NewB, E, VR, RCB); 799 // For the registers that are eliminated from the next range, L is 800 // the longest prefix matching VR at position S (their prefixes 801 // differ from VR at S+L). If L>0, record this information for later 802 // use. 803 if (L > 0) { 804 for (iterator I = B; I != NewB; ++I) 805 LM[L].push_back(std::make_pair(*I, S)); 806 for (iterator I = NewE; I != E; ++I) 807 LM[L].push_back(std::make_pair(*I, S)); 808 } 809 B = NewB, E = NewE; 810 if (B == E) 811 break; 812 } 813 // Record the final register range. If this range is non-empty, then 814 // L=W-S. 815 assert(B == E || L == W-S); 816 if (B != E) { 817 for (iterator I = B; I != E; ++I) 818 LM[L].push_back(std::make_pair(*I, S)); 819 // If B!=E, then we found a range of registers whose prefixes cover the 820 // rest of VR from position S. There is no need to further advance S. 821 break; 822 } 823 } 824 825 if (isDebug()) { 826 dbgs() << "Prefixes matching register " << PrintReg(VR, HRI) << "\n"; 827 for (LRSMapType::iterator I = LM.begin(), E = LM.end(); I != E; ++I) { 828 dbgs() << " L=" << I->first << ':'; 829 const RSListType &LL = I->second; 830 for (unsigned i = 0, n = LL.size(); i < n; ++i) 831 dbgs() << " (" << PrintReg(LL[i].first, HRI) << ",@" 832 << LL[i].second << ')'; 833 dbgs() << '\n'; 834 } 835 } 836 837 838 bool Recorded = false; 839 840 for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) { 841 unsigned SrcR = *I; 842 int FDi = -1, LDi = -1; // First/last different bit. 843 const BitTracker::RegisterCell &AC = CMS->lookup(SrcR); 844 uint16_t AW = AC.width(); 845 for (uint16_t i = 0, w = std::min(W, AW); i < w; ++i) { 846 if (RC[i] == AC[i]) 847 continue; 848 if (FDi == -1) 849 FDi = i; 850 LDi = i; 851 } 852 if (FDi == -1) 853 continue; // TODO (future): Record identical registers. 854 // Look for a register whose prefix could patch the range [FD..LD] 855 // where VR and SrcR differ. 856 uint16_t FD = FDi, LD = LDi; // Switch to unsigned type. 857 uint16_t MinL = LD-FD+1; 858 for (uint16_t L = MinL; L < W; ++L) { 859 LRSMapType::iterator F = LM.find(L); 860 if (F == LM.end()) 861 continue; 862 RSListType &LL = F->second; 863 for (unsigned i = 0, n = LL.size(); i < n; ++i) { 864 uint16_t S = LL[i].second; 865 // MinL is the minimum length of the prefix. Any length above MinL 866 // allows some flexibility as to where the prefix can start: 867 // given the extra length EL=L-MinL, the prefix must start between 868 // max(0,FD-EL) and FD. 869 if (S > FD) // Starts too late. 870 continue; 871 uint16_t EL = L-MinL; 872 uint16_t LowS = (EL < FD) ? FD-EL : 0; 873 if (S < LowS) // Starts too early. 874 continue; 875 unsigned InsR = LL[i].first; 876 if (!isValidInsertForm(VR, SrcR, InsR, L, S)) 877 continue; 878 if (isDebug()) { 879 dbgs() << PrintReg(VR, HRI) << " = insert(" << PrintReg(SrcR, HRI) 880 << ',' << PrintReg(InsR, HRI) << ",#" << L << ",#" 881 << S << ")\n"; 882 } 883 IFRecordWithRegSet RR(IFRecord(SrcR, InsR, L, S), RegisterSet()); 884 IFMap[VR].push_back(RR); 885 Recorded = true; 886 } 887 } 888 } 889 890 return Recorded; 891} 892 893 894void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, 895 OrderedRegisterList &AVs) { 896 if (isDebug()) 897 dbgs() << "visiting block BB#" << B->getNumber() << "\n"; 898 899 // First, check if this block is reachable at all. If not, the bit tracker 900 // will not have any information about registers in it. 901 if (!CMS->BT.reached(B)) 902 return; 903 904 bool DoConst = OptConst; 905 // Keep a separate set of registers defined in this block, so that we 906 // can remove them from the list of available registers once all DT 907 // successors have been processed. 908 RegisterSet BlockDefs, InsDefs; 909 for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) { 910 MachineInstr *MI = &*I; 911 InsDefs.clear(); 912 getInstrDefs(MI, InsDefs); 913 // Leave those alone. They are more transparent than "insert". 914 bool Skip = MI->isCopy() || MI->isRegSequence(); 915 916 if (!Skip) { 917 // Visit all defined registers, and attempt to find the corresponding 918 // "insert" representations. 919 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) { 920 // Do not collect registers that are known to be compile-time cons- 921 // tants, unless requested. 922 if (!DoConst && isConstant(VR)) 923 continue; 924 // If VR's cell contains a reference to VR, then VR cannot be defined 925 // via "insert". If VR is a constant that can be generated in a single 926 // instruction (without constant extenders), generating it via insert 927 // makes no sense. 928 if (findSelfReference(VR) || isSmallConstant(VR)) 929 continue; 930 931 findRecordInsertForms(VR, AVs); 932 } 933 } 934 935 // Insert the defined registers into the list of available registers 936 // after they have been processed. 937 for (unsigned VR = InsDefs.find_first(); VR; VR = InsDefs.find_next(VR)) 938 AVs.insert(VR); 939 BlockDefs.insert(InsDefs); 940 } 941 942 MachineDomTreeNode *N = MDT->getNode(B); 943 typedef GraphTraits<MachineDomTreeNode*> GTN; 944 typedef GTN::ChildIteratorType ChildIter; 945 for (ChildIter I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) { 946 MachineBasicBlock *SB = (*I)->getBlock(); 947 collectInBlock(SB, AVs); 948 } 949 950 for (unsigned VR = BlockDefs.find_first(); VR; VR = BlockDefs.find_next(VR)) 951 AVs.remove(VR); 952} 953 954 955void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF, 956 RegisterSet &RMs) const { 957 // For a given register VR and a insert form, find the registers that are 958 // used by the current definition of VR, and which would no longer be 959 // needed for it after the definition of VR is replaced with the insert 960 // form. These are the registers that could potentially become dead. 961 RegisterSet Regs[2]; 962 963 unsigned S = 0; // Register set selector. 964 Regs[S].insert(VR); 965 966 while (!Regs[S].empty()) { 967 // Breadth-first search. 968 unsigned OtherS = 1-S; 969 Regs[OtherS].clear(); 970 for (unsigned R = Regs[S].find_first(); R; R = Regs[S].find_next(R)) { 971 Regs[S].remove(R); 972 if (R == IF.SrcR || R == IF.InsR) 973 continue; 974 // Check if a given register has bits that are references to any other 975 // registers. This is to detect situations where the instruction that 976 // defines register R takes register Q as an operand, but R itself does 977 // not contain any bits from Q. Loads are examples of how this could 978 // happen: 979 // R = load Q 980 // In this case (assuming we do not have any knowledge about the loaded 981 // value), we must not treat R as a "conveyance" of the bits from Q. 982 // (The information in BT about R's bits would have them as constants, 983 // in case of zero-extending loads, or refs to R.) 984 if (!findNonSelfReference(R)) 985 continue; 986 RMs.insert(R); 987 const MachineInstr *DefI = MRI->getVRegDef(R); 988 assert(DefI); 989 // Do not iterate past PHI nodes to avoid infinite loops. This can 990 // make the final set a bit less accurate, but the removable register 991 // sets are an approximation anyway. 992 if (DefI->isPHI()) 993 continue; 994 getInstrUses(DefI, Regs[OtherS]); 995 } 996 S = OtherS; 997 } 998 // The register VR is added to the list as a side-effect of the algorithm, 999 // but it is not "potentially removable". A potentially removable register 1000 // is one that may become unused (dead) after conversion to the insert form 1001 // IF, and obviously VR (or its replacement) will not become dead by apply- 1002 // ing IF. 1003 RMs.remove(VR); 1004} 1005 1006 1007void HexagonGenInsert::computeRemovableRegisters() { 1008 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1009 IFListType &LL = I->second; 1010 for (unsigned i = 0, n = LL.size(); i < n; ++i) 1011 findRemovableRegisters(I->first, LL[i].first, LL[i].second); 1012 } 1013} 1014 1015 1016void HexagonGenInsert::pruneEmptyLists() { 1017 // Remove all entries from the map, where the register has no insert forms 1018 // associated with it. 1019 typedef SmallVector<IFMapType::iterator,16> IterListType; 1020 IterListType Prune; 1021 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1022 if (I->second.size() == 0) 1023 Prune.push_back(I); 1024 } 1025 for (unsigned i = 0, n = Prune.size(); i < n; ++i) 1026 IFMap.erase(Prune[i]); 1027} 1028 1029 1030void HexagonGenInsert::pruneCoveredSets(unsigned VR) { 1031 IFMapType::iterator F = IFMap.find(VR); 1032 assert(F != IFMap.end()); 1033 IFListType &LL = F->second; 1034 1035 // First, examine the IF candidates for register VR whose removable-regis- 1036 // ter sets are empty. This means that a given candidate will not help eli- 1037 // minate any registers, but since "insert" is not a constant-extendable 1038 // instruction, using such a candidate may reduce code size if the defini- 1039 // tion of VR is constant-extended. 1040 // If there exists a candidate with a non-empty set, the ones with empty 1041 // sets will not be used and can be removed. 1042 MachineInstr *DefVR = MRI->getVRegDef(VR); 1043 bool DefEx = HII->isConstExtended(DefVR); 1044 bool HasNE = false; 1045 for (unsigned i = 0, n = LL.size(); i < n; ++i) { 1046 if (LL[i].second.empty()) 1047 continue; 1048 HasNE = true; 1049 break; 1050 } 1051 if (!DefEx || HasNE) { 1052 // The definition of VR is not constant-extended, or there is a candidate 1053 // with a non-empty set. Remove all candidates with empty sets. 1054 auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool { 1055 return IR.second.empty(); 1056 }; 1057 auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty); 1058 if (End != LL.end()) 1059 LL.erase(End, LL.end()); 1060 } else { 1061 // The definition of VR is constant-extended, and all candidates have 1062 // empty removable-register sets. Pick the maximum candidate, and remove 1063 // all others. The "maximum" does not have any special meaning here, it 1064 // is only so that the candidate that will remain on the list is selec- 1065 // ted deterministically. 1066 IFRecord MaxIF = LL[0].first; 1067 for (unsigned i = 1, n = LL.size(); i < n; ++i) { 1068 // If LL[MaxI] < LL[i], then MaxI = i. 1069 const IFRecord &IF = LL[i].first; 1070 unsigned M0 = BaseOrd[MaxIF.SrcR], M1 = BaseOrd[MaxIF.InsR]; 1071 unsigned R0 = BaseOrd[IF.SrcR], R1 = BaseOrd[IF.InsR]; 1072 if (M0 > R0) 1073 continue; 1074 if (M0 == R0) { 1075 if (M1 > R1) 1076 continue; 1077 if (M1 == R1) { 1078 if (MaxIF.Wdh > IF.Wdh) 1079 continue; 1080 if (MaxIF.Wdh == IF.Wdh && MaxIF.Off >= IF.Off) 1081 continue; 1082 } 1083 } 1084 // MaxIF < IF. 1085 MaxIF = IF; 1086 } 1087 // Remove everything except the maximum candidate. All register sets 1088 // are empty, so no need to preserve anything. 1089 LL.clear(); 1090 LL.push_back(std::make_pair(MaxIF, RegisterSet())); 1091 } 1092 1093 // Now, remove those whose sets of potentially removable registers are 1094 // contained in another IF candidate for VR. For example, given these 1095 // candidates for vreg45, 1096 // %vreg45: 1097 // (%vreg44,%vreg41,#9,#8), { %vreg42 } 1098 // (%vreg43,%vreg41,#9,#8), { %vreg42 %vreg44 } 1099 // remove the first one, since it is contained in the second one. 1100 for (unsigned i = 0, n = LL.size(); i < n; ) { 1101 const RegisterSet &RMi = LL[i].second; 1102 unsigned j = 0; 1103 while (j < n) { 1104 if (j != i && LL[j].second.includes(RMi)) 1105 break; 1106 j++; 1107 } 1108 if (j == n) { // RMi not contained in anything else. 1109 i++; 1110 continue; 1111 } 1112 LL.erase(LL.begin()+i); 1113 n = LL.size(); 1114 } 1115} 1116 1117 1118void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO, 1119 PairMapType &M) { 1120 IFMapType::iterator F = IFMap.find(VR); 1121 assert(F != IFMap.end()); 1122 IFListType &LL = F->second; 1123 unsigned Cutoff = VRegDistCutoff; 1124 const MachineInstr *DefV = MRI->getVRegDef(VR); 1125 1126 for (unsigned i = LL.size(); i > 0; --i) { 1127 unsigned SR = LL[i-1].first.SrcR, IR = LL[i-1].first.InsR; 1128 const MachineInstr *DefS = MRI->getVRegDef(SR); 1129 const MachineInstr *DefI = MRI->getVRegDef(IR); 1130 unsigned DSV = distance(DefS, DefV, RPO, M); 1131 if (DSV < Cutoff) { 1132 unsigned DIV = distance(DefI, DefV, RPO, M); 1133 if (DIV < Cutoff) 1134 continue; 1135 } 1136 LL.erase(LL.begin()+(i-1)); 1137 } 1138} 1139 1140 1141void HexagonGenInsert::pruneRegCopies(unsigned VR) { 1142 IFMapType::iterator F = IFMap.find(VR); 1143 assert(F != IFMap.end()); 1144 IFListType &LL = F->second; 1145 1146 auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool { 1147 return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32); 1148 }; 1149 auto End = std::remove_if(LL.begin(), LL.end(), IsCopy); 1150 if (End != LL.end()) 1151 LL.erase(End, LL.end()); 1152} 1153 1154 1155void HexagonGenInsert::pruneCandidates() { 1156 // Remove candidates that are not beneficial, regardless of the final 1157 // selection method. 1158 // First, remove candidates whose potentially removable set is a subset 1159 // of another candidate's set. 1160 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1161 pruneCoveredSets(I->first); 1162 1163 UnsignedMap RPO; 1164 typedef ReversePostOrderTraversal<const MachineFunction*> RPOTType; 1165 RPOTType RPOT(MFN); 1166 unsigned RPON = 0; 1167 for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) 1168 RPO[(*I)->getNumber()] = RPON++; 1169 1170 PairMapType Memo; // Memoization map for distance calculation. 1171 // Remove candidates that would use registers defined too far away. 1172 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1173 pruneUsesTooFar(I->first, RPO, Memo); 1174 1175 pruneEmptyLists(); 1176 1177 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) 1178 pruneRegCopies(I->first); 1179} 1180 1181 1182namespace { 1183 // Class for comparing IF candidates for registers that have multiple of 1184 // them. The smaller the candidate, according to this ordering, the better. 1185 // First, compare the number of zeros in the associated potentially remova- 1186 // ble register sets. "Zero" indicates that the register is very likely to 1187 // become dead after this transformation. 1188 // Second, compare "averages", i.e. use-count per size. The lower wins. 1189 // After that, it does not really matter which one is smaller. Resolve 1190 // the tie in some deterministic way. 1191 struct IFOrdering { 1192 IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO) 1193 : UseC(UC), BaseOrd(BO) {} 1194 bool operator() (const IFRecordWithRegSet &A, 1195 const IFRecordWithRegSet &B) const; 1196 private: 1197 void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1198 unsigned &Sum) const; 1199 const UnsignedMap &UseC; 1200 const RegisterOrdering &BaseOrd; 1201 }; 1202} 1203 1204 1205bool IFOrdering::operator() (const IFRecordWithRegSet &A, 1206 const IFRecordWithRegSet &B) const { 1207 unsigned SizeA = 0, ZeroA = 0, SumA = 0; 1208 unsigned SizeB = 0, ZeroB = 0, SumB = 0; 1209 stats(A.second, SizeA, ZeroA, SumA); 1210 stats(B.second, SizeB, ZeroB, SumB); 1211 1212 // We will pick the minimum element. The more zeros, the better. 1213 if (ZeroA != ZeroB) 1214 return ZeroA > ZeroB; 1215 // Compare SumA/SizeA with SumB/SizeB, lower is better. 1216 uint64_t AvgA = SumA*SizeB, AvgB = SumB*SizeA; 1217 if (AvgA != AvgB) 1218 return AvgA < AvgB; 1219 1220 // The sets compare identical so far. Resort to comparing the IF records. 1221 // The actual values don't matter, this is only for determinism. 1222 unsigned OSA = BaseOrd[A.first.SrcR], OSB = BaseOrd[B.first.SrcR]; 1223 if (OSA != OSB) 1224 return OSA < OSB; 1225 unsigned OIA = BaseOrd[A.first.InsR], OIB = BaseOrd[B.first.InsR]; 1226 if (OIA != OIB) 1227 return OIA < OIB; 1228 if (A.first.Wdh != B.first.Wdh) 1229 return A.first.Wdh < B.first.Wdh; 1230 return A.first.Off < B.first.Off; 1231} 1232 1233 1234void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero, 1235 unsigned &Sum) const { 1236 for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) { 1237 UnsignedMap::const_iterator F = UseC.find(R); 1238 assert(F != UseC.end()); 1239 unsigned UC = F->second; 1240 if (UC == 0) 1241 Zero++; 1242 Sum += UC; 1243 Size++; 1244 } 1245} 1246 1247 1248void HexagonGenInsert::selectCandidates() { 1249 // Some registers may have multiple valid candidates. Pick the best one 1250 // (or decide not to use any). 1251 1252 // Compute the "removability" measure of R: 1253 // For each potentially removable register R, record the number of regis- 1254 // ters with IF candidates, where R appears in at least one set. 1255 RegisterSet AllRMs; 1256 UnsignedMap UseC, RemC; 1257 IFMapType::iterator End = IFMap.end(); 1258 1259 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1260 const IFListType &LL = I->second; 1261 RegisterSet TT; 1262 for (unsigned i = 0, n = LL.size(); i < n; ++i) 1263 TT.insert(LL[i].second); 1264 for (unsigned R = TT.find_first(); R; R = TT.find_next(R)) 1265 RemC[R]++; 1266 AllRMs.insert(TT); 1267 } 1268 1269 for (unsigned R = AllRMs.find_first(); R; R = AllRMs.find_next(R)) { 1270 typedef MachineRegisterInfo::use_nodbg_iterator use_iterator; 1271 typedef SmallSet<const MachineInstr*,16> InstrSet; 1272 InstrSet UIs; 1273 // Count as the number of instructions in which R is used, not the 1274 // number of operands. 1275 use_iterator E = MRI->use_nodbg_end(); 1276 for (use_iterator I = MRI->use_nodbg_begin(R); I != E; ++I) 1277 UIs.insert(I->getParent()); 1278 unsigned C = UIs.size(); 1279 // Calculate a measure, which is the number of instructions using R, 1280 // minus the "removability" count computed earlier. 1281 unsigned D = RemC[R]; 1282 UseC[R] = (C > D) ? C-D : 0; // doz 1283 } 1284 1285 1286 bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0; 1287 if (!SelectAll0 && !SelectHas0) 1288 SelectAll0 = true; 1289 1290 // The smaller the number UseC for a given register R, the "less used" 1291 // R is aside from the opportunities for removal offered by generating 1292 // "insert" instructions. 1293 // Iterate over the IF map, and for those registers that have multiple 1294 // candidates, pick the minimum one according to IFOrdering. 1295 IFOrdering IFO(UseC, BaseOrd); 1296 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1297 IFListType &LL = I->second; 1298 if (LL.empty()) 1299 continue; 1300 // Get the minimum element, remember it and clear the list. If the 1301 // element found is adequate, we will put it back on the list, other- 1302 // wise the list will remain empty, and the entry for this register 1303 // will be removed (i.e. this register will not be replaced by insert). 1304 IFListType::iterator MinI = std::min_element(LL.begin(), LL.end(), IFO); 1305 assert(MinI != LL.end()); 1306 IFRecordWithRegSet M = *MinI; 1307 LL.clear(); 1308 1309 // We want to make sure that this replacement will have a chance to be 1310 // beneficial, and that means that we want to have indication that some 1311 // register will be removed. The most likely registers to be eliminated 1312 // are the use operands in the definition of I->first. Accept/reject a 1313 // candidate based on how many of its uses it can potentially eliminate. 1314 1315 RegisterSet Us; 1316 const MachineInstr *DefI = MRI->getVRegDef(I->first); 1317 getInstrUses(DefI, Us); 1318 bool Accept = false; 1319 1320 if (SelectAll0) { 1321 bool All0 = true; 1322 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1323 if (UseC[R] == 0) 1324 continue; 1325 All0 = false; 1326 break; 1327 } 1328 Accept = All0; 1329 } else if (SelectHas0) { 1330 bool Has0 = false; 1331 for (unsigned R = Us.find_first(); R; R = Us.find_next(R)) { 1332 if (UseC[R] != 0) 1333 continue; 1334 Has0 = true; 1335 break; 1336 } 1337 Accept = Has0; 1338 } 1339 if (Accept) 1340 LL.push_back(M); 1341 } 1342 1343 // Remove candidates that add uses of removable registers, unless the 1344 // removable registers are among replacement candidates. 1345 // Recompute the removable registers, since some candidates may have 1346 // been eliminated. 1347 AllRMs.clear(); 1348 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1349 const IFListType &LL = I->second; 1350 if (LL.size() > 0) 1351 AllRMs.insert(LL[0].second); 1352 } 1353 for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) { 1354 IFListType &LL = I->second; 1355 if (LL.size() == 0) 1356 continue; 1357 unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR; 1358 if (AllRMs[SR] || AllRMs[IR]) 1359 LL.clear(); 1360 } 1361 1362 pruneEmptyLists(); 1363} 1364 1365 1366bool HexagonGenInsert::generateInserts() { 1367 // Create a new register for each one from IFMap, and store them in the 1368 // map. 1369 UnsignedMap RegMap; 1370 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1371 unsigned VR = I->first; 1372 const TargetRegisterClass *RC = MRI->getRegClass(VR); 1373 unsigned NewVR = MRI->createVirtualRegister(RC); 1374 RegMap[VR] = NewVR; 1375 } 1376 1377 // We can generate the "insert" instructions using potentially stale re- 1378 // gisters: SrcR and InsR for a given VR may be among other registers that 1379 // are also replaced. This is fine, we will do the mass "rauw" a bit later. 1380 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1381 MachineInstr *MI = MRI->getVRegDef(I->first); 1382 MachineBasicBlock &B = *MI->getParent(); 1383 DebugLoc DL = MI->getDebugLoc(); 1384 unsigned NewR = RegMap[I->first]; 1385 bool R32 = MRI->getRegClass(NewR) == &Hexagon::IntRegsRegClass; 1386 const MCInstrDesc &D = R32 ? HII->get(Hexagon::S2_insert) 1387 : HII->get(Hexagon::S2_insertp); 1388 IFRecord IF = I->second[0].first; 1389 unsigned Wdh = IF.Wdh, Off = IF.Off; 1390 unsigned InsS = 0; 1391 if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) { 1392 InsS = Hexagon::subreg_loreg; 1393 if (Off >= 32) { 1394 InsS = Hexagon::subreg_hireg; 1395 Off -= 32; 1396 } 1397 } 1398 // Advance to the proper location for inserting instructions. This could 1399 // be B.end(). 1400 MachineBasicBlock::iterator At = MI; 1401 if (MI->isPHI()) 1402 At = B.getFirstNonPHI(); 1403 1404 BuildMI(B, At, DL, D, NewR) 1405 .addReg(IF.SrcR) 1406 .addReg(IF.InsR, 0, InsS) 1407 .addImm(Wdh) 1408 .addImm(Off); 1409 1410 MRI->clearKillFlags(IF.SrcR); 1411 MRI->clearKillFlags(IF.InsR); 1412 } 1413 1414 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1415 MachineInstr *DefI = MRI->getVRegDef(I->first); 1416 MRI->replaceRegWith(I->first, RegMap[I->first]); 1417 DefI->eraseFromParent(); 1418 } 1419 1420 return true; 1421} 1422 1423 1424bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) { 1425 bool Changed = false; 1426 typedef GraphTraits<MachineDomTreeNode*> GTN; 1427 for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) 1428 Changed |= removeDeadCode(*I); 1429 1430 MachineBasicBlock *B = N->getBlock(); 1431 std::vector<MachineInstr*> Instrs; 1432 for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) 1433 Instrs.push_back(&*I); 1434 1435 for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) { 1436 MachineInstr *MI = *I; 1437 unsigned Opc = MI->getOpcode(); 1438 // Do not touch lifetime markers. This is why the target-independent DCE 1439 // cannot be used. 1440 if (Opc == TargetOpcode::LIFETIME_START || 1441 Opc == TargetOpcode::LIFETIME_END) 1442 continue; 1443 bool Store = false; 1444 if (MI->isInlineAsm() || !MI->isSafeToMove(nullptr, Store)) 1445 continue; 1446 1447 bool AllDead = true; 1448 SmallVector<unsigned,2> Regs; 1449 for (ConstMIOperands Op(MI); Op.isValid(); ++Op) { 1450 if (!Op->isReg() || !Op->isDef()) 1451 continue; 1452 unsigned R = Op->getReg(); 1453 if (!TargetRegisterInfo::isVirtualRegister(R) || 1454 !MRI->use_nodbg_empty(R)) { 1455 AllDead = false; 1456 break; 1457 } 1458 Regs.push_back(R); 1459 } 1460 if (!AllDead) 1461 continue; 1462 1463 B->erase(MI); 1464 for (unsigned I = 0, N = Regs.size(); I != N; ++I) 1465 MRI->markUsesInDebugValueAsUndef(Regs[I]); 1466 Changed = true; 1467 } 1468 1469 return Changed; 1470} 1471 1472 1473bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { 1474 bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail; 1475 bool Changed = false; 1476 TimerGroup __G("hexinsert"); 1477 NamedRegionTimer __T("hexinsert", Timing && !TimingDetail); 1478 1479 // Sanity check: one, but not both. 1480 assert(!OptSelectAll0 || !OptSelectHas0); 1481 1482 IFMap.clear(); 1483 BaseOrd.clear(); 1484 CellOrd.clear(); 1485 1486 const auto &ST = MF.getSubtarget<HexagonSubtarget>(); 1487 HII = ST.getInstrInfo(); 1488 HRI = ST.getRegisterInfo(); 1489 MFN = &MF; 1490 MRI = &MF.getRegInfo(); 1491 MDT = &getAnalysis<MachineDominatorTree>(); 1492 1493 // Clean up before any further processing, so that dead code does not 1494 // get used in a newly generated "insert" instruction. Have a custom 1495 // version of DCE that preserves lifetime markers. Without it, merging 1496 // of stack objects can fail to recognize and merge disjoint objects 1497 // leading to unnecessary stack growth. 1498 Changed = removeDeadCode(MDT->getRootNode()); 1499 1500 const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); 1501 BitTracker BTLoc(HE, MF); 1502 BTLoc.trace(isDebug()); 1503 BTLoc.run(); 1504 CellMapShadow MS(BTLoc); 1505 CMS = &MS; 1506 1507 buildOrderingMF(BaseOrd); 1508 buildOrderingBT(BaseOrd, CellOrd); 1509 1510 if (isDebug()) { 1511 dbgs() << "Cell ordering:\n"; 1512 for (RegisterOrdering::iterator I = CellOrd.begin(), E = CellOrd.end(); 1513 I != E; ++I) { 1514 unsigned VR = I->first, Pos = I->second; 1515 dbgs() << PrintReg(VR, HRI) << " -> " << Pos << "\n"; 1516 } 1517 } 1518 1519 // Collect candidates for conversion into the insert forms. 1520 MachineBasicBlock *RootB = MDT->getRoot(); 1521 OrderedRegisterList AvailR(CellOrd); 1522 1523 { 1524 NamedRegionTimer _T("collection", "hexinsert", TimingDetail); 1525 collectInBlock(RootB, AvailR); 1526 // Complete the information gathered in IFMap. 1527 computeRemovableRegisters(); 1528 } 1529 1530 if (isDebug()) { 1531 dbgs() << "Candidates after collection:\n"; 1532 dump_map(); 1533 } 1534 1535 if (IFMap.empty()) 1536 return Changed; 1537 1538 { 1539 NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); 1540 pruneCandidates(); 1541 } 1542 1543 if (isDebug()) { 1544 dbgs() << "Candidates after pruning:\n"; 1545 dump_map(); 1546 } 1547 1548 if (IFMap.empty()) 1549 return Changed; 1550 1551 { 1552 NamedRegionTimer _T("selection", "hexinsert", TimingDetail); 1553 selectCandidates(); 1554 } 1555 1556 if (isDebug()) { 1557 dbgs() << "Candidates after selection:\n"; 1558 dump_map(); 1559 } 1560 1561 // Filter out vregs beyond the cutoff. 1562 if (VRegIndexCutoff.getPosition()) { 1563 unsigned Cutoff = VRegIndexCutoff; 1564 typedef SmallVector<IFMapType::iterator,16> IterListType; 1565 IterListType Out; 1566 for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) { 1567 unsigned Idx = TargetRegisterInfo::virtReg2Index(I->first); 1568 if (Idx >= Cutoff) 1569 Out.push_back(I); 1570 } 1571 for (unsigned i = 0, n = Out.size(); i < n; ++i) 1572 IFMap.erase(Out[i]); 1573 } 1574 if (IFMap.empty()) 1575 return Changed; 1576 1577 { 1578 NamedRegionTimer _T("generation", "hexinsert", TimingDetail); 1579 generateInserts(); 1580 } 1581 1582 return true; 1583} 1584 1585 1586FunctionPass *llvm::createHexagonGenInsert() { 1587 return new HexagonGenInsert(); 1588} 1589 1590 1591//===----------------------------------------------------------------------===// 1592// Public Constructor Functions 1593//===----------------------------------------------------------------------===// 1594 1595INITIALIZE_PASS_BEGIN(HexagonGenInsert, "hexinsert", 1596 "Hexagon generate \"insert\" instructions", false, false) 1597INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1598INITIALIZE_PASS_END(HexagonGenInsert, "hexinsert", 1599 "Hexagon generate \"insert\" instructions", false, false) 1600