1//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the AArch64PromoteConstant pass which promotes constants 11// to global variables when this is likely to be more efficient. Currently only 12// types related to constant vector (i.e., constant vector, array of constant 13// vectors, constant structure with a constant vector field, etc.) are promoted 14// to global variables. Constant vectors are likely to be lowered in target 15// constant pool during instruction selection already; therefore, the access 16// will remain the same (memory load), but the structure types are not split 17// into different constant pool accesses for each field. A bonus side effect is 18// that created globals may be merged by the global merge pass. 19// 20// FIXME: This pass may be useful for other targets too. 21//===----------------------------------------------------------------------===// 22 23#include "AArch64.h" 24#include "llvm/ADT/DenseMap.h" 25#include "llvm/ADT/SmallPtrSet.h" 26#include "llvm/ADT/SmallVector.h" 27#include "llvm/ADT/Statistic.h" 28#include "llvm/IR/Constants.h" 29#include "llvm/IR/Dominators.h" 30#include "llvm/IR/Function.h" 31#include "llvm/IR/GlobalVariable.h" 32#include "llvm/IR/IRBuilder.h" 33#include "llvm/IR/InlineAsm.h" 34#include "llvm/IR/InstIterator.h" 35#include "llvm/IR/Instructions.h" 36#include "llvm/IR/IntrinsicInst.h" 37#include "llvm/IR/Module.h" 38#include "llvm/Pass.h" 39#include "llvm/Support/CommandLine.h" 40#include "llvm/Support/Debug.h" 41#include "llvm/Support/raw_ostream.h" 42 43using namespace llvm; 44 45#define DEBUG_TYPE "aarch64-promote-const" 46 47// Stress testing mode - disable heuristics. 48static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden, 49 cl::desc("Promote all vector constants")); 50 51STATISTIC(NumPromoted, "Number of promoted constants"); 52STATISTIC(NumPromotedUses, "Number of promoted constants uses"); 53 54//===----------------------------------------------------------------------===// 55// AArch64PromoteConstant 56//===----------------------------------------------------------------------===// 57 58namespace { 59/// Promotes interesting constant into global variables. 60/// The motivating example is: 61/// static const uint16_t TableA[32] = { 62/// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, 63/// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, 64/// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, 65/// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, 66/// }; 67/// 68/// uint8x16x4_t LoadStatic(void) { 69/// uint8x16x4_t ret; 70/// ret.val[0] = vld1q_u16(TableA + 0); 71/// ret.val[1] = vld1q_u16(TableA + 8); 72/// ret.val[2] = vld1q_u16(TableA + 16); 73/// ret.val[3] = vld1q_u16(TableA + 24); 74/// return ret; 75/// } 76/// 77/// The constants in this example are folded into the uses. Thus, 4 different 78/// constants are created. 79/// 80/// As their type is vector the cheapest way to create them is to load them 81/// for the memory. 82/// 83/// Therefore the final assembly final has 4 different loads. With this pass 84/// enabled, only one load is issued for the constants. 85class AArch64PromoteConstant : public ModulePass { 86 87public: 88 struct PromotedConstant { 89 bool ShouldConvert = false; 90 GlobalVariable *GV = nullptr; 91 }; 92 typedef SmallDenseMap<Constant *, PromotedConstant, 16> PromotionCacheTy; 93 94 struct UpdateRecord { 95 Constant *C; 96 Instruction *User; 97 unsigned Op; 98 99 UpdateRecord(Constant *C, Instruction *User, unsigned Op) 100 : C(C), User(User), Op(Op) {} 101 }; 102 103 static char ID; 104 AArch64PromoteConstant() : ModulePass(ID) {} 105 106 const char *getPassName() const override { return "AArch64 Promote Constant"; } 107 108 /// Iterate over the functions and promote the interesting constants into 109 /// global variables with module scope. 110 bool runOnModule(Module &M) override { 111 DEBUG(dbgs() << getPassName() << '\n'); 112 if (skipModule(M)) 113 return false; 114 bool Changed = false; 115 PromotionCacheTy PromotionCache; 116 for (auto &MF : M) { 117 Changed |= runOnFunction(MF, PromotionCache); 118 } 119 return Changed; 120 } 121 122private: 123 /// Look for interesting constants used within the given function. 124 /// Promote them into global variables, load these global variables within 125 /// the related function, so that the number of inserted load is minimal. 126 bool runOnFunction(Function &F, PromotionCacheTy &PromotionCache); 127 128 // This transformation requires dominator info 129 void getAnalysisUsage(AnalysisUsage &AU) const override { 130 AU.setPreservesCFG(); 131 AU.addRequired<DominatorTreeWrapperPass>(); 132 AU.addPreserved<DominatorTreeWrapperPass>(); 133 } 134 135 /// Type to store a list of Uses. 136 typedef SmallVector<std::pair<Instruction *, unsigned>, 4> Uses; 137 /// Map an insertion point to all the uses it dominates. 138 typedef DenseMap<Instruction *, Uses> InsertionPoints; 139 140 /// Find the closest point that dominates the given Use. 141 Instruction *findInsertionPoint(Instruction &User, unsigned OpNo); 142 143 /// Check if the given insertion point is dominated by an existing 144 /// insertion point. 145 /// If true, the given use is added to the list of dominated uses for 146 /// the related existing point. 147 /// \param NewPt the insertion point to be checked 148 /// \param User the user of the constant 149 /// \param OpNo the operand number of the use 150 /// \param InsertPts existing insertion points 151 /// \pre NewPt and all instruction in InsertPts belong to the same function 152 /// \return true if one of the insertion point in InsertPts dominates NewPt, 153 /// false otherwise 154 bool isDominated(Instruction *NewPt, Instruction *User, unsigned OpNo, 155 InsertionPoints &InsertPts); 156 157 /// Check if the given insertion point can be merged with an existing 158 /// insertion point in a common dominator. 159 /// If true, the given use is added to the list of the created insertion 160 /// point. 161 /// \param NewPt the insertion point to be checked 162 /// \param User the user of the constant 163 /// \param OpNo the operand number of the use 164 /// \param InsertPts existing insertion points 165 /// \pre NewPt and all instruction in InsertPts belong to the same function 166 /// \pre isDominated returns false for the exact same parameters. 167 /// \return true if it exists an insertion point in InsertPts that could 168 /// have been merged with NewPt in a common dominator, 169 /// false otherwise 170 bool tryAndMerge(Instruction *NewPt, Instruction *User, unsigned OpNo, 171 InsertionPoints &InsertPts); 172 173 /// Compute the minimal insertion points to dominates all the interesting 174 /// uses of value. 175 /// Insertion points are group per function and each insertion point 176 /// contains a list of all the uses it dominates within the related function 177 /// \param User the user of the constant 178 /// \param OpNo the operand number of the constant 179 /// \param[out] InsertPts output storage of the analysis 180 void computeInsertionPoint(Instruction *User, unsigned OpNo, 181 InsertionPoints &InsertPts); 182 183 /// Insert a definition of a new global variable at each point contained in 184 /// InsPtsPerFunc and update the related uses (also contained in 185 /// InsPtsPerFunc). 186 void insertDefinitions(Function &F, GlobalVariable &GV, 187 InsertionPoints &InsertPts); 188 189 /// Do the constant promotion indicated by the Updates records, keeping track 190 /// of globals in PromotionCache. 191 void promoteConstants(Function &F, SmallVectorImpl<UpdateRecord> &Updates, 192 PromotionCacheTy &PromotionCache); 193 194 /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. 195 /// Append Use to this list and delete the entry of IPI in InsertPts. 196 static void appendAndTransferDominatedUses(Instruction *NewPt, 197 Instruction *User, unsigned OpNo, 198 InsertionPoints::iterator &IPI, 199 InsertionPoints &InsertPts) { 200 // Record the dominated use. 201 IPI->second.emplace_back(User, OpNo); 202 // Transfer the dominated uses of IPI to NewPt 203 // Inserting into the DenseMap may invalidate existing iterator. 204 // Keep a copy of the key to find the iterator to erase. Keep a copy of the 205 // value so that we don't have to dereference IPI->second. 206 Instruction *OldInstr = IPI->first; 207 Uses OldUses = std::move(IPI->second); 208 InsertPts[NewPt] = std::move(OldUses); 209 // Erase IPI. 210 InsertPts.erase(OldInstr); 211 } 212}; 213} // end anonymous namespace 214 215char AArch64PromoteConstant::ID = 0; 216 217namespace llvm { 218void initializeAArch64PromoteConstantPass(PassRegistry &); 219} 220 221INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", 222 "AArch64 Promote Constant Pass", false, false) 223INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 224INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", 225 "AArch64 Promote Constant Pass", false, false) 226 227ModulePass *llvm::createAArch64PromoteConstantPass() { 228 return new AArch64PromoteConstant(); 229} 230 231/// Check if the given type uses a vector type. 232static bool isConstantUsingVectorTy(const Type *CstTy) { 233 if (CstTy->isVectorTy()) 234 return true; 235 if (CstTy->isStructTy()) { 236 for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); 237 EltIdx < EndEltIdx; ++EltIdx) 238 if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) 239 return true; 240 } else if (CstTy->isArrayTy()) 241 return isConstantUsingVectorTy(CstTy->getArrayElementType()); 242 return false; 243} 244 245/// Check if the given use (Instruction + OpIdx) of Cst should be converted into 246/// a load of a global variable initialized with Cst. 247/// A use should be converted if it is legal to do so. 248/// For instance, it is not legal to turn the mask operand of a shuffle vector 249/// into a load of a global variable. 250static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, 251 unsigned OpIdx) { 252 // shufflevector instruction expects a const for the mask argument, i.e., the 253 // third argument. Do not promote this use in that case. 254 if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2) 255 return false; 256 257 // extractvalue instruction expects a const idx. 258 if (isa<const ExtractValueInst>(Instr) && OpIdx > 0) 259 return false; 260 261 // extractvalue instruction expects a const idx. 262 if (isa<const InsertValueInst>(Instr) && OpIdx > 1) 263 return false; 264 265 if (isa<const AllocaInst>(Instr) && OpIdx > 0) 266 return false; 267 268 // Alignment argument must be constant. 269 if (isa<const LoadInst>(Instr) && OpIdx > 0) 270 return false; 271 272 // Alignment argument must be constant. 273 if (isa<const StoreInst>(Instr) && OpIdx > 1) 274 return false; 275 276 // Index must be constant. 277 if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0) 278 return false; 279 280 // Personality function and filters must be constant. 281 // Give up on that instruction. 282 if (isa<const LandingPadInst>(Instr)) 283 return false; 284 285 // Switch instruction expects constants to compare to. 286 if (isa<const SwitchInst>(Instr)) 287 return false; 288 289 // Expected address must be a constant. 290 if (isa<const IndirectBrInst>(Instr)) 291 return false; 292 293 // Do not mess with intrinsics. 294 if (isa<const IntrinsicInst>(Instr)) 295 return false; 296 297 // Do not mess with inline asm. 298 const CallInst *CI = dyn_cast<const CallInst>(Instr); 299 return !(CI && isa<const InlineAsm>(CI->getCalledValue())); 300} 301 302/// Check if the given Cst should be converted into 303/// a load of a global variable initialized with Cst. 304/// A constant should be converted if it is likely that the materialization of 305/// the constant will be tricky. Thus, we give up on zero or undef values. 306/// 307/// \todo Currently, accept only vector related types. 308/// Also we give up on all simple vector type to keep the existing 309/// behavior. Otherwise, we should push here all the check of the lowering of 310/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging 311/// constant via global merge and the fact that the same constant is stored 312/// only once with this method (versus, as many function that uses the constant 313/// for the regular approach, even for float). 314/// Again, the simplest solution would be to promote every 315/// constant and rematerialize them when they are actually cheap to create. 316static bool shouldConvertImpl(const Constant *Cst) { 317 if (isa<const UndefValue>(Cst)) 318 return false; 319 320 // FIXME: In some cases, it may be interesting to promote in memory 321 // a zero initialized constant. 322 // E.g., when the type of Cst require more instructions than the 323 // adrp/add/load sequence or when this sequence can be shared by several 324 // instances of Cst. 325 // Ideally, we could promote this into a global and rematerialize the constant 326 // when it was a bad idea. 327 if (Cst->isZeroValue()) 328 return false; 329 330 if (Stress) 331 return true; 332 333 // FIXME: see function \todo 334 if (Cst->getType()->isVectorTy()) 335 return false; 336 return isConstantUsingVectorTy(Cst->getType()); 337} 338 339static bool 340shouldConvert(Constant &C, 341 AArch64PromoteConstant::PromotionCacheTy &PromotionCache) { 342 auto Converted = PromotionCache.insert( 343 std::make_pair(&C, AArch64PromoteConstant::PromotedConstant())); 344 if (Converted.second) 345 Converted.first->second.ShouldConvert = shouldConvertImpl(&C); 346 return Converted.first->second.ShouldConvert; 347} 348 349Instruction *AArch64PromoteConstant::findInsertionPoint(Instruction &User, 350 unsigned OpNo) { 351 // If this user is a phi, the insertion point is in the related 352 // incoming basic block. 353 if (PHINode *PhiInst = dyn_cast<PHINode>(&User)) 354 return PhiInst->getIncomingBlock(OpNo)->getTerminator(); 355 356 return &User; 357} 358 359bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Instruction *User, 360 unsigned OpNo, 361 InsertionPoints &InsertPts) { 362 363 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 364 *NewPt->getParent()->getParent()).getDomTree(); 365 366 // Traverse all the existing insertion points and check if one is dominating 367 // NewPt. If it is, remember that. 368 for (auto &IPI : InsertPts) { 369 if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) || 370 // When IPI.first is a terminator instruction, DT may think that 371 // the result is defined on the edge. 372 // Here we are testing the insertion point, not the definition. 373 (IPI.first->getParent() != NewPt->getParent() && 374 DT.dominates(IPI.first->getParent(), NewPt->getParent()))) { 375 // No need to insert this point. Just record the dominated use. 376 DEBUG(dbgs() << "Insertion point dominated by:\n"); 377 DEBUG(IPI.first->print(dbgs())); 378 DEBUG(dbgs() << '\n'); 379 IPI.second.emplace_back(User, OpNo); 380 return true; 381 } 382 } 383 return false; 384} 385 386bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Instruction *User, 387 unsigned OpNo, 388 InsertionPoints &InsertPts) { 389 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 390 *NewPt->getParent()->getParent()).getDomTree(); 391 BasicBlock *NewBB = NewPt->getParent(); 392 393 // Traverse all the existing insertion point and check if one is dominated by 394 // NewPt and thus useless or can be combined with NewPt into a common 395 // dominator. 396 for (InsertionPoints::iterator IPI = InsertPts.begin(), 397 EndIPI = InsertPts.end(); 398 IPI != EndIPI; ++IPI) { 399 BasicBlock *CurBB = IPI->first->getParent(); 400 if (NewBB == CurBB) { 401 // Instructions are in the same block. 402 // By construction, NewPt is dominating the other. 403 // Indeed, isDominated returned false with the exact same arguments. 404 DEBUG(dbgs() << "Merge insertion point with:\n"); 405 DEBUG(IPI->first->print(dbgs())); 406 DEBUG(dbgs() << "\nat considered insertion point.\n"); 407 appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 408 return true; 409 } 410 411 // Look for a common dominator 412 BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); 413 // If none exists, we cannot merge these two points. 414 if (!CommonDominator) 415 continue; 416 417 if (CommonDominator != NewBB) { 418 // By construction, the CommonDominator cannot be CurBB. 419 assert(CommonDominator != CurBB && 420 "Instruction has not been rejected during isDominated check!"); 421 // Take the last instruction of the CommonDominator as insertion point 422 NewPt = CommonDominator->getTerminator(); 423 } 424 // else, CommonDominator is the block of NewBB, hence NewBB is the last 425 // possible insertion point in that block. 426 DEBUG(dbgs() << "Merge insertion point with:\n"); 427 DEBUG(IPI->first->print(dbgs())); 428 DEBUG(dbgs() << '\n'); 429 DEBUG(NewPt->print(dbgs())); 430 DEBUG(dbgs() << '\n'); 431 appendAndTransferDominatedUses(NewPt, User, OpNo, IPI, InsertPts); 432 return true; 433 } 434 return false; 435} 436 437void AArch64PromoteConstant::computeInsertionPoint( 438 Instruction *User, unsigned OpNo, InsertionPoints &InsertPts) { 439 DEBUG(dbgs() << "Considered use, opidx " << OpNo << ":\n"); 440 DEBUG(User->print(dbgs())); 441 DEBUG(dbgs() << '\n'); 442 443 Instruction *InsertionPoint = findInsertionPoint(*User, OpNo); 444 445 DEBUG(dbgs() << "Considered insertion point:\n"); 446 DEBUG(InsertionPoint->print(dbgs())); 447 DEBUG(dbgs() << '\n'); 448 449 if (isDominated(InsertionPoint, User, OpNo, InsertPts)) 450 return; 451 // This insertion point is useful, check if we can merge some insertion 452 // point in a common dominator or if NewPt dominates an existing one. 453 if (tryAndMerge(InsertionPoint, User, OpNo, InsertPts)) 454 return; 455 456 DEBUG(dbgs() << "Keep considered insertion point\n"); 457 458 // It is definitely useful by its own 459 InsertPts[InsertionPoint].emplace_back(User, OpNo); 460} 461 462static void ensurePromotedGV(Function &F, Constant &C, 463 AArch64PromoteConstant::PromotedConstant &PC) { 464 assert(PC.ShouldConvert && 465 "Expected that we should convert this to a global"); 466 if (PC.GV) 467 return; 468 PC.GV = new GlobalVariable( 469 *F.getParent(), C.getType(), true, GlobalValue::InternalLinkage, nullptr, 470 "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); 471 PC.GV->setInitializer(&C); 472 DEBUG(dbgs() << "Global replacement: "); 473 DEBUG(PC.GV->print(dbgs())); 474 DEBUG(dbgs() << '\n'); 475 ++NumPromoted; 476} 477 478void AArch64PromoteConstant::insertDefinitions(Function &F, 479 GlobalVariable &PromotedGV, 480 InsertionPoints &InsertPts) { 481#ifndef NDEBUG 482 // Do more checking for debug purposes. 483 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 484#endif 485 assert(!InsertPts.empty() && "Empty uses does not need a definition"); 486 487 for (const auto &IPI : InsertPts) { 488 // Create the load of the global variable. 489 IRBuilder<> Builder(IPI.first); 490 LoadInst *LoadedCst = Builder.CreateLoad(&PromotedGV); 491 DEBUG(dbgs() << "**********\n"); 492 DEBUG(dbgs() << "New def: "); 493 DEBUG(LoadedCst->print(dbgs())); 494 DEBUG(dbgs() << '\n'); 495 496 // Update the dominated uses. 497 for (auto Use : IPI.second) { 498#ifndef NDEBUG 499 assert(DT.dominates(LoadedCst, 500 findInsertionPoint(*Use.first, Use.second)) && 501 "Inserted definition does not dominate all its uses!"); 502#endif 503 DEBUG({ 504 dbgs() << "Use to update " << Use.second << ":"; 505 Use.first->print(dbgs()); 506 dbgs() << '\n'; 507 }); 508 Use.first->setOperand(Use.second, LoadedCst); 509 ++NumPromotedUses; 510 } 511 } 512} 513 514void AArch64PromoteConstant::promoteConstants( 515 Function &F, SmallVectorImpl<UpdateRecord> &Updates, 516 PromotionCacheTy &PromotionCache) { 517 // Promote the constants. 518 for (auto U = Updates.begin(), E = Updates.end(); U != E;) { 519 DEBUG(dbgs() << "** Compute insertion points **\n"); 520 auto First = U; 521 Constant *C = First->C; 522 InsertionPoints InsertPts; 523 do { 524 computeInsertionPoint(U->User, U->Op, InsertPts); 525 } while (++U != E && U->C == C); 526 527 auto &Promotion = PromotionCache[C]; 528 ensurePromotedGV(F, *C, Promotion); 529 insertDefinitions(F, *Promotion.GV, InsertPts); 530 } 531} 532 533bool AArch64PromoteConstant::runOnFunction(Function &F, 534 PromotionCacheTy &PromotionCache) { 535 // Look for instructions using constant vector. Promote that constant to a 536 // global variable. Create as few loads of this variable as possible and 537 // update the uses accordingly. 538 SmallVector<UpdateRecord, 64> Updates; 539 for (Instruction &I : instructions(&F)) { 540 // Traverse the operand, looking for constant vectors. Replace them by a 541 // load of a global variable of constant vector type. 542 for (Use &U : I.operands()) { 543 Constant *Cst = dyn_cast<Constant>(U); 544 // There is no point in promoting global values as they are already 545 // global. Do not promote constant expressions either, as they may 546 // require some code expansion. 547 if (!Cst || isa<GlobalValue>(Cst) || isa<ConstantExpr>(Cst)) 548 continue; 549 550 // Check if this constant is worth promoting. 551 if (!shouldConvert(*Cst, PromotionCache)) 552 continue; 553 554 // Check if this use should be promoted. 555 unsigned OpNo = &U - I.op_begin(); 556 if (!shouldConvertUse(Cst, &I, OpNo)) 557 continue; 558 559 Updates.emplace_back(Cst, &I, OpNo); 560 } 561 } 562 563 if (Updates.empty()) 564 return false; 565 566 promoteConstants(F, Updates, PromotionCache); 567 return true; 568} 569