//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64PromoteConstant pass which promotes constants
// to global variables when this is likely to be more efficient. Currently only
// types related to constant vector (i.e., constant vector, array of constant
// vectors, constant structure with a constant vector field, etc.) are promoted
// to global variables. Constant vectors are likely to be lowered in target
// constant pool during instruction selection already; therefore, the access
// will remain the same (memory load), but the structure types are not split
// into different constant pool accesses for each field. A bonus side effect is
// that created globals may be merged by the global merge pass.
//
// FIXME: This pass may be useful for other targets too.
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-promote-const"

// Stress testing mode - disable heuristics.
48static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden, 49 cl::desc("Promote all vector constants")); 50 51STATISTIC(NumPromoted, "Number of promoted constants"); 52STATISTIC(NumPromotedUses, "Number of promoted constants uses"); 53 54//===----------------------------------------------------------------------===// 55// AArch64PromoteConstant 56//===----------------------------------------------------------------------===// 57 58namespace { 59/// Promotes interesting constant into global variables. 60/// The motivating example is: 61/// static const uint16_t TableA[32] = { 62/// 41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768, 63/// 31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215, 64/// 25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846, 65/// 21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725, 66/// }; 67/// 68/// uint8x16x4_t LoadStatic(void) { 69/// uint8x16x4_t ret; 70/// ret.val[0] = vld1q_u16(TableA + 0); 71/// ret.val[1] = vld1q_u16(TableA + 8); 72/// ret.val[2] = vld1q_u16(TableA + 16); 73/// ret.val[3] = vld1q_u16(TableA + 24); 74/// return ret; 75/// } 76/// 77/// The constants in this example are folded into the uses. Thus, 4 different 78/// constants are created. 79/// 80/// As their type is vector the cheapest way to create them is to load them 81/// for the memory. 82/// 83/// Therefore the final assembly final has 4 different loads. With this pass 84/// enabled, only one load is issued for the constants. 85class AArch64PromoteConstant : public ModulePass { 86 87public: 88 static char ID; 89 AArch64PromoteConstant() : ModulePass(ID) {} 90 91 const char *getPassName() const override { return "AArch64 Promote Constant"; } 92 93 /// Iterate over the functions and promote the interesting constants into 94 /// global variables with module scope. 
95 bool runOnModule(Module &M) override { 96 DEBUG(dbgs() << getPassName() << '\n'); 97 bool Changed = false; 98 for (auto &MF : M) { 99 Changed |= runOnFunction(MF); 100 } 101 return Changed; 102 } 103 104private: 105 /// Look for interesting constants used within the given function. 106 /// Promote them into global variables, load these global variables within 107 /// the related function, so that the number of inserted load is minimal. 108 bool runOnFunction(Function &F); 109 110 // This transformation requires dominator info 111 void getAnalysisUsage(AnalysisUsage &AU) const override { 112 AU.setPreservesCFG(); 113 AU.addRequired<DominatorTreeWrapperPass>(); 114 AU.addPreserved<DominatorTreeWrapperPass>(); 115 } 116 117 /// Type to store a list of Uses. 118 typedef SmallVector<Use *, 4> Uses; 119 /// Map an insertion point to all the uses it dominates. 120 typedef DenseMap<Instruction *, Uses> InsertionPoints; 121 /// Map a function to the required insertion point of load for a 122 /// global variable. 123 typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc; 124 125 /// Find the closest point that dominates the given Use. 126 Instruction *findInsertionPoint(Use &Use); 127 128 /// Check if the given insertion point is dominated by an existing 129 /// insertion point. 130 /// If true, the given use is added to the list of dominated uses for 131 /// the related existing point. 132 /// \param NewPt the insertion point to be checked 133 /// \param Use the use to be added into the list of dominated uses 134 /// \param InsertPts existing insertion points 135 /// \pre NewPt and all instruction in InsertPts belong to the same function 136 /// \return true if one of the insertion point in InsertPts dominates NewPt, 137 /// false otherwise 138 bool isDominated(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts); 139 140 /// Check if the given insertion point can be merged with an existing 141 /// insertion point in a common dominator. 
142 /// If true, the given use is added to the list of the created insertion 143 /// point. 144 /// \param NewPt the insertion point to be checked 145 /// \param Use the use to be added into the list of dominated uses 146 /// \param InsertPts existing insertion points 147 /// \pre NewPt and all instruction in InsertPts belong to the same function 148 /// \pre isDominated returns false for the exact same parameters. 149 /// \return true if it exists an insertion point in InsertPts that could 150 /// have been merged with NewPt in a common dominator, 151 /// false otherwise 152 bool tryAndMerge(Instruction *NewPt, Use &Use, InsertionPoints &InsertPts); 153 154 /// Compute the minimal insertion points to dominates all the interesting 155 /// uses of value. 156 /// Insertion points are group per function and each insertion point 157 /// contains a list of all the uses it dominates within the related function 158 /// \param Val constant to be examined 159 /// \param[out] InsPtsPerFunc output storage of the analysis 160 void computeInsertionPoints(Constant *Val, 161 InsertionPointsPerFunc &InsPtsPerFunc); 162 163 /// Insert a definition of a new global variable at each point contained in 164 /// InsPtsPerFunc and update the related uses (also contained in 165 /// InsPtsPerFunc). 166 bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc); 167 168 /// Compute the minimal insertion points to dominate all the interesting 169 /// uses of Val and insert a definition of a new global variable 170 /// at these points. 171 /// Also update the uses of Val accordingly. 172 /// Currently a use of Val is considered interesting if: 173 /// - Val is not UndefValue 174 /// - Val is not zeroinitialized 175 /// - Replacing Val per a load of a global variable is valid. 176 /// \see shouldConvert for more details 177 bool computeAndInsertDefinitions(Constant *Val); 178 179 /// Promote the given constant into a global variable if it is expected to 180 /// be profitable. 
181 /// \return true if Cst has been promoted 182 bool promoteConstant(Constant *Cst); 183 184 /// Transfer the list of dominated uses of IPI to NewPt in InsertPts. 185 /// Append Use to this list and delete the entry of IPI in InsertPts. 186 static void appendAndTransferDominatedUses(Instruction *NewPt, Use &Use, 187 InsertionPoints::iterator &IPI, 188 InsertionPoints &InsertPts) { 189 // Record the dominated use. 190 IPI->second.push_back(&Use); 191 // Transfer the dominated uses of IPI to NewPt 192 // Inserting into the DenseMap may invalidate existing iterator. 193 // Keep a copy of the key to find the iterator to erase. Keep a copy of the 194 // value so that we don't have to dereference IPI->second. 195 Instruction *OldInstr = IPI->first; 196 Uses OldUses = std::move(IPI->second); 197 InsertPts[NewPt] = std::move(OldUses); 198 // Erase IPI. 199 InsertPts.erase(OldInstr); 200 } 201}; 202} // end anonymous namespace 203 204char AArch64PromoteConstant::ID = 0; 205 206namespace llvm { 207void initializeAArch64PromoteConstantPass(PassRegistry &); 208} 209 210INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const", 211 "AArch64 Promote Constant Pass", false, false) 212INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 213INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const", 214 "AArch64 Promote Constant Pass", false, false) 215 216ModulePass *llvm::createAArch64PromoteConstantPass() { 217 return new AArch64PromoteConstant(); 218} 219 220/// Check if the given type uses a vector type. 
221static bool isConstantUsingVectorTy(const Type *CstTy) { 222 if (CstTy->isVectorTy()) 223 return true; 224 if (CstTy->isStructTy()) { 225 for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements(); 226 EltIdx < EndEltIdx; ++EltIdx) 227 if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx))) 228 return true; 229 } else if (CstTy->isArrayTy()) 230 return isConstantUsingVectorTy(CstTy->getArrayElementType()); 231 return false; 232} 233 234/// Check if the given use (Instruction + OpIdx) of Cst should be converted into 235/// a load of a global variable initialized with Cst. 236/// A use should be converted if it is legal to do so. 237/// For instance, it is not legal to turn the mask operand of a shuffle vector 238/// into a load of a global variable. 239static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr, 240 unsigned OpIdx) { 241 // shufflevector instruction expects a const for the mask argument, i.e., the 242 // third argument. Do not promote this use in that case. 243 if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2) 244 return false; 245 246 // extractvalue instruction expects a const idx. 247 if (isa<const ExtractValueInst>(Instr) && OpIdx > 0) 248 return false; 249 250 // extractvalue instruction expects a const idx. 251 if (isa<const InsertValueInst>(Instr) && OpIdx > 1) 252 return false; 253 254 if (isa<const AllocaInst>(Instr) && OpIdx > 0) 255 return false; 256 257 // Alignment argument must be constant. 258 if (isa<const LoadInst>(Instr) && OpIdx > 0) 259 return false; 260 261 // Alignment argument must be constant. 262 if (isa<const StoreInst>(Instr) && OpIdx > 1) 263 return false; 264 265 // Index must be constant. 266 if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0) 267 return false; 268 269 // Personality function and filters must be constant. 270 // Give up on that instruction. 
271 if (isa<const LandingPadInst>(Instr)) 272 return false; 273 274 // Switch instruction expects constants to compare to. 275 if (isa<const SwitchInst>(Instr)) 276 return false; 277 278 // Expected address must be a constant. 279 if (isa<const IndirectBrInst>(Instr)) 280 return false; 281 282 // Do not mess with intrinsics. 283 if (isa<const IntrinsicInst>(Instr)) 284 return false; 285 286 // Do not mess with inline asm. 287 const CallInst *CI = dyn_cast<const CallInst>(Instr); 288 if (CI && isa<const InlineAsm>(CI->getCalledValue())) 289 return false; 290 291 return true; 292} 293 294/// Check if the given Cst should be converted into 295/// a load of a global variable initialized with Cst. 296/// A constant should be converted if it is likely that the materialization of 297/// the constant will be tricky. Thus, we give up on zero or undef values. 298/// 299/// \todo Currently, accept only vector related types. 300/// Also we give up on all simple vector type to keep the existing 301/// behavior. Otherwise, we should push here all the check of the lowering of 302/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging 303/// constant via global merge and the fact that the same constant is stored 304/// only once with this method (versus, as many function that uses the constant 305/// for the regular approach, even for float). 306/// Again, the simplest solution would be to promote every 307/// constant and rematerialize them when they are actually cheap to create. 308static bool shouldConvert(const Constant *Cst) { 309 if (isa<const UndefValue>(Cst)) 310 return false; 311 312 // FIXME: In some cases, it may be interesting to promote in memory 313 // a zero initialized constant. 314 // E.g., when the type of Cst require more instructions than the 315 // adrp/add/load sequence or when this sequence can be shared by several 316 // instances of Cst. 
317 // Ideally, we could promote this into a global and rematerialize the constant 318 // when it was a bad idea. 319 if (Cst->isZeroValue()) 320 return false; 321 322 if (Stress) 323 return true; 324 325 // FIXME: see function \todo 326 if (Cst->getType()->isVectorTy()) 327 return false; 328 return isConstantUsingVectorTy(Cst->getType()); 329} 330 331Instruction *AArch64PromoteConstant::findInsertionPoint(Use &Use) { 332 Instruction *User = cast<Instruction>(Use.getUser()); 333 334 // If this user is a phi, the insertion point is in the related 335 // incoming basic block. 336 if (PHINode *PhiInst = dyn_cast<PHINode>(User)) 337 return PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator(); 338 339 return User; 340} 341 342bool AArch64PromoteConstant::isDominated(Instruction *NewPt, Use &Use, 343 InsertionPoints &InsertPts) { 344 345 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 346 *NewPt->getParent()->getParent()).getDomTree(); 347 348 // Traverse all the existing insertion points and check if one is dominating 349 // NewPt. If it is, remember that. 350 for (auto &IPI : InsertPts) { 351 if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) || 352 // When IPI.first is a terminator instruction, DT may think that 353 // the result is defined on the edge. 354 // Here we are testing the insertion point, not the definition. 355 (IPI.first->getParent() != NewPt->getParent() && 356 DT.dominates(IPI.first->getParent(), NewPt->getParent()))) { 357 // No need to insert this point. Just record the dominated use. 
358 DEBUG(dbgs() << "Insertion point dominated by:\n"); 359 DEBUG(IPI.first->print(dbgs())); 360 DEBUG(dbgs() << '\n'); 361 IPI.second.push_back(&Use); 362 return true; 363 } 364 } 365 return false; 366} 367 368bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt, Use &Use, 369 InsertionPoints &InsertPts) { 370 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 371 *NewPt->getParent()->getParent()).getDomTree(); 372 BasicBlock *NewBB = NewPt->getParent(); 373 374 // Traverse all the existing insertion point and check if one is dominated by 375 // NewPt and thus useless or can be combined with NewPt into a common 376 // dominator. 377 for (InsertionPoints::iterator IPI = InsertPts.begin(), 378 EndIPI = InsertPts.end(); 379 IPI != EndIPI; ++IPI) { 380 BasicBlock *CurBB = IPI->first->getParent(); 381 if (NewBB == CurBB) { 382 // Instructions are in the same block. 383 // By construction, NewPt is dominating the other. 384 // Indeed, isDominated returned false with the exact same arguments. 385 DEBUG(dbgs() << "Merge insertion point with:\n"); 386 DEBUG(IPI->first->print(dbgs())); 387 DEBUG(dbgs() << "\nat considered insertion point.\n"); 388 appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts); 389 return true; 390 } 391 392 // Look for a common dominator 393 BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB); 394 // If none exists, we cannot merge these two points. 395 if (!CommonDominator) 396 continue; 397 398 if (CommonDominator != NewBB) { 399 // By construction, the CommonDominator cannot be CurBB. 400 assert(CommonDominator != CurBB && 401 "Instruction has not been rejected during isDominated check!"); 402 // Take the last instruction of the CommonDominator as insertion point 403 NewPt = CommonDominator->getTerminator(); 404 } 405 // else, CommonDominator is the block of NewBB, hence NewBB is the last 406 // possible insertion point in that block. 
407 DEBUG(dbgs() << "Merge insertion point with:\n"); 408 DEBUG(IPI->first->print(dbgs())); 409 DEBUG(dbgs() << '\n'); 410 DEBUG(NewPt->print(dbgs())); 411 DEBUG(dbgs() << '\n'); 412 appendAndTransferDominatedUses(NewPt, Use, IPI, InsertPts); 413 return true; 414 } 415 return false; 416} 417 418void AArch64PromoteConstant::computeInsertionPoints( 419 Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) { 420 DEBUG(dbgs() << "** Compute insertion points **\n"); 421 for (Use &Use : Val->uses()) { 422 Instruction *User = dyn_cast<Instruction>(Use.getUser()); 423 424 // If the user is not an Instruction, we cannot modify it. 425 if (!User) 426 continue; 427 428 // Filter out uses that should not be converted. 429 if (!shouldConvertUse(Val, User, Use.getOperandNo())) 430 continue; 431 432 DEBUG(dbgs() << "Considered use, opidx " << Use.getOperandNo() << ":\n"); 433 DEBUG(User->print(dbgs())); 434 DEBUG(dbgs() << '\n'); 435 436 Instruction *InsertionPoint = findInsertionPoint(Use); 437 438 DEBUG(dbgs() << "Considered insertion point:\n"); 439 DEBUG(InsertionPoint->print(dbgs())); 440 DEBUG(dbgs() << '\n'); 441 442 // Check if the current insertion point is useless, i.e., it is dominated 443 // by another one. 444 InsertionPoints &InsertPts = 445 InsPtsPerFunc[InsertionPoint->getParent()->getParent()]; 446 if (isDominated(InsertionPoint, Use, InsertPts)) 447 continue; 448 // This insertion point is useful, check if we can merge some insertion 449 // point in a common dominator or if NewPt dominates an existing one. 450 if (tryAndMerge(InsertionPoint, Use, InsertPts)) 451 continue; 452 453 DEBUG(dbgs() << "Keep considered insertion point\n"); 454 455 // It is definitely useful by its own 456 InsertPts[InsertionPoint].push_back(&Use); 457 } 458} 459 460bool AArch64PromoteConstant::insertDefinitions( 461 Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc) { 462 // We will create one global variable per Module. 
463 DenseMap<Module *, GlobalVariable *> ModuleToMergedGV; 464 bool HasChanged = false; 465 466 // Traverse all insertion points in all the function. 467 for (const auto &FctToInstPtsIt : InsPtsPerFunc) { 468 const InsertionPoints &InsertPts = FctToInstPtsIt.second; 469// Do more checking for debug purposes. 470#ifndef NDEBUG 471 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>( 472 *FctToInstPtsIt.first).getDomTree(); 473#endif 474 assert(!InsertPts.empty() && "Empty uses does not need a definition"); 475 476 Module *M = FctToInstPtsIt.first->getParent(); 477 GlobalVariable *&PromotedGV = ModuleToMergedGV[M]; 478 if (!PromotedGV) { 479 PromotedGV = new GlobalVariable( 480 *M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr, 481 "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal); 482 PromotedGV->setInitializer(Cst); 483 DEBUG(dbgs() << "Global replacement: "); 484 DEBUG(PromotedGV->print(dbgs())); 485 DEBUG(dbgs() << '\n'); 486 ++NumPromoted; 487 HasChanged = true; 488 } 489 490 for (const auto &IPI : InsertPts) { 491 // Create the load of the global variable. 492 IRBuilder<> Builder(IPI.first); 493 LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV); 494 DEBUG(dbgs() << "**********\n"); 495 DEBUG(dbgs() << "New def: "); 496 DEBUG(LoadedCst->print(dbgs())); 497 DEBUG(dbgs() << '\n'); 498 499 // Update the dominated uses. 
500 for (Use *Use : IPI.second) { 501#ifndef NDEBUG 502 assert(DT.dominates(LoadedCst, findInsertionPoint(*Use)) && 503 "Inserted definition does not dominate all its uses!"); 504#endif 505 DEBUG(dbgs() << "Use to update " << Use->getOperandNo() << ":"); 506 DEBUG(Use->getUser()->print(dbgs())); 507 DEBUG(dbgs() << '\n'); 508 Use->set(LoadedCst); 509 ++NumPromotedUses; 510 } 511 } 512 } 513 return HasChanged; 514} 515 516bool AArch64PromoteConstant::computeAndInsertDefinitions(Constant *Val) { 517 InsertionPointsPerFunc InsertPtsPerFunc; 518 computeInsertionPoints(Val, InsertPtsPerFunc); 519 return insertDefinitions(Val, InsertPtsPerFunc); 520} 521 522bool AArch64PromoteConstant::promoteConstant(Constant *Cst) { 523 assert(Cst && "Given variable is not a valid constant."); 524 525 if (!shouldConvert(Cst)) 526 return false; 527 528 DEBUG(dbgs() << "******************************\n"); 529 DEBUG(dbgs() << "Candidate constant: "); 530 DEBUG(Cst->print(dbgs())); 531 DEBUG(dbgs() << '\n'); 532 533 return computeAndInsertDefinitions(Cst); 534} 535 536bool AArch64PromoteConstant::runOnFunction(Function &F) { 537 // Look for instructions using constant vector. Promote that constant to a 538 // global variable. Create as few loads of this variable as possible and 539 // update the uses accordingly. 540 bool LocalChange = false; 541 SmallPtrSet<Constant *, 8> AlreadyChecked; 542 543 for (Instruction &I : instructions(&F)) { 544 // Traverse the operand, looking for constant vectors. Replace them by a 545 // load of a global variable of constant vector type. 546 for (Value *Op : I.operand_values()) { 547 Constant *Cst = dyn_cast<Constant>(Op); 548 // There is no point in promoting global values as they are already 549 // global. Do not promote constant expressions either, as they may 550 // require some code expansion. 
551 if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) && 552 AlreadyChecked.insert(Cst).second) 553 LocalChange |= promoteConstant(Cst); 554 } 555 } 556 return LocalChange; 557} 558