//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
// See http://llvm.org/docs/LangRef.html#bitsets for more information.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/LowerBitSets.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

#define DEBUG_TYPE "lowerbitsets"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");

static cl::opt<bool> AvoidReuse(
    "lowerbitsets-avoid-reuse",
    cl::desc("Try to avoid reuse of byte array addresses using aliases"),
    cl::Hidden, cl::init(true));

bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
  if (Offset < ByteOffset)
    return false;

  if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
    return false;

  uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
  if (BitOffset >= BitSize)
    return false;

  return Bits.count(BitOffset);
}

bool BitSetInfo::containsValue(
    const DataLayout &DL,
    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout, Value *V,
    uint64_t COffset) const {
  if (auto GV = dyn_cast<GlobalVariable>(V)) {
    auto I = GlobalLayout.find(GV);
    if (I == GlobalLayout.end())
      return false;
    return containsGlobalOffset(I->second + COffset);
  }

  if (auto GEP = dyn_cast<GEPOperator>(V)) {
    APInt APOffset(DL.getPointerSizeInBits(0), 0);
    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
    if (!Result)
      return false;
    COffset += APOffset.getZExtValue();
    return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
  }

  if (auto Op = dyn_cast<Operator>(V)) {
    if (Op->getOpcode() == Instruction::BitCast)
      return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);

    if (Op->getOpcode() == Instruction::Select)
      return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
             containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
  }

  return false;
}

BitSetInfo BitSetBuilder::build() {
  if (Min > Max)
    Min = 0;

  // Normalize each offset against the minimum observed offset, and compute
  // the bitwise OR of each of the offsets. The number of trailing zeros
  // in the mask gives us the log2 of the alignment of all offsets, which
  // allows us to compress the bitset by only storing one bit per aligned
  // address.
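  // For example, offsets {8, 12, 20} with Min = 8 normalize to {0, 4, 12};
  // the OR of those is 12 (0b1100), which has two trailing zeros, so
  // AlignLog2 = 2 and the bitset needs one bit per 4-byte-aligned address.
  // (Illustrative numbers, not drawn from any particular input.)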
  uint64_t Mask = 0;
  for (uint64_t &Offset : Offsets) {
    Offset -= Min;
    Mask |= Offset;
  }

  BitSetInfo BSI;
  BSI.ByteOffset = Min;

  BSI.AlignLog2 = 0;
  if (Mask != 0)
    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);

  // Build the compressed bitset while normalizing the offsets against the
  // computed alignment.
  BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
  for (uint64_t Offset : Offsets) {
    Offset >>= BSI.AlignLog2;
    BSI.Bits.insert(Offset);
  }

  return BSI;
}

void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
  // Create a new fragment to hold the layout for F.
  Fragments.emplace_back();
  std::vector<uint64_t> &Fragment = Fragments.back();
  uint64_t FragmentIndex = Fragments.size() - 1;

  for (auto ObjIndex : F) {
    uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
    if (OldFragmentIndex == 0) {
      // We haven't seen this object index before, so just add it to the
      // current fragment.
      Fragment.push_back(ObjIndex);
    } else {
      // This index belongs to an existing fragment. Copy the elements of the
      // old fragment into this one and clear the old fragment. We don't update
      // the fragment map just yet; this ensures that any further references to
      // indices from the old fragment in this fragment do not insert any more
      // indices.
      std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
      Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
      OldFragment.clear();
    }
  }

  // Update the fragment map to point our object indices to this fragment.
  for (uint64_t ObjIndex : Fragment)
    FragmentMap[ObjIndex] = FragmentIndex;
}

void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
                                uint64_t BitSize, uint64_t &AllocByteOffset,
                                uint8_t &AllocMask) {
  // Find the smallest current allocation.
  unsigned Bit = 0;
  for (unsigned I = 1; I != BitsPerByte; ++I)
    if (BitAllocs[I] < BitAllocs[Bit])
      Bit = I;

  AllocByteOffset = BitAllocs[Bit];

  // Add our size to it.
  unsigned ReqSize = AllocByteOffset + BitSize;
  BitAllocs[Bit] = ReqSize;
  if (Bytes.size() < ReqSize)
    Bytes.resize(ReqSize);

  // Set our bits.
  AllocMask = 1 << Bit;
  for (uint64_t B : Bits)
    Bytes[AllocByteOffset + B] |= AllocMask;
}

namespace {

struct ByteArrayInfo {
  std::set<uint64_t> Bits;
  uint64_t BitSize;
  GlobalVariable *ByteArray;
  Constant *Mask;
};

struct LowerBitSets : public ModulePass {
  static char ID;
  LowerBitSets() : ModulePass(ID) {
    initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
  }

  Module *M;

  bool LinkerSubsectionsViaSymbols;
  IntegerType *Int1Ty;
  IntegerType *Int8Ty;
  IntegerType *Int32Ty;
  Type *Int32PtrTy;
  IntegerType *Int64Ty;
  Type *IntPtrTy;

  // The llvm.bitsets named metadata.
  NamedMDNode *BitSetNM;

  // Mapping from bitset MDStrings to the call sites that test them.
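  // This map also serves as a "seen" set in buildBitSets(): a bitset whose
  // entry already exists has already had its referenced globals added to the
  // equivalence classes.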
  DenseMap<MDString *, std::vector<CallInst *>> BitSetTestCallSites;

  std::vector<ByteArrayInfo> ByteArrayInfos;

  BitSetInfo
  buildBitSet(MDString *BitSet,
              const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
  ByteArrayInfo *createByteArray(BitSetInfo &BSI);
  void allocateByteArrays();
  Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
                          Value *BitOffset);
  Value *
  lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
                  GlobalVariable *CombinedGlobal,
                  const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout);
  void buildBitSetsFromGlobals(const std::vector<MDString *> &BitSets,
                               const std::vector<GlobalVariable *> &Globals);
  bool buildBitSets();
  bool eraseBitSetMetadata();

  bool doInitialization(Module &M) override;
  bool runOnModule(Module &M) override;
};

} // namespace

INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets", "Lower bitset metadata",
                      false, false)
INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets", "Lower bitset metadata",
                    false, false)
char LowerBitSets::ID = 0;

ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }

bool LowerBitSets::doInitialization(Module &Mod) {
  M = &Mod;
  const DataLayout &DL = Mod.getDataLayout();

  Triple TargetTriple(M->getTargetTriple());
  LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();

  Int1Ty = Type::getInt1Ty(M->getContext());
  Int8Ty = Type::getInt8Ty(M->getContext());
  Int32Ty = Type::getInt32Ty(M->getContext());
  Int32PtrTy = PointerType::getUnqual(Int32Ty);
  Int64Ty = Type::getInt64Ty(M->getContext());
  IntPtrTy = DL.getIntPtrType(M->getContext(), 0);

  BitSetNM = M->getNamedMetadata("llvm.bitsets");

  BitSetTestCallSites.clear();

  return false;
}

/// Build a bit set for BitSet using the object layouts in GlobalLayout.
BitSetInfo LowerBitSets::buildBitSet(
    MDString *BitSet,
    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
  BitSetBuilder BSB;

  // Compute the byte offset of each element of this bitset.
  if (BitSetNM) {
    for (MDNode *Op : BitSetNM->operands()) {
      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
        continue;
      auto OpGlobal = cast<GlobalVariable>(
          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
      uint64_t Offset =
          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
                                ->getValue())->getZExtValue();

      Offset += GlobalLayout.find(OpGlobal)->second;

      BSB.addOffset(Offset);
    }
  }

  return BSB.build();
}

/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in Bits. This
/// pattern matches the bt instruction on x86.
static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
                                  Value *BitOffset) {
  auto BitsType = cast<IntegerType>(Bits->getType());
  unsigned BitWidth = BitsType->getBitWidth();

  BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
  Value *BitIndex =
      B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
  Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
  Value *MaskedBits = B.CreateAnd(Bits, BitMask);
  return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}

ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
  // Create globals to stand in for byte arrays and masks.
  // These never actually get initialized; we RAUW and erase them later in
  // allocateByteArrays() once we know the offset and mask to use.
  auto ByteArrayGlobal = new GlobalVariable(
      *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
  auto MaskGlobal = new GlobalVariable(
      *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);

  ByteArrayInfos.emplace_back();
  ByteArrayInfo *BAI = &ByteArrayInfos.back();

  BAI->Bits = BSI.Bits;
  BAI->BitSize = BSI.BitSize;
  BAI->ByteArray = ByteArrayGlobal;
  BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
  return BAI;
}

void LowerBitSets::allocateByteArrays() {
  std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
                   [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
                     return BAI1.BitSize > BAI2.BitSize;
                   });

  std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());

  ByteArrayBuilder BAB;
  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
    ByteArrayInfo *BAI = &ByteArrayInfos[I];

    uint8_t Mask;
    BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);

    BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
    cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
  }

  Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
  auto ByteArray =
      new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, ByteArrayConst);

  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
    ByteArrayInfo *BAI = &ByteArrayInfos[I];

    Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
                        ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
        ByteArrayConst->getType(), ByteArray, Idxs);

    // Create an alias instead of RAUW'ing the GEP directly. On x86 this
    // ensures that the pc-relative displacement is folded into the lea instead
    // of the test instruction getting another displacement.
    if (LinkerSubsectionsViaSymbols) {
      BAI->ByteArray->replaceAllUsesWith(GEP);
    } else {
      GlobalAlias *Alias = GlobalAlias::create(
          Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
      BAI->ByteArray->replaceAllUsesWith(Alias);
    }
    BAI->ByteArray->eraseFromParent();
  }

  ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
                      BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
                      BAB.BitAllocs[6] + BAB.BitAllocs[7];
  ByteArraySizeBytes = BAB.Bytes.size();
}

/// Build a test that bit BitOffset is set in BSI, where BAI (created on
/// demand) refers to the byte array containing the bits in BSI.
Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
                                      ByteArrayInfo *&BAI, Value *BitOffset) {
  if (BSI.BitSize <= 64) {
    // If the bit set is sufficiently small, we can avoid a load by bit testing
    // a constant.
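    // For example, Bits = {0, 2} with BitSize <= 32 becomes the i32 constant
    // 5 (0b101), and the test below reduces to
    // (5 & (1 << (BitOffset & 31))) != 0. (Illustrative values only.)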
    IntegerType *BitsTy;
    if (BSI.BitSize <= 32)
      BitsTy = Int32Ty;
    else
      BitsTy = Int64Ty;

    uint64_t Bits = 0;
    for (auto Bit : BSI.Bits)
      Bits |= uint64_t(1) << Bit;
    Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
    return createMaskedBitTest(B, BitsConst, BitOffset);
  } else {
    if (!BAI) {
      ++NumByteArraysCreated;
      BAI = createByteArray(BSI);
    }

    Constant *ByteArray = BAI->ByteArray;
    Type *Ty = BAI->ByteArray->getValueType();
    if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
      // Each use of the byte array uses a different alias. This makes the
      // backend less likely to reuse previously computed byte array addresses,
      // improving the security of the CFI mechanism based on this pass.
      ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
                                      GlobalValue::PrivateLinkage, "bits_use",
                                      ByteArray, M);
    }

    Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
    Value *Byte = B.CreateLoad(ByteAddr);

    Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
    return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
  }
}

/// Lower an llvm.bitset.test call to its implementation. Returns the value to
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
    CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
    GlobalVariable *CombinedGlobal,
    const DenseMap<GlobalVariable *, uint64_t> &GlobalLayout) {
  Value *Ptr = CI->getArgOperand(0);
  const DataLayout &DL = M->getDataLayout();

  if (BSI.containsValue(DL, GlobalLayout, Ptr))
    return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext());

  Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy);
  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
      GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset));

  BasicBlock *InitialBB = CI->getParent();

  IRBuilder<> B(CI);

  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);

  if (BSI.isSingleOffset())
    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);

  Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);

  Value *BitOffset;
  if (BSI.AlignLog2 == 0) {
    BitOffset = PtrOffset;
  } else {
    // We need to check that the offset both falls within our range and is
    // suitably aligned. We can check both properties at the same time by
    // performing a right rotate by log2(alignment) followed by an integer
    // comparison against the bitset size. The rotate will move the lower
    // order bits that need to be zero into the higher order bits of the
    // result, causing the comparison to fail if they are nonzero. The rotate
    // also conveniently gives us a bit offset to use during the load from
    // the bitset.
    Value *OffsetSHR =
        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
    Value *OffsetSHL = B.CreateShl(
        PtrOffset,
        ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
  }

  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
  Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);

  // If the bit set is all ones, testing against it is unnecessary.
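  // (When every bit in [0, BitSize) is set, membership is equivalent to the
  // combined range-and-alignment check we just emitted.)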
  if (BSI.isAllOnes())
    return OffsetInRange;

  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
  IRBuilder<> ThenB(Term);

  // Now that we know that the offset is in range and aligned, load the
  // appropriate bit from the bitset.
  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);

  // The value we want is 0 if we came directly from the initial block
  // (having failed the range or alignment checks), or the loaded bit if
  // we came from the block in which we loaded it.
  B.SetInsertPoint(CI);
  PHINode *P = B.CreatePHI(Int1Ty, 2);
  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
  P->addIncoming(Bit, ThenB.GetInsertBlock());
  return P;
}

/// Given a disjoint set of bitsets and globals, lay out the globals, build
/// the bit sets and lower the llvm.bitset.test calls.
void LowerBitSets::buildBitSetsFromGlobals(
    const std::vector<MDString *> &BitSets,
    const std::vector<GlobalVariable *> &Globals) {
  // Build a new global with the combined contents of the referenced globals.
  std::vector<Constant *> GlobalInits;
  const DataLayout &DL = M->getDataLayout();
  for (GlobalVariable *G : Globals) {
    GlobalInits.push_back(G->getInitializer());
    uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType());

    // Compute the amount of padding required to align the next element to the
    // next power of 2.
    uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;

    // A cap of 128 was found experimentally to give a good trade-off between
    // data and instruction overhead.
    if (Padding > 128)
      Padding = RoundUpToAlignment(InitSize, 128) - InitSize;

    GlobalInits.push_back(
        ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
  }
  if (!GlobalInits.empty())
    GlobalInits.pop_back();
  Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
  auto CombinedGlobal =
      new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, NewInit);

  const StructLayout *CombinedGlobalLayout =
      DL.getStructLayout(cast<StructType>(NewInit->getType()));

  // Compute the offsets of the original globals within the new global.
  DenseMap<GlobalVariable *, uint64_t> GlobalLayout;
  for (unsigned I = 0; I != Globals.size(); ++I)
    // Multiply by 2 to account for padding elements.
    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);

  // For each bitset in this disjoint set...
  for (MDString *BS : BitSets) {
    // Build the bitset.
    BitSetInfo BSI = buildBitSet(BS, GlobalLayout);

    ByteArrayInfo *BAI = nullptr;

    // Lower each call to llvm.bitset.test for this bitset.
    for (CallInst *CI : BitSetTestCallSites[BS]) {
      ++NumBitSetCallsLowered;
      Value *Lowered =
          lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout);
      CI->replaceAllUsesWith(Lowered);
      CI->eraseFromParent();
    }
  }

  // Build aliases pointing to offsets into the combined global for each
  // global from which we built the combined global, and replace references
  // to the original globals with references to the aliases.
  for (unsigned I = 0; I != Globals.size(); ++I) {
    // Multiply by 2 to account for padding elements.
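    // (Each original global is followed by one padding element in the
    // combined struct, so global I lives at struct element 2*I.)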
    Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
                                      ConstantInt::get(Int32Ty, I * 2)};
    Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
        NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
    if (LinkerSubsectionsViaSymbols) {
      Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
    } else {
      GlobalAlias *GAlias = GlobalAlias::create(
          Globals[I]->getType()->getElementType(),
          Globals[I]->getType()->getAddressSpace(), Globals[I]->getLinkage(),
          "", CombinedGlobalElemPtr, M);
      GAlias->takeName(Globals[I]);
      Globals[I]->replaceAllUsesWith(GAlias);
    }
    Globals[I]->eraseFromParent();
  }
}

/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets() {
  Function *BitSetTestFunc =
      M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
  if (!BitSetTestFunc)
    return false;

  // Equivalence class set containing bitsets and the globals they reference.
  // This is used to partition the set of bitsets in the module into disjoint
  // sets.
  typedef EquivalenceClasses<PointerUnion<GlobalVariable *, MDString *>>
      GlobalClassesTy;
  GlobalClassesTy GlobalClasses;

  for (const Use &U : BitSetTestFunc->uses()) {
    auto CI = cast<CallInst>(U.getUser());

    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
    if (!BitSetMDVal || !isa<MDString>(BitSetMDVal->getMetadata()))
      report_fatal_error(
          "Second argument of llvm.bitset.test must be a metadata string");
    auto BitSet = cast<MDString>(BitSetMDVal->getMetadata());

    // Add the call site to the list of call sites for this bit set. We also
    // use BitSetTestCallSites to keep track of whether we have seen this bit
    // set before. If we have, we don't need to re-add the referenced globals
    // to the equivalence class.
    auto Ins = BitSetTestCallSites.insert(
        std::make_pair(BitSet, std::vector<CallInst *>()));
    Ins.first->second.push_back(CI);
    if (!Ins.second)
      continue;

    // Add the bitset to the equivalence class.
    GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
    GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);

    if (!BitSetNM)
      continue;

    // Verify the bitset metadata and add the referenced globals to the
    // bitset's equivalence class.
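    // Each operand is expected to have the form
    //   !{!"name", global, offset}
    // e.g. !{!"bitset1", i32* @g, i64 0} (illustrative).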
    for (MDNode *Op : BitSetNM->operands()) {
      if (Op->getNumOperands() != 3)
        report_fatal_error(
            "All operands of llvm.bitsets metadata must have 3 elements");

      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
        continue;

      auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
      if (!OpConstMD)
        report_fatal_error("Bit set element must be a constant");
      auto OpGlobal = dyn_cast<GlobalVariable>(OpConstMD->getValue());
      if (!OpGlobal)
        report_fatal_error("Bit set element must refer to a global");

      auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (!OffsetConstMD)
        report_fatal_error("Bit set element offset must be a constant");
      auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
      if (!OffsetInt)
        report_fatal_error(
            "Bit set element offset must be an integer constant");

      CurSet = GlobalClasses.unionSets(
          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
    }
  }

  if (GlobalClasses.empty())
    return false;

  // For each disjoint set we found...
  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
                                 E = GlobalClasses.end();
       I != E; ++I) {
    if (!I->isLeader())
      continue;

    ++NumBitSetDisjointSets;

    // Build the list of bitsets and referenced globals in this disjoint set.
    std::vector<MDString *> BitSets;
    std::vector<GlobalVariable *> Globals;
    DenseMap<MDString *, uint64_t> BitSetIndices;
    DenseMap<GlobalVariable *, uint64_t> GlobalIndices;
    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
         MI != GlobalClasses.member_end(); ++MI) {
      if ((*MI).is<MDString *>()) {
        BitSetIndices[MI->get<MDString *>()] = BitSets.size();
        BitSets.push_back(MI->get<MDString *>());
      } else {
        GlobalIndices[MI->get<GlobalVariable *>()] = Globals.size();
        Globals.push_back(MI->get<GlobalVariable *>());
      }
    }

    // For each bitset, build a set of indices that refer to globals referenced
    // by the bitset.
    std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
    if (BitSetNM) {
      for (MDNode *Op : BitSetNM->operands()) {
        // Op = { bitset name, global, offset }
        if (!Op->getOperand(1))
          continue;
        auto I = BitSetIndices.find(cast<MDString>(Op->getOperand(0)));
        if (I == BitSetIndices.end())
          continue;

        auto OpGlobal = cast<GlobalVariable>(
            cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
        BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
      }
    }

    // Order the sets of indices by size. The GlobalLayoutBuilder works best
    // when given small index sets first.
    std::stable_sort(
        BitSetMembers.begin(), BitSetMembers.end(),
        [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
          return O1.size() < O2.size();
        });

    // Create a GlobalLayoutBuilder and provide it with index sets as layout
    // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
    // as close together as possible.
    GlobalLayoutBuilder GLB(Globals.size());
    for (auto &&MemSet : BitSetMembers)
      GLB.addFragment(MemSet);

    // Build a vector of globals with the computed layout.
    std::vector<GlobalVariable *> OrderedGlobals(Globals.size());
    auto OGI = OrderedGlobals.begin();
    for (auto &&F : GLB.Fragments)
      for (auto &&Offset : F)
        *OGI++ = Globals[Offset];

    // Order bitsets by name for determinism.
    std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) {
      return S1->getString() < S2->getString();
    });

    // Build the bitsets from this disjoint set.
    buildBitSetsFromGlobals(BitSets, OrderedGlobals);
  }

  allocateByteArrays();

  return true;
}

bool LowerBitSets::eraseBitSetMetadata() {
  if (!BitSetNM)
    return false;

  M->eraseNamedMetadata(BitSetNM);
  return true;
}

bool LowerBitSets::runOnModule(Module &M) {
  bool Changed = buildBitSets();
  Changed |= eraseBitSetMetadata();
  return Changed;
}