//===-- LowerBitSets.cpp - Bitset lowering pass ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass lowers bitset metadata and calls to the llvm.bitset.test intrinsic.
// See http://llvm.org/docs/LangRef.html#bitsets for more information.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/LowerBitSets.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

#define DEBUG_TYPE "lowerbitsets"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumBitSetCallsLowered, "Number of bitset calls lowered");
STATISTIC(NumBitSetDisjointSets, "Number of disjoint sets of bitsets");

static cl::opt<bool> AvoidReuse(
    "lowerbitsets-avoid-reuse",
    cl::desc("Try to avoid reuse of byte array addresses using aliases"),
    cl::Hidden, cl::init(true));

bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
  if (Offset < ByteOffset)
    return false;

  if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
    return false;

  uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
  if (BitOffset >= BitSize)
    return false;

  return Bits.count(BitOffset);
}

bool BitSetInfo::containsValue(
    const DataLayout &DL,
    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout, Value *V,
    uint64_t COffset) const {
  if (auto GV = dyn_cast<GlobalObject>(V)) {
    auto I = GlobalLayout.find(GV);
    if (I == GlobalLayout.end())
      return false;
    return containsGlobalOffset(I->second + COffset);
  }

  if (auto GEP = dyn_cast<GEPOperator>(V)) {
    APInt APOffset(DL.getPointerSizeInBits(0), 0);
    bool Result = GEP->accumulateConstantOffset(DL, APOffset);
    if (!Result)
      return false;
    COffset += APOffset.getZExtValue();
    return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
                         COffset);
  }

  if (auto Op = dyn_cast<Operator>(V)) {
    if (Op->getOpcode() == Instruction::BitCast)
      return containsValue(DL, GlobalLayout, Op->getOperand(0), COffset);

    if (Op->getOpcode() == Instruction::Select)
      return containsValue(DL, GlobalLayout, Op->getOperand(1), COffset) &&
             containsValue(DL, GlobalLayout, Op->getOperand(2), COffset);
  }

  return false;
}

void BitSetInfo::print(raw_ostream &OS) const {
  OS << "offset " << ByteOffset << " size " << BitSize << " align "
     << (1 << AlignLog2);

  if (isAllOnes()) {
    OS << " all-ones\n";
    return;
  }

  OS << " { ";
  for (uint64_t B : Bits)
    OS << B << ' ';
  OS << "}\n";
}

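// Worked example for build() below (illustrative, not from the original
// source): given offsets {8, 24, 40}, Min is 8 and the normalized offsets are
// {0, 16, 32}. Their bitwise OR is 48, so AlignLog2 is 4, BitSize is
// (32 >> 4) + 1 == 3, and the stored bits are {0, 1, 2}.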
BitSetInfo
BitSetBuilder::build() {
  if (Min > Max)
    Min = 0;

  // Normalize each offset against the minimum observed offset, and compute
  // the bitwise OR of each of the offsets. The number of trailing zeros
  // in the mask gives us the log2 of the alignment of all offsets, which
  // allows us to compress the bitset by only storing one bit per aligned
  // address.
  uint64_t Mask = 0;
  for (uint64_t &Offset : Offsets) {
    Offset -= Min;
    Mask |= Offset;
  }

  BitSetInfo BSI;
  BSI.ByteOffset = Min;

  BSI.AlignLog2 = 0;
  if (Mask != 0)
    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);

  // Build the compressed bitset while normalizing the offsets against the
  // computed alignment.
  BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
  for (uint64_t Offset : Offsets) {
    Offset >>= BSI.AlignLog2;
    BSI.Bits.insert(Offset);
  }

  return BSI;
}

void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
  // Create a new fragment to hold the layout for F.
  Fragments.emplace_back();
  std::vector<uint64_t> &Fragment = Fragments.back();
  uint64_t FragmentIndex = Fragments.size() - 1;

  for (auto ObjIndex : F) {
    uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
    if (OldFragmentIndex == 0) {
      // We haven't seen this object index before, so just add it to the
      // current fragment.
      Fragment.push_back(ObjIndex);
    } else {
      // This index belongs to an existing fragment. Copy the elements of the
      // old fragment into this one and clear the old fragment. We don't update
      // the fragment map just yet; this ensures that any further references to
      // indices from the old fragment in this fragment do not insert any more
      // indices.
      std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
      Fragment.insert(Fragment.end(), OldFragment.begin(), OldFragment.end());
      OldFragment.clear();
    }
  }

  // Update the fragment map to point our object indices to this fragment.
  for (uint64_t ObjIndex : Fragment)
    FragmentMap[ObjIndex] = FragmentIndex;
}

void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
                                uint64_t BitSize, uint64_t &AllocByteOffset,
                                uint8_t &AllocMask) {
  // Find the smallest current allocation.
  unsigned Bit = 0;
  for (unsigned I = 1; I != BitsPerByte; ++I)
    if (BitAllocs[I] < BitAllocs[Bit])
      Bit = I;

  AllocByteOffset = BitAllocs[Bit];

  // Add our size to it.
  unsigned ReqSize = AllocByteOffset + BitSize;
  BitAllocs[Bit] = ReqSize;
  if (Bytes.size() < ReqSize)
    Bytes.resize(ReqSize);

  // Set our bits.
  AllocMask = 1 << Bit;
  for (uint64_t B : Bits)
    Bytes[AllocByteOffset + B] |= AllocMask;
}

namespace {

struct ByteArrayInfo {
  std::set<uint64_t> Bits;
  uint64_t BitSize;
  GlobalVariable *ByteArray;
  Constant *Mask;
};

struct LowerBitSets : public ModulePass {
  static char ID;
  LowerBitSets() : ModulePass(ID) {
    initializeLowerBitSetsPass(*PassRegistry::getPassRegistry());
  }

  Module *M;

  bool LinkerSubsectionsViaSymbols;
  Triple::ArchType Arch;
  Triple::ObjectFormatType ObjectFormat;
  IntegerType *Int1Ty;
  IntegerType *Int8Ty;
  IntegerType *Int32Ty;
  Type *Int32PtrTy;
  IntegerType *Int64Ty;
  IntegerType *IntPtrTy;

  // The llvm.bitsets named metadata.
  NamedMDNode *BitSetNM;

  // Mapping from bitset identifiers to the call sites that test them.
  DenseMap<Metadata *, std::vector<CallInst *>> BitSetTestCallSites;

  std::vector<ByteArrayInfo> ByteArrayInfos;

  BitSetInfo
  buildBitSet(Metadata *BitSet,
              const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
  ByteArrayInfo *createByteArray(BitSetInfo &BSI);
  void allocateByteArrays();
  Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
                          Value *BitOffset);
  void lowerBitSetCalls(ArrayRef<Metadata *> BitSets,
                        Constant *CombinedGlobalAddr,
                        const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
  Value *
  lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
                  Constant *CombinedGlobal,
                  const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
  void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> BitSets,
                                       ArrayRef<GlobalVariable *> Globals);
  unsigned getJumpTableEntrySize();
  Type *getJumpTableEntryType();
  Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
                                 unsigned Distance);
  void verifyBitSetMDNode(MDNode *Op);
  void buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
                                 ArrayRef<Function *> Functions);
  void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> BitSets,
                                   ArrayRef<GlobalObject *> Globals);
  bool buildBitSets();
  bool eraseBitSetMetadata();

  bool doInitialization(Module &M) override;
  bool runOnModule(Module &M) override;
};

} // anonymous namespace

INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets",
                      "Lower bitset metadata", false, false)
INITIALIZE_PASS_END(LowerBitSets, "lowerbitsets",
                    "Lower bitset metadata", false, false)
char LowerBitSets::ID = 0;

ModulePass *llvm::createLowerBitSetsPass() { return new LowerBitSets; }

bool LowerBitSets::doInitialization(Module &Mod) {
  M = &Mod;
  const DataLayout &DL = Mod.getDataLayout();

  Triple TargetTriple(M->getTargetTriple());
  LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
  Arch = TargetTriple.getArch();
  ObjectFormat = TargetTriple.getObjectFormat();

  Int1Ty = Type::getInt1Ty(M->getContext());
  Int8Ty = Type::getInt8Ty(M->getContext());
  Int32Ty = Type::getInt32Ty(M->getContext());
  Int32PtrTy = PointerType::getUnqual(Int32Ty);
  Int64Ty = Type::getInt64Ty(M->getContext());
  IntPtrTy = DL.getIntPtrType(M->getContext(), 0);

  BitSetNM = M->getNamedMetadata("llvm.bitsets");

  BitSetTestCallSites.clear();

  return false;
}

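// For reference, each operand of the llvm.bitsets named metadata consumed
// below is a triple { bitset identifier, global, byte offset }. An
// illustrative (made-up) module-level example:
//
//   !llvm.bitsets = !{!0, !1}
//   !0 = !{!"bitset1", i32* @a, i32 0}
//   !1 = !{!"bitset1", i32* @b, i32 0}
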
/// Build a bit set for BitSet using the object layouts in
/// GlobalLayout.
BitSetInfo LowerBitSets::buildBitSet(
    Metadata *BitSet,
    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
  BitSetBuilder BSB;

  // Compute the byte offset of each element of this bitset.
  if (BitSetNM) {
    for (MDNode *Op : BitSetNM->operands()) {
      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
        continue;
      Constant *OpConst =
          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue();
      if (auto GA = dyn_cast<GlobalAlias>(OpConst))
        OpConst = GA->getAliasee();
      auto OpGlobal = dyn_cast<GlobalObject>(OpConst);
      if (!OpGlobal)
        continue;
      uint64_t Offset =
          cast<ConstantInt>(cast<ConstantAsMetadata>(Op->getOperand(2))
                                ->getValue())->getZExtValue();

      Offset += GlobalLayout.find(OpGlobal)->second;

      BSB.addOffset(Offset);
    }
  }

  return BSB.build();
}

/// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
/// Bits. This pattern matches to the bt instruction on x86.
static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
                                  Value *BitOffset) {
  auto BitsType = cast<IntegerType>(Bits->getType());
  unsigned BitWidth = BitsType->getBitWidth();

  BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
  Value *BitIndex =
      B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
  Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
  Value *MaskedBits = B.CreateAnd(Bits, BitMask);
  return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}

ByteArrayInfo *LowerBitSets::createByteArray(BitSetInfo &BSI) {
  // Create globals to stand in for byte arrays and masks. These never actually
  // get initialized; we RAUW and erase them later in allocateByteArrays() once
  // we know the offset and mask to use.
  auto ByteArrayGlobal = new GlobalVariable(
      *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
  auto MaskGlobal = new GlobalVariable(
      *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);

  ByteArrayInfos.emplace_back();
  ByteArrayInfo *BAI = &ByteArrayInfos.back();

  BAI->Bits = BSI.Bits;
  BAI->BitSize = BSI.BitSize;
  BAI->ByteArray = ByteArrayGlobal;
  BAI->Mask = ConstantExpr::getPtrToInt(MaskGlobal, Int8Ty);
  return BAI;
}

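// Note (added for clarity): allocateByteArrays() packs all pending byte arrays
// into a single constant. Byte arrays are processed in decreasing size order,
// and each of the eight bit positions within a byte acts as an independent
// allocation lane (see ByteArrayBuilder::allocate), so up to eight bitsets can
// share the same range of bytes. The placeholder globals created in
// createByteArray() are then RAUW'd with the real addresses and masks.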
void LowerBitSets::allocateByteArrays() {
  std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
                   [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
                     return BAI1.BitSize > BAI2.BitSize;
                   });

  std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());

  ByteArrayBuilder BAB;
  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
    ByteArrayInfo *BAI = &ByteArrayInfos[I];

    uint8_t Mask;
    BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);

    BAI->Mask->replaceAllUsesWith(ConstantInt::get(Int8Ty, Mask));
    cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
  }

  Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
  auto ByteArray =
      new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, ByteArrayConst);

  for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
    ByteArrayInfo *BAI = &ByteArrayInfos[I];

    Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
                        ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
    Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
        ByteArrayConst->getType(), ByteArray, Idxs);

    // Create an alias instead of RAUW'ing the gep directly. On x86 this
    // ensures that the pc-relative displacement is folded into the lea instead
    // of the test instruction getting another displacement.
    if (LinkerSubsectionsViaSymbols) {
      BAI->ByteArray->replaceAllUsesWith(GEP);
    } else {
      GlobalAlias *Alias = GlobalAlias::create(
          Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
      BAI->ByteArray->replaceAllUsesWith(Alias);
    }
    BAI->ByteArray->eraseFromParent();
  }

  ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
                      BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
                      BAB.BitAllocs[6] + BAB.BitAllocs[7];
  ByteArraySizeBytes = BAB.Bytes.size();
}

/// Build a test that bit BitOffset is set in BSI, where the bits are either
/// encoded as an inline constant or stored in the byte array referenced by
/// BAI.
Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
                                      ByteArrayInfo *&BAI, Value *BitOffset) {
  if (BSI.BitSize <= 64) {
    // If the bit set is sufficiently small, we can avoid a load by bit testing
    // a constant.
    IntegerType *BitsTy;
    if (BSI.BitSize <= 32)
      BitsTy = Int32Ty;
    else
      BitsTy = Int64Ty;

    uint64_t Bits = 0;
    for (auto Bit : BSI.Bits)
      Bits |= uint64_t(1) << Bit;
    Constant *BitsConst = ConstantInt::get(BitsTy, Bits);
    return createMaskedBitTest(B, BitsConst, BitOffset);
  } else {
    if (!BAI) {
      ++NumByteArraysCreated;
      BAI = createByteArray(BSI);
    }

    Constant *ByteArray = BAI->ByteArray;
    Type *Ty = BAI->ByteArray->getValueType();
    if (!LinkerSubsectionsViaSymbols && AvoidReuse) {
      // Each use of the byte array uses a different alias. This makes the
      // backend less likely to reuse previously computed byte array addresses,
      // improving the security of the CFI mechanism based on this pass.
      ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
                                      GlobalValue::PrivateLinkage, "bits_use",
                                      ByteArray, M);
    }

    Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
    Value *Byte = B.CreateLoad(ByteAddr);

    Value *ByteAndMask = B.CreateAnd(Byte, BAI->Mask);
    return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
  }
}

/// Lower a llvm.bitset.test call to its implementation. Returns the value to
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
    CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
    Constant *CombinedGlobalIntAddr,
    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
  Value *Ptr = CI->getArgOperand(0);
  const DataLayout &DL = M->getDataLayout();

  if (BSI.containsValue(DL, GlobalLayout, Ptr))
    return ConstantInt::getTrue(M->getContext());

  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
      CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));

  BasicBlock *InitialBB = CI->getParent();

  IRBuilder<> B(CI);

  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);

  if (BSI.isSingleOffset())
    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);

  Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);

  Value *BitOffset;
  if (BSI.AlignLog2 == 0) {
    BitOffset = PtrOffset;
  } else {
    // We need to check that the offset both falls within our range and is
    // suitably aligned. We can check both properties at the same time by
    // performing a right rotate by log2(alignment) followed by an integer
    // comparison against the bitset size. The rotate will move the lower
    // order bits that need to be zero into the higher order bits of the
    // result, causing the comparison to fail if they are nonzero. The rotate
    // also conveniently gives us a bit offset to use during the load from
    // the bitset.
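    // (Illustrative arithmetic, not from the original source: with
    // AlignLog2 == 3 on a 64-bit target, an aligned offset of 24 becomes
    // (24 >> 3) | (24 << 61) == 3, which is then compared against BitSize;
    // a misaligned offset of 25 rotates its low set bit up to bit 61, so the
    // unsigned comparison fails.)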
    Value *OffsetSHR =
        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
    Value *OffsetSHL = B.CreateShl(
        PtrOffset,
        ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
  }

  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
  Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);

  // If the bit set is all ones, testing against it is unnecessary.
  if (BSI.isAllOnes())
    return OffsetInRange;

  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
  IRBuilder<> ThenB(Term);

  // Now that we know that the offset is in range and aligned, load the
  // appropriate bit from the bitset.
  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);

  // The value we want is 0 if we came directly from the initial block
  // (having failed the range or alignment checks), or the loaded bit if
  // we came from the block in which we loaded it.
  B.SetInsertPoint(CI);
  PHINode *P = B.CreatePHI(Int1Ty, 2);
  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
  P->addIncoming(Bit, ThenB.GetInsertBlock());
  return P;
}

/// Given a disjoint set of bitsets and globals, lay out the globals, build the
/// bit sets and lower the llvm.bitset.test calls.
void LowerBitSets::buildBitSetsFromGlobalVariables(
    ArrayRef<Metadata *> BitSets, ArrayRef<GlobalVariable *> Globals) {
  // Build a new global with the combined contents of the referenced globals.
  // This global is a struct whose even-indexed elements contain the original
  // contents of the referenced globals and whose odd-indexed elements contain
  // any padding required to align the next element to the next power of 2.
  std::vector<Constant *> GlobalInits;
  const DataLayout &DL = M->getDataLayout();
  for (GlobalVariable *G : Globals) {
    GlobalInits.push_back(G->getInitializer());
    uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());

    // Compute the amount of padding required.
    uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;

    // A cap of 128 was found experimentally to give a good data/instruction
    // overhead tradeoff.
    if (Padding > 128)
      Padding = RoundUpToAlignment(InitSize, 128) - InitSize;

    GlobalInits.push_back(
        ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
  }
  if (!GlobalInits.empty())
    GlobalInits.pop_back();
  Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
  auto *CombinedGlobal =
      new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
                         GlobalValue::PrivateLinkage, NewInit);

  StructType *NewTy = cast<StructType>(NewInit->getType());
  const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);

  // Compute the offsets of the original globals within the new global.
  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
  for (unsigned I = 0; I != Globals.size(); ++I)
    // Multiply by 2 to account for padding elements.
    GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);

  lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout);

  // Build aliases pointing to offsets into the combined global for each
  // global from which we built the combined global, and replace references
  // to the original globals with references to the aliases.
  for (unsigned I = 0; I != Globals.size(); ++I) {
    // Multiply by 2 to account for padding elements.
    Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
                                      ConstantInt::get(Int32Ty, I * 2)};
    Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
        NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
    if (LinkerSubsectionsViaSymbols) {
      Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
    } else {
      assert(Globals[I]->getType()->getAddressSpace() == 0);
      GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
                                                Globals[I]->getLinkage(), "",
                                                CombinedGlobalElemPtr, M);
      GAlias->setVisibility(Globals[I]->getVisibility());
      GAlias->takeName(Globals[I]);
      Globals[I]->replaceAllUsesWith(GAlias);
    }
    Globals[I]->eraseFromParent();
  }
}

void LowerBitSets::lowerBitSetCalls(
    ArrayRef<Metadata *> BitSets, Constant *CombinedGlobalAddr,
    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
  Constant *CombinedGlobalIntAddr =
      ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);

  // For each bitset in this disjoint set...
  for (Metadata *BS : BitSets) {
    // Build the bitset.
    BitSetInfo BSI = buildBitSet(BS, GlobalLayout);
    DEBUG({
      if (auto BSS = dyn_cast<MDString>(BS))
        dbgs() << BSS->getString() << ": ";
      else
        dbgs() << "<unnamed>: ";
      BSI.print(dbgs());
    });

    ByteArrayInfo *BAI = nullptr;

    // Lower each call to llvm.bitset.test for this bitset.
    for (CallInst *CI : BitSetTestCallSites[BS]) {
      ++NumBitSetCallsLowered;
      Value *Lowered =
          lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
      CI->replaceAllUsesWith(Lowered);
      CI->eraseFromParent();
    }
  }
}

void LowerBitSets::verifyBitSetMDNode(MDNode *Op) {
  if (Op->getNumOperands() != 3)
    report_fatal_error(
        "All operands of llvm.bitsets metadata must have 3 elements");
  if (!Op->getOperand(1))
    return;

  auto OpConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(1));
  if (!OpConstMD)
    report_fatal_error("Bit set element must be a constant");
  auto OpGlobal = dyn_cast<GlobalObject>(OpConstMD->getValue());
  if (!OpGlobal)
    return;

  if (OpGlobal->isThreadLocal())
    report_fatal_error("Bit set element may not be thread-local");
  if (OpGlobal->hasSection())
    report_fatal_error("Bit set element may not have an explicit section");

  if (isa<GlobalVariable>(OpGlobal) && OpGlobal->isDeclarationForLinker())
    report_fatal_error("Bit set global var element must be a definition");

  auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
  if (!OffsetConstMD)
    report_fatal_error("Bit set element offset must be a constant");
  auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
  if (!OffsetInt)
    report_fatal_error("Bit set element offset must be an integer constant");
}

static const unsigned kX86JumpTableEntrySize = 8;

unsigned LowerBitSets::getJumpTableEntrySize() {
  if (Arch != Triple::x86 && Arch != Triple::x86_64)
    report_fatal_error("Unsupported architecture for jump tables");

  return kX86JumpTableEntrySize;
}

// Create a constant representing a jump table entry for the target. This
// consists of an instruction sequence containing a relative branch to Dest.
// The constant will be laid out at address Src+(Len*Distance) where Len is the
// target-specific jump table entry size.
Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest,
                                             unsigned Distance) {
  if (Arch != Triple::x86 && Arch != Triple::x86_64)
    report_fatal_error("Unsupported architecture for jump tables");

  const unsigned kJmpPCRel32Code = 0xe9;
  const unsigned kInt3Code = 0xcc;

  ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code);

  // Build a constant representing the displacement between the constant's
  // address and Dest. This will resolve to a PC32 relocation referring to Dest.
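  // (Note added for clarity: the entry begins at Src + Distance * 8, and the
  // x86 jmp rel32 encoding measures its displacement from the end of the
  // 5-byte instruction, hence the "Distance * kX86JumpTableEntrySize + 5"
  // subtracted below.)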
  Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
  Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy);
  Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt);
  ConstantInt *DispOffset =
      ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
  Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset);
  OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty);

  ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code);

  Constant *Fields[] = {
      Jmp, OffsetedDisp, Int3, Int3, Int3,
  };
  return ConstantStruct::getAnon(Fields, /*Packed=*/true);
}

Type *LowerBitSets::getJumpTableEntryType() {
  if (Arch != Triple::x86 && Arch != Triple::x86_64)
    report_fatal_error("Unsupported architecture for jump tables");

  return StructType::get(M->getContext(),
                         {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty},
                         /*Packed=*/true);
}

/// Given a disjoint set of bitsets and functions, build a jump table for the
/// functions, build the bit sets and lower the llvm.bitset.test calls.
void LowerBitSets::buildBitSetsFromFunctions(ArrayRef<Metadata *> BitSets,
                                             ArrayRef<Function *> Functions) {
  // Unlike the global bitset builder, the function bitset builder cannot
  // re-arrange functions in a particular order and base its calculations on
  // the layout of the functions' entry points, as we have no idea how large a
  // particular function will end up being (the size could even depend on what
  // this pass does!) Instead, we build a jump table, which is a block of code
  // consisting of one branch instruction for each of the functions in the bit
  // set that branches to the target function, and redirect any taken function
  // addresses to the corresponding jump table entry. In the object file's
  // symbol table, the symbols for the target functions also refer to the jump
  // table entries, so that addresses taken outside the module will pass any
  // verification done inside the module.
  //
  // In more concrete terms, suppose we have three functions f, g, h which are
  // members of a single bitset, and a function foo that returns their
  // addresses:
  //
  // f:
  //   mov 0, %eax
  //   ret
  //
  // g:
  //   mov 1, %eax
  //   ret
  //
  // h:
  //   mov 2, %eax
  //   ret
  //
  // foo:
  //   mov f, %eax
  //   mov g, %edx
  //   mov h, %ecx
  //   ret
  //
  // To create a jump table for these functions, we instruct the LLVM code
  // generator to output a jump table in the .text section. This is done by
  // representing the instructions in the jump table as an LLVM constant and
  // placing them in a global variable in the .text section. The end result will
  // (conceptually) look like this:
  //
  // f:
  //   jmp .Ltmp0 ; 5 bytes
  //   int3       ; 1 byte
  //   int3       ; 1 byte
  //   int3       ; 1 byte
  //
  // g:
  //   jmp .Ltmp1 ; 5 bytes
  //   int3       ; 1 byte
  //   int3       ; 1 byte
  //   int3       ; 1 byte
  //
  // h:
  //   jmp .Ltmp2 ; 5 bytes
  //   int3       ; 1 byte
  //   int3       ; 1 byte
  //   int3       ; 1 byte
  //
  // .Ltmp0:
  //   mov 0, %eax
  //   ret
  //
  // .Ltmp1:
  //   mov 1, %eax
  //   ret
  //
  // .Ltmp2:
  //   mov 2, %eax
  //   ret
  //
  // foo:
  //   mov f, %eax
  //   mov g, %edx
  //   mov h, %ecx
  //   ret
  //
  // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
  // normal case the check can be carried out using the same kind of simple
  // arithmetic that we normally use for globals.

  assert(!Functions.empty());

  // Build a simple layout based on the regular layout of jump tables.
  DenseMap<GlobalObject *, uint64_t> GlobalLayout;
  unsigned EntrySize = getJumpTableEntrySize();
  for (unsigned I = 0; I != Functions.size(); ++I)
    GlobalLayout[Functions[I]] = I * EntrySize;

  // Create a constant to hold the jump table.
  ArrayType *JumpTableType =
      ArrayType::get(getJumpTableEntryType(), Functions.size());
  auto JumpTable = new GlobalVariable(*M, JumpTableType,
                                      /*isConstant=*/true,
                                      GlobalValue::PrivateLinkage, nullptr);
  JumpTable->setSection(ObjectFormat == Triple::MachO
                            ? "__TEXT,__text,regular,pure_instructions"
                            : ".text");
  lowerBitSetCalls(BitSets, JumpTable, GlobalLayout);

  // Build aliases pointing to offsets into the jump table, and replace
  // references to the original functions with references to the aliases.
  for (unsigned I = 0; I != Functions.size(); ++I) {
    Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
        ConstantExpr::getGetElementPtr(
            JumpTableType, JumpTable,
            ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
                                 ConstantInt::get(IntPtrTy, I)}),
        Functions[I]->getType());
    if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
      Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
    } else {
      assert(Functions[I]->getType()->getAddressSpace() == 0);
      GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0,
                                                Functions[I]->getLinkage(), "",
                                                CombinedGlobalElemPtr, M);
      GAlias->setVisibility(Functions[I]->getVisibility());
      GAlias->takeName(Functions[I]);
      Functions[I]->replaceAllUsesWith(GAlias);
    }
    if (!Functions[I]->isDeclarationForLinker())
      Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
  }

  // Build and set the jump table's initializer.
  std::vector<Constant *> JumpTableEntries;
  for (unsigned I = 0; I != Functions.size(); ++I)
    JumpTableEntries.push_back(
        createJumpTableEntry(JumpTable, Functions[I], I));
  JumpTable->setInitializer(
      ConstantArray::get(JumpTableType, JumpTableEntries));
}

void LowerBitSets::buildBitSetsFromDisjointSet(
    ArrayRef<Metadata *> BitSets, ArrayRef<GlobalObject *> Globals) {
  llvm::DenseMap<Metadata *, uint64_t> BitSetIndices;
  llvm::DenseMap<GlobalObject *, uint64_t> GlobalIndices;
  for (unsigned I = 0; I != BitSets.size(); ++I)
    BitSetIndices[BitSets[I]] = I;
  for (unsigned I = 0; I != Globals.size(); ++I)
    GlobalIndices[Globals[I]] = I;

  // For each bitset, build a set of indices that refer to globals referenced by
  // the bitset.
  std::vector<std::set<uint64_t>> BitSetMembers(BitSets.size());
  if (BitSetNM) {
    for (MDNode *Op : BitSetNM->operands()) {
      // Op = { bitset name, global, offset }
      if (!Op->getOperand(1))
        continue;
      auto I = BitSetIndices.find(Op->getOperand(0));
      if (I == BitSetIndices.end())
        continue;

      auto OpGlobal = dyn_cast<GlobalObject>(
          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
      if (!OpGlobal)
        continue;
      BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]);
    }
  }

  // Order the sets of indices by size. The GlobalLayoutBuilder works best
  // when given small index sets first.
  std::stable_sort(
      BitSetMembers.begin(), BitSetMembers.end(),
      [](const std::set<uint64_t> &O1, const std::set<uint64_t> &O2) {
        return O1.size() < O2.size();
      });

  // Create a GlobalLayoutBuilder and provide it with index sets as layout
  // fragments. The GlobalLayoutBuilder tries to lay out members of fragments
  // as close together as possible.
  GlobalLayoutBuilder GLB(Globals.size());
  for (auto &&MemSet : BitSetMembers)
    GLB.addFragment(MemSet);

  // Build the bitsets from this disjoint set.
  if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
    // Build a vector of global variables with the computed layout.
    std::vector<GlobalVariable *> OrderedGVs(Globals.size());
    auto OGI = OrderedGVs.begin();
    for (auto &&F : GLB.Fragments) {
      for (auto &&Offset : F) {
        auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
        if (!GV)
          report_fatal_error(
              "Bit set may not contain both global variables and functions");
        *OGI++ = GV;
      }
    }

    buildBitSetsFromGlobalVariables(BitSets, OrderedGVs);
  } else {
    // Build a vector of functions with the computed layout.
    std::vector<Function *> OrderedFns(Globals.size());
    auto OFI = OrderedFns.begin();
    for (auto &&F : GLB.Fragments) {
      for (auto &&Offset : F) {
        auto Fn = dyn_cast<Function>(Globals[Offset]);
        if (!Fn)
          report_fatal_error(
              "Bit set may not contain both global variables and functions");
        *OFI++ = Fn;
      }
    }

    buildBitSetsFromFunctions(BitSets, OrderedFns);
  }
}

/// Lower all bit sets in this module.
bool LowerBitSets::buildBitSets() {
  Function *BitSetTestFunc =
      M->getFunction(Intrinsic::getName(Intrinsic::bitset_test));
  if (!BitSetTestFunc)
    return false;

  // Equivalence class set containing bitsets and the globals they reference.
  // This is used to partition the set of bitsets in the module into disjoint
  // sets.
  typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
      GlobalClassesTy;
  GlobalClassesTy GlobalClasses;

  // Verify the bitset metadata and build a mapping from bitset identifiers to
  // their last observed index in BitSetNM. This will be used later to
  // deterministically order the list of bitset identifiers.
  llvm::DenseMap<Metadata *, unsigned> BitSetIdIndices;
  if (BitSetNM) {
    for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) {
      MDNode *Op = BitSetNM->getOperand(I);
      verifyBitSetMDNode(Op);
      BitSetIdIndices[Op->getOperand(0)] = I;
    }
  }

  for (const Use &U : BitSetTestFunc->uses()) {
    auto CI = cast<CallInst>(U.getUser());

    auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
    if (!BitSetMDVal)
      report_fatal_error(
          "Second argument of llvm.bitset.test must be metadata");
    auto BitSet = BitSetMDVal->getMetadata();

    // Add the call site to the list of call sites for this bit set. We also
    // use BitSetTestCallSites to keep track of whether we have seen this bit
    // set before. If we have, we don't need to re-add the referenced globals
    // to the equivalence class.
    std::pair<DenseMap<Metadata *, std::vector<CallInst *>>::iterator,
              bool> Ins =
        BitSetTestCallSites.insert(
            std::make_pair(BitSet, std::vector<CallInst *>()));
    Ins.first->second.push_back(CI);
    if (!Ins.second)
      continue;

    // Add the bitset to the equivalence class.
    GlobalClassesTy::iterator GCI = GlobalClasses.insert(BitSet);
    GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);

    if (!BitSetNM)
      continue;

    // Add the referenced globals to the bitset's equivalence class.
    for (MDNode *Op : BitSetNM->operands()) {
      if (Op->getOperand(0) != BitSet || !Op->getOperand(1))
        continue;

      auto OpGlobal = dyn_cast<GlobalObject>(
          cast<ConstantAsMetadata>(Op->getOperand(1))->getValue());
      if (!OpGlobal)
        continue;

      CurSet = GlobalClasses.unionSets(
          CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal)));
    }
  }

  if (GlobalClasses.empty())
    return false;

  // Build a list of disjoint sets ordered by their maximum BitSetNM index
  // for determinism.
  std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
                                 E = GlobalClasses.end();
       I != E; ++I) {
    if (!I->isLeader()) continue;
    ++NumBitSetDisjointSets;

    unsigned MaxIndex = 0;
    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
         MI != GlobalClasses.member_end(); ++MI) {
      if ((*MI).is<Metadata *>())
        MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get<Metadata *>()]);
    }
    Sets.emplace_back(I, MaxIndex);
  }
  std::sort(Sets.begin(), Sets.end(),
            [](const std::pair<GlobalClassesTy::iterator, unsigned> &S1,
               const std::pair<GlobalClassesTy::iterator, unsigned> &S2) {
              return S1.second < S2.second;
            });

  // For each disjoint set we found...
  for (const auto &S : Sets) {
    // Build the list of bitsets in this disjoint set.
    std::vector<Metadata *> BitSets;
    std::vector<GlobalObject *> Globals;
    for (GlobalClassesTy::member_iterator MI =
             GlobalClasses.member_begin(S.first);
         MI != GlobalClasses.member_end(); ++MI) {
      if ((*MI).is<Metadata *>())
        BitSets.push_back(MI->get<Metadata *>());
      else
        Globals.push_back(MI->get<GlobalObject *>());
    }

    // Order bitsets by BitSetNM index for determinism. This ordering is stable
    // as there is a one-to-one mapping between metadata and indices.
    std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) {
      return BitSetIdIndices[M1] < BitSetIdIndices[M2];
    });

    // Lower the bitsets in this disjoint set.
    buildBitSetsFromDisjointSet(BitSets, Globals);
  }

  allocateByteArrays();

  return true;
}

bool LowerBitSets::eraseBitSetMetadata() {
  if (!BitSetNM)
    return false;

  M->eraseNamedMetadata(BitSetNM);
  return true;
}

bool LowerBitSets::runOnModule(Module &M) {
  bool Changed = buildBitSets();
  Changed |= eraseBitSetMetadata();
  return Changed;
}