// GlobalModuleIndex.cpp revision 5100135ecf29d4246e6bd34318da9eb22c16b86a
1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the GlobalModuleIndex class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ASTReaderInternals.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/OnDiskHashTable.h" 17#include "clang/Serialization/ASTBitCodes.h" 18#include "clang/Serialization/GlobalModuleIndex.h" 19#include "llvm/ADT/DenseMap.h" 20#include "llvm/ADT/MapVector.h" 21#include "llvm/ADT/SmallString.h" 22#include "llvm/ADT/StringExtras.h" 23#include "llvm/Bitcode/BitstreamReader.h" 24#include "llvm/Bitcode/BitstreamWriter.h" 25#include "llvm/Support/FileSystem.h" 26#include "llvm/Support/LockFileManager.h" 27#include "llvm/Support/MemoryBuffer.h" 28#include "llvm/Support/PathV2.h" 29using namespace clang; 30using namespace serialization; 31 32//----------------------------------------------------------------------------// 33// Shared constants 34//----------------------------------------------------------------------------// 35namespace { 36 enum { 37 /// \brief The block containing the index. 38 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID 39 }; 40 41 /// \brief Describes the record types in the index. 42 enum IndexRecordTypes { 43 /// \brief Contains version information and potentially other metadata, 44 /// used to determine if we can read this global index file. 45 METADATA, 46 /// \brief Describes a module, including its file name and dependencies. 47 MODULE, 48 /// \brief The index for identifiers. 49 IDENTIFIER_INDEX 50 }; 51} 52 53/// \brief The name of the global index file. 
54static const char * const IndexFileName = "modules.idx"; 55 56/// \brief The global index file version. 57static const unsigned CurrentVersion = 1; 58 59//----------------------------------------------------------------------------// 60// Global module index writer. 61//----------------------------------------------------------------------------// 62 63namespace { 64 /// \brief Provides information about a specific module file. 65 struct ModuleFileInfo { 66 /// \brief The numberic ID for this module file. 67 unsigned ID; 68 69 /// \brief The set of modules on which this module depends. Each entry is 70 /// a module ID. 71 SmallVector<unsigned, 4> Dependencies; 72 }; 73 74 /// \brief Builder that generates the global module index file. 75 class GlobalModuleIndexBuilder { 76 FileManager &FileMgr; 77 78 /// \brief Mapping from files to module file information. 79 typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap; 80 81 /// \brief Information about each of the known module files. 82 ModuleFilesMap ModuleFiles; 83 84 /// \brief Mapping from identifiers to the list of module file IDs that 85 /// consider this identifier to be interesting. 86 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap; 87 88 /// \brief A mapping from all interesting identifiers to the set of module 89 /// files in which those identifiers are considered interesting. 90 InterestingIdentifierMap InterestingIdentifiers; 91 92 /// \brief Write the block-info block for the global module index file. 93 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream); 94 95 /// \brief Retrieve the module file information for the given file. 
96 ModuleFileInfo &getModuleFileInfo(const FileEntry *File) { 97 llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known 98 = ModuleFiles.find(File); 99 if (Known != ModuleFiles.end()) 100 return Known->second; 101 102 unsigned NewID = ModuleFiles.size(); 103 ModuleFileInfo &Info = ModuleFiles[File]; 104 Info.ID = NewID; 105 return Info; 106 } 107 108 public: 109 explicit GlobalModuleIndexBuilder(FileManager &FileMgr) : FileMgr(FileMgr){} 110 111 /// \brief Load the contents of the given module file into the builder. 112 /// 113 /// \returns true if an error occurred, false otherwise. 114 bool loadModuleFile(const FileEntry *File); 115 116 /// \brief Write the index to the given bitstream. 117 void writeIndex(llvm::BitstreamWriter &Stream); 118 }; 119} 120 121static void emitBlockID(unsigned ID, const char *Name, 122 llvm::BitstreamWriter &Stream, 123 SmallVectorImpl<uint64_t> &Record) { 124 Record.clear(); 125 Record.push_back(ID); 126 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record); 127 128 // Emit the block name if present. 
129 if (Name == 0 || Name[0] == 0) return; 130 Record.clear(); 131 while (*Name) 132 Record.push_back(*Name++); 133 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record); 134} 135 136static void emitRecordID(unsigned ID, const char *Name, 137 llvm::BitstreamWriter &Stream, 138 SmallVectorImpl<uint64_t> &Record) { 139 Record.clear(); 140 Record.push_back(ID); 141 while (*Name) 142 Record.push_back(*Name++); 143 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record); 144} 145 146void 147GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) { 148 SmallVector<uint64_t, 64> Record; 149 Stream.EnterSubblock(llvm::bitc::BLOCKINFO_BLOCK_ID, 3); 150 151#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record) 152#define RECORD(X) emitRecordID(X, #X, Stream, Record) 153 BLOCK(GLOBAL_INDEX_BLOCK); 154 RECORD(METADATA); 155 RECORD(MODULE); 156 RECORD(IDENTIFIER_INDEX); 157#undef RECORD 158#undef BLOCK 159 160 Stream.ExitBlock(); 161} 162 163namespace clang { 164 class InterestingASTIdentifierLookupTrait 165 : public serialization::reader::ASTIdentifierLookupTraitBase { 166 167 public: 168 /// \brief The identifier and whether it is "interesting". 169 typedef std::pair<StringRef, bool> data_type; 170 171 data_type ReadData(const internal_key_type& k, 172 const unsigned char* d, 173 unsigned DataLen) { 174 // The first bit indicates whether this identifier is interesting. 175 // That's all we care about. 176 using namespace clang::io; 177 unsigned RawID = ReadUnalignedLE32(d); 178 bool IsInteresting = RawID & 0x01; 179 return std::make_pair(k, IsInteresting); 180 } 181 }; 182} 183 184bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) { 185 // Open the module file. 
186 OwningPtr<llvm::MemoryBuffer> Buffer; 187 Buffer.reset(FileMgr.getBufferForFile(File)); 188 if (!Buffer) { 189 return true; 190 } 191 192 // Initialize the input stream 193 llvm::BitstreamReader InStreamFile; 194 llvm::BitstreamCursor InStream; 195 InStreamFile.init((const unsigned char *)Buffer->getBufferStart(), 196 (const unsigned char *)Buffer->getBufferEnd()); 197 InStream.init(InStreamFile); 198 199 // Sniff for the signature. 200 if (InStream.Read(8) != 'C' || 201 InStream.Read(8) != 'P' || 202 InStream.Read(8) != 'C' || 203 InStream.Read(8) != 'H') { 204 return true; 205 } 206 207 // Record this module file and assign it a unique ID (if it doesn't have 208 // one already). 209 unsigned ID = getModuleFileInfo(File).ID; 210 211 // Search for the blocks and records we care about. 212 enum { Outer, ControlBlock, ASTBlock } State = Outer; 213 bool Done = false; 214 while (!Done) { 215 const unsigned Flags = llvm::BitstreamCursor::AF_DontPopBlockAtEnd; 216 llvm::BitstreamEntry Entry = InStream.advance(Flags); 217 switch (Entry.Kind) { 218 case llvm::BitstreamEntry::Error: 219 return true; 220 221 case llvm::BitstreamEntry::Record: 222 // In the outer state, just skip the record. We don't care. 223 if (State == Outer) { 224 InStream.skipRecord(Entry.ID); 225 continue; 226 } 227 228 // Handle potentially-interesting records below. 229 break; 230 231 case llvm::BitstreamEntry::SubBlock: 232 if (State == Outer && Entry.ID == CONTROL_BLOCK_ID) { 233 if (InStream.EnterSubBlock(CONTROL_BLOCK_ID)) 234 return true; 235 236 // Found the control block. 237 State = ControlBlock; 238 continue; 239 } 240 241 if (State == Outer && Entry.ID == AST_BLOCK_ID) { 242 if (InStream.EnterSubBlock(AST_BLOCK_ID)) 243 return true; 244 245 // Found the AST block. 
246 State = ASTBlock; 247 continue; 248 249 } 250 251 if (InStream.SkipBlock()) 252 return true; 253 254 continue; 255 256 case llvm::BitstreamEntry::EndBlock: 257 if (State == Outer) { 258 Done = true; 259 } 260 State = Outer; 261 continue; 262 } 263 264 // Read the given record. 265 SmallVector<uint64_t, 64> Record; 266 StringRef Blob; 267 unsigned Code = InStream.readRecord(Entry.ID, Record, &Blob); 268 269 // Handle module dependencies. 270 if (State == ControlBlock && Code == IMPORTS) { 271 // Load each of the imported PCH files. 272 unsigned Idx = 0, N = Record.size(); 273 while (Idx < N) { 274 // Read information about the AST file. 275 276 // Skip the imported kind 277 ++Idx; 278 279 // Skip the import location 280 ++Idx; 281 282 // Retrieve the imported file name. 283 unsigned Length = Record[Idx++]; 284 SmallString<128> ImportedFile(Record.begin() + Idx, 285 Record.begin() + Idx + Length); 286 Idx += Length; 287 288 // Find the imported module file. 289 const FileEntry *DependsOnFile = FileMgr.getFile(ImportedFile); 290 if (!DependsOnFile) 291 return true; 292 293 // Record the dependency. 294 unsigned DependsOnID = getModuleFileInfo(DependsOnFile).ID; 295 getModuleFileInfo(File).Dependencies.push_back(DependsOnID); 296 } 297 298 continue; 299 } 300 301 // Handle the identifier table 302 if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) { 303 typedef OnDiskChainedHashTable<InterestingASTIdentifierLookupTrait> 304 InterestingIdentifierTable; 305 llvm::OwningPtr<InterestingIdentifierTable> 306 Table(InterestingIdentifierTable::Create( 307 (const unsigned char *)Blob.data() + Record[0], 308 (const unsigned char *)Blob.data())); 309 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(), 310 DEnd = Table->data_end(); 311 D != DEnd; ++D) { 312 std::pair<StringRef, bool> Ident = *D; 313 if (Ident.second) 314 InterestingIdentifiers[Ident.first].push_back(ID); 315 } 316 } 317 318 // FIXME: Handle the selector table. 
319 320 // We don't care about this record. 321 } 322 323 return false; 324} 325 326namespace { 327 328/// \brief Trait used to generate the identifier index as an on-disk hash 329/// table. 330class IdentifierIndexWriterTrait { 331public: 332 typedef StringRef key_type; 333 typedef StringRef key_type_ref; 334 typedef SmallVector<unsigned, 2> data_type; 335 typedef const SmallVector<unsigned, 2> &data_type_ref; 336 337 static unsigned ComputeHash(key_type_ref Key) { 338 return llvm::HashString(Key); 339 } 340 341 std::pair<unsigned,unsigned> 342 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) { 343 unsigned KeyLen = Key.size(); 344 unsigned DataLen = Data.size() * 4; 345 clang::io::Emit16(Out, KeyLen); 346 clang::io::Emit16(Out, DataLen); 347 return std::make_pair(KeyLen, DataLen); 348 } 349 350 void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) { 351 Out.write(Key.data(), KeyLen); 352 } 353 354 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data, 355 unsigned DataLen) { 356 for (unsigned I = 0, N = Data.size(); I != N; ++I) 357 clang::io::Emit32(Out, Data[I]); 358 } 359}; 360 361} 362 363void GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) { 364 using namespace llvm; 365 366 // Emit the file header. 367 Stream.Emit((unsigned)'B', 8); 368 Stream.Emit((unsigned)'C', 8); 369 Stream.Emit((unsigned)'G', 8); 370 Stream.Emit((unsigned)'I', 8); 371 372 // Write the block-info block, which describes the records in this bitcode 373 // file. 374 emitBlockInfoBlock(Stream); 375 376 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3); 377 378 // Write the metadata. 379 SmallVector<uint64_t, 2> Record; 380 Record.push_back(CurrentVersion); 381 Stream.EmitRecord(METADATA, Record); 382 383 // Write the set of known module files. 
384 for (ModuleFilesMap::iterator M = ModuleFiles.begin(), 385 MEnd = ModuleFiles.end(); 386 M != MEnd; ++M) { 387 Record.clear(); 388 Record.push_back(M->second.ID); 389 Record.push_back(M->first->getSize()); 390 Record.push_back(M->first->getModificationTime()); 391 392 // File name 393 StringRef Name(M->first->getName()); 394 Record.push_back(Name.size()); 395 Record.append(Name.begin(), Name.end()); 396 397 // Dependencies 398 Record.push_back(M->second.Dependencies.size()); 399 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end()); 400 Stream.EmitRecord(MODULE, Record); 401 } 402 403 // Write the identifier -> module file mapping. 404 { 405 OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator; 406 IdentifierIndexWriterTrait Trait; 407 408 // Populate the hash table. 409 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(), 410 IEnd = InterestingIdentifiers.end(); 411 I != IEnd; ++I) { 412 Generator.insert(I->first(), I->second, Trait); 413 } 414 415 // Create the on-disk hash table in a buffer. 416 SmallString<4096> IdentifierTable; 417 uint32_t BucketOffset; 418 { 419 llvm::raw_svector_ostream Out(IdentifierTable); 420 // Make sure that no bucket is at offset 0 421 clang::io::Emit32(Out, 0); 422 BucketOffset = Generator.Emit(Out, Trait); 423 } 424 425 // Create a blob abbreviation 426 BitCodeAbbrev *Abbrev = new BitCodeAbbrev(); 427 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX)); 428 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); 429 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); 430 unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev); 431 432 // Write the identifier table 433 Record.clear(); 434 Record.push_back(IDENTIFIER_INDEX); 435 Record.push_back(BucketOffset); 436 Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable.str()); 437 } 438 439 // FIXME: Selectors. 
440 441 Stream.ExitBlock(); 442} 443 444GlobalModuleIndex::ErrorCode 445GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) { 446 llvm::SmallString<128> IndexPath; 447 IndexPath += Path; 448 llvm::sys::path::append(IndexPath, IndexFileName); 449 450 // Coordinate building the global index file with other processes that might 451 // try to do the same. 452 llvm::LockFileManager Locked(IndexPath); 453 switch (Locked) { 454 case llvm::LockFileManager::LFS_Error: 455 return EC_IOError; 456 457 case llvm::LockFileManager::LFS_Owned: 458 // We're responsible for building the index ourselves. Do so below. 459 break; 460 461 case llvm::LockFileManager::LFS_Shared: 462 // Someone else is responsible for building the index. We don't care 463 // when they finish, so we're done. 464 return EC_Building; 465 } 466 467 // The module index builder. 468 GlobalModuleIndexBuilder Builder(FileMgr); 469 470 // Load each of the module files. 471 llvm::error_code EC; 472 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd; 473 D != DEnd && !EC; 474 D.increment(EC)) { 475 // If this isn't a module file, we don't care. 476 if (llvm::sys::path::extension(D->path()) != ".pcm") { 477 // ... unless it's a .pcm.lock file, which indicates that someone is 478 // in the process of rebuilding a module. They'll rebuild the index 479 // at the end of that translation unit, so we don't have to. 480 if (llvm::sys::path::extension(D->path()) == ".pcm.lock") 481 return EC_Building; 482 483 continue; 484 } 485 486 // If we can't find the module file, skip it. 487 const FileEntry *ModuleFile = FileMgr.getFile(D->path()); 488 if (!ModuleFile) 489 continue; 490 491 // Load this module file. 492 if (Builder.loadModuleFile(ModuleFile)) 493 return EC_IOError; 494 } 495 496 // The output buffer, into which the global index will be written. 
497 SmallVector<char, 16> OutputBuffer; 498 { 499 llvm::BitstreamWriter OutputStream(OutputBuffer); 500 Builder.writeIndex(OutputStream); 501 } 502 503 // Write the global index file to a temporary file. 504 llvm::SmallString<128> IndexTmpPath; 505 int TmpFD; 506 if (llvm::sys::fs::unique_file(IndexPath + "-%%%%%%%%", TmpFD, IndexTmpPath)) 507 return EC_IOError; 508 509 // Open the temporary global index file for output. 510 std::string ErrorInfo; 511 llvm::raw_fd_ostream Out(IndexTmpPath.c_str(), ErrorInfo, 512 llvm::raw_fd_ostream::F_Binary); 513 if (Out.has_error()) 514 return EC_IOError; 515 516 // Write the index. 517 Out.write(OutputBuffer.data(), OutputBuffer.size()); 518 Out.close(); 519 if (Out.has_error()) 520 return EC_IOError; 521 522 // Remove the old index file. It isn't relevant any more. 523 bool OldIndexExisted; 524 llvm::sys::fs::remove(IndexPath.str(), OldIndexExisted); 525 526 // Rename the newly-written index file to the proper name. 527 if (llvm::sys::fs::rename(IndexTmpPath.str(), IndexPath.str())) { 528 // Rename failed; just remove the 529 llvm::sys::fs::remove(IndexTmpPath.str(), OldIndexExisted); 530 return EC_IOError; 531 } 532 533 // We're done. 534 return EC_None; 535} 536