GlobalModuleIndex.cpp revision d2db16f59601bc028f4f973990cc6d4abe95b91a
1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the GlobalModuleIndex class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ASTReaderInternals.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/OnDiskHashTable.h"
17#include "clang/Serialization/ASTBitCodes.h"
18#include "clang/Serialization/GlobalModuleIndex.h"
19#include "llvm/ADT/DenseMap.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/Bitcode/BitstreamReader.h"
24#include "llvm/Bitcode/BitstreamWriter.h"
25#include "llvm/Support/FileSystem.h"
26#include "llvm/Support/LockFileManager.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/PathV2.h"
29using namespace clang;
30using namespace serialization;
31
32//----------------------------------------------------------------------------//
33// Shared constants
34//----------------------------------------------------------------------------//
35namespace {
36  enum {
37    /// \brief The block containing the index.
38    GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
39  };
40
41  /// \brief Describes the record types in the index.
42  enum IndexRecordTypes {
43    /// \brief Contains version information and potentially other metadata,
44    /// used to determine if we can read this global index file.
45    METADATA,
46    /// \brief Describes a module, including its file name and dependencies.
47    MODULE,
48    /// \brief The index for identifiers.
49    IDENTIFIER_INDEX
50  };
51}
52
53/// \brief The name of the global index file.
54static const char * const IndexFileName = "modules.idx";
55
56/// \brief The global index file version.
57static const unsigned CurrentVersion = 1;
58
59//----------------------------------------------------------------------------//
60// Global module index writer.
61//----------------------------------------------------------------------------//
62
63namespace {
64  /// \brief Provides information about a specific module file.
65  struct ModuleFileInfo {
66    /// \brief The numberic ID for this module file.
67    unsigned ID;
68
69    /// \brief The set of modules on which this module depends. Each entry is
70    /// a module ID.
71    SmallVector<unsigned, 4> Dependencies;
72  };
73
74  /// \brief Builder that generates the global module index file.
75  class GlobalModuleIndexBuilder {
76    FileManager &FileMgr;
77
78    /// \brief Mapping from files to module file information.
79    typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap;
80
81    /// \brief Information about each of the known module files.
82    ModuleFilesMap ModuleFiles;
83
84    /// \brief Mapping from identifiers to the list of module file IDs that
85    /// consider this identifier to be interesting.
86    typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
87
88    /// \brief A mapping from all interesting identifiers to the set of module
89    /// files in which those identifiers are considered interesting.
90    InterestingIdentifierMap InterestingIdentifiers;
91
92    /// \brief Write the block-info block for the global module index file.
93    void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
94
95    /// \brief Retrieve the module file information for the given file.
96    ModuleFileInfo &getModuleFileInfo(const FileEntry *File) {
97      llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known
98        = ModuleFiles.find(File);
99      if (Known != ModuleFiles.end())
100        return Known->second;
101
102      unsigned NewID = ModuleFiles.size();
103      ModuleFileInfo &Info = ModuleFiles[File];
104      Info.ID = NewID;
105      return Info;
106    }
107
108  public:
109    explicit GlobalModuleIndexBuilder(FileManager &FileMgr) : FileMgr(FileMgr){}
110
111    /// \brief Load the contents of the given module file into the builder.
112    ///
113    /// \returns true if an error occurred, false otherwise.
114    bool loadModuleFile(const FileEntry *File);
115
116    /// \brief Write the index to the given bitstream.
117    void writeIndex(llvm::BitstreamWriter &Stream);
118  };
119}
120
121static void emitBlockID(unsigned ID, const char *Name,
122                        llvm::BitstreamWriter &Stream,
123                        SmallVectorImpl<uint64_t> &Record) {
124  Record.clear();
125  Record.push_back(ID);
126  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
127
128  // Emit the block name if present.
129  if (Name == 0 || Name[0] == 0) return;
130  Record.clear();
131  while (*Name)
132    Record.push_back(*Name++);
133  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
134}
135
136static void emitRecordID(unsigned ID, const char *Name,
137                         llvm::BitstreamWriter &Stream,
138                         SmallVectorImpl<uint64_t> &Record) {
139  Record.clear();
140  Record.push_back(ID);
141  while (*Name)
142    Record.push_back(*Name++);
143  Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
144}
145
146void
147GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
148  SmallVector<uint64_t, 64> Record;
149  Stream.EnterSubblock(llvm::bitc::BLOCKINFO_BLOCK_ID, 3);
150
151#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
152#define RECORD(X) emitRecordID(X, #X, Stream, Record)
153  BLOCK(GLOBAL_INDEX_BLOCK);
154  RECORD(METADATA);
155  RECORD(MODULE);
156  RECORD(IDENTIFIER_INDEX);
157#undef RECORD
158#undef BLOCK
159
160  Stream.ExitBlock();
161}
162
163namespace clang {
164  class InterestingASTIdentifierLookupTrait
165    : public serialization::reader::ASTIdentifierLookupTraitBase {
166
167  public:
168    /// \brief The identifier and whether it is "interesting".
169    typedef std::pair<StringRef, bool> data_type;
170
171    data_type ReadData(const internal_key_type& k,
172                       const unsigned char* d,
173                       unsigned DataLen) {
174      // The first bit indicates whether this identifier is interesting.
175      // That's all we care about.
176      using namespace clang::io;
177      unsigned RawID = ReadUnalignedLE32(d);
178      bool IsInteresting = RawID & 0x01;
179      return std::make_pair(k, IsInteresting);
180    }
181  };
182}
183
184bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
185  // Open the module file.
186  OwningPtr<llvm::MemoryBuffer> Buffer;
187  Buffer.reset(FileMgr.getBufferForFile(File));
188  if (!Buffer) {
189    return true;
190  }
191
192  // Initialize the input stream
193  llvm::BitstreamReader InStreamFile;
194  llvm::BitstreamCursor InStream;
195  InStreamFile.init((const unsigned char *)Buffer->getBufferStart(),
196                  (const unsigned char *)Buffer->getBufferEnd());
197  InStream.init(InStreamFile);
198
199  // Sniff for the signature.
200  if (InStream.Read(8) != 'C' ||
201      InStream.Read(8) != 'P' ||
202      InStream.Read(8) != 'C' ||
203      InStream.Read(8) != 'H') {
204    return true;
205  }
206
207  // Record this module file and assign it a unique ID (if it doesn't have
208  // one already).
209  unsigned ID = getModuleFileInfo(File).ID;
210
211  // Search for the blocks and records we care about.
212  enum { Outer, ControlBlock, ASTBlock } State = Outer;
213  bool Done = false;
214  while (!Done) {
215    const unsigned Flags = llvm::BitstreamCursor::AF_DontPopBlockAtEnd;
216    llvm::BitstreamEntry Entry = InStream.advance(Flags);
217    switch (Entry.Kind) {
218    case llvm::BitstreamEntry::Error:
219      return true;
220
221    case llvm::BitstreamEntry::Record:
222      // In the outer state, just skip the record. We don't care.
223      if (State == Outer) {
224        InStream.skipRecord(Entry.ID);
225        continue;
226      }
227
228      // Handle potentially-interesting records below.
229      break;
230
231    case llvm::BitstreamEntry::SubBlock:
232      if (State == Outer && Entry.ID == CONTROL_BLOCK_ID) {
233        if (InStream.EnterSubBlock(CONTROL_BLOCK_ID))
234          return true;
235
236        // Found the control block.
237        State = ControlBlock;
238        continue;
239      }
240
241      if (State == Outer && Entry.ID == AST_BLOCK_ID) {
242        if (InStream.EnterSubBlock(AST_BLOCK_ID))
243          return true;
244
245        // Found the AST block.
246        State = ASTBlock;
247        continue;
248
249      }
250
251      if (InStream.SkipBlock())
252        return true;
253
254      continue;
255
256    case llvm::BitstreamEntry::EndBlock:
257      if (State == Outer) {
258        Done = true;
259      }
260      State = Outer;
261      continue;
262    }
263
264    // Read the given record.
265    SmallVector<uint64_t, 64> Record;
266    StringRef Blob;
267    unsigned Code = InStream.readRecord(Entry.ID, Record, &Blob);
268
269    // Handle module dependencies.
270    if (State == ControlBlock && Code == IMPORTS) {
271      // Load each of the imported PCH files.
272      unsigned Idx = 0, N = Record.size();
273      while (Idx < N) {
274        // Read information about the AST file.
275
276        // Skip the imported kind
277        ++Idx;
278
279        // Skip the import location
280        ++Idx;
281
282        // Retrieve the imported file name.
283        unsigned Length = Record[Idx++];
284        SmallString<128> ImportedFile(Record.begin() + Idx,
285                                      Record.begin() + Idx + Length);
286        Idx += Length;
287
288        // Find the imported module file.
289        const FileEntry *DependsOnFile = FileMgr.getFile(ImportedFile);
290        if (!DependsOnFile)
291          return true;
292
293        // Record the dependency.
294        unsigned DependsOnID = getModuleFileInfo(DependsOnFile).ID;
295        getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
296      }
297
298      continue;
299    }
300
301    // Handle the identifier table
302    if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
303      typedef OnDiskChainedHashTable<InterestingASTIdentifierLookupTrait>
304        InterestingIdentifierTable;
305      llvm::OwningPtr<InterestingIdentifierTable>
306        Table(InterestingIdentifierTable::Create(
307                (const unsigned char *)Blob.data() + Record[0],
308                (const unsigned char *)Blob.data()));
309      for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
310                                                     DEnd = Table->data_end();
311           D != DEnd; ++D) {
312        std::pair<StringRef, bool> Ident = *D;
313        if (Ident.second)
314          InterestingIdentifiers[Ident.first].push_back(ID);
315      }
316    }
317
318    // FIXME: Handle the selector table.
319
320    // We don't care about this record.
321  }
322
323  return false;
324}
325
326namespace {
327
328/// \brief Trait used to generate the identifier index as an on-disk hash
329/// table.
330class IdentifierIndexWriterTrait {
331public:
332  typedef StringRef key_type;
333  typedef StringRef key_type_ref;
334  typedef SmallVector<unsigned, 2> data_type;
335  typedef const SmallVector<unsigned, 2> &data_type_ref;
336
337  static unsigned ComputeHash(key_type_ref Key) {
338    return llvm::HashString(Key);
339  }
340
341  std::pair<unsigned,unsigned>
342  EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
343    unsigned KeyLen = Key.size();
344    unsigned DataLen = Data.size() * 4;
345    clang::io::Emit16(Out, KeyLen);
346    clang::io::Emit16(Out, DataLen);
347    return std::make_pair(KeyLen, DataLen);
348  }
349
350  void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
351    Out.write(Key.data(), KeyLen);
352  }
353
354  void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
355                unsigned DataLen) {
356    for (unsigned I = 0, N = Data.size(); I != N; ++I)
357      clang::io::Emit32(Out, Data[I]);
358  }
359};
360
361}
362
363void GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
364  using namespace llvm;
365
366  // Emit the file header.
367  Stream.Emit((unsigned)'B', 8);
368  Stream.Emit((unsigned)'C', 8);
369  Stream.Emit((unsigned)'G', 8);
370  Stream.Emit((unsigned)'I', 8);
371
372  // Write the block-info block, which describes the records in this bitcode
373  // file.
374  emitBlockInfoBlock(Stream);
375
376  Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
377
378  // Write the metadata.
379  SmallVector<uint64_t, 2> Record;
380  Record.push_back(CurrentVersion);
381  Stream.EmitRecord(METADATA, Record);
382
383  // Write the set of known module files.
384  for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
385                                MEnd = ModuleFiles.end();
386       M != MEnd; ++M) {
387    Record.clear();
388    Record.push_back(M->second.ID);
389    Record.push_back(M->first->getSize());
390    Record.push_back(M->first->getModificationTime());
391
392    // File name
393    StringRef Name(M->first->getName());
394    Record.push_back(Name.size());
395    Record.append(Name.begin(), Name.end());
396
397    // Dependencies
398    Record.push_back(M->second.Dependencies.size());
399    Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
400    Stream.EmitRecord(MODULE, Record);
401  }
402
403  // Write the identifier -> module file mapping.
404  {
405    OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
406    IdentifierIndexWriterTrait Trait;
407
408    // Populate the hash table.
409    for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
410                                            IEnd = InterestingIdentifiers.end();
411         I != IEnd; ++I) {
412      Generator.insert(I->first(), I->second, Trait);
413    }
414
415    // Create the on-disk hash table in a buffer.
416    SmallString<4096> IdentifierTable;
417    uint32_t BucketOffset;
418    {
419      llvm::raw_svector_ostream Out(IdentifierTable);
420      // Make sure that no bucket is at offset 0
421      clang::io::Emit32(Out, 0);
422      BucketOffset = Generator.Emit(Out, Trait);
423    }
424
425    // Create a blob abbreviation
426    BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
427    Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
428    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
429    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
430    unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
431
432    // Write the identifier table
433    Record.clear();
434    Record.push_back(IDENTIFIER_INDEX);
435    Record.push_back(BucketOffset);
436    Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable.str());
437  }
438
439  // FIXME: Selectors.
440
441  Stream.ExitBlock();
442}
443
444GlobalModuleIndex::ErrorCode
445GlobalModuleIndex::writeIndex(FileManager &FileMgr, StringRef Path) {
446  llvm::SmallString<128> IndexPath;
447  IndexPath += Path;
448  llvm::sys::path::append(IndexPath, IndexFileName);
449
450  // Coordinate building the global index file with other processes that might
451  // try to do the same.
452  llvm::LockFileManager Locked(IndexPath);
453  switch (Locked) {
454  case llvm::LockFileManager::LFS_Error:
455    return EC_IOError;
456
457  case llvm::LockFileManager::LFS_Owned:
458    // We're responsible for building the index ourselves. Do so below.
459    break;
460
461  case llvm::LockFileManager::LFS_Shared:
462    // Someone else is responsible for building the index. We don't care
463    // when they finish, so we're done.
464    return EC_Building;
465  }
466
467  // The module index builder.
468  GlobalModuleIndexBuilder Builder(FileMgr);
469
470  // Load each of the module files.
471  llvm::error_code EC;
472  for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
473       D != DEnd && !EC;
474       D.increment(EC)) {
475    // If this isn't a module file, we don't care.
476    if (llvm::sys::path::extension(D->path()) != ".pcm") {
477      // ... unless it's a .pcm.lock file, which indicates that someone is
478      // in the process of rebuilding a module. They'll rebuild the index
479      // at the end of that translation unit, so we don't have to.
480      if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
481        return EC_Building;
482
483      continue;
484    }
485
486    // If we can't find the module file, skip it.
487    const FileEntry *ModuleFile = FileMgr.getFile(D->path());
488    if (!ModuleFile)
489      continue;
490
491    // Load this module file.
492    if (Builder.loadModuleFile(ModuleFile))
493      return EC_IOError;
494  }
495
496  // The output buffer, into which the global index will be written.
497  SmallVector<char, 16> OutputBuffer;
498  {
499    llvm::BitstreamWriter OutputStream(OutputBuffer);
500    Builder.writeIndex(OutputStream);
501  }
502
503  // Write the global index file to a temporary file.
504  llvm::SmallString<128> IndexTmpPath;
505  int TmpFD;
506  if (llvm::sys::fs::unique_file(IndexPath + "-%%%%%%%%", TmpFD, IndexTmpPath))
507    return EC_IOError;
508
509  // Open the temporary global index file for output.
510  llvm::raw_fd_ostream Out(TmpFD, true);
511  if (Out.has_error())
512    return EC_IOError;
513
514  // Write the index.
515  Out.write(OutputBuffer.data(), OutputBuffer.size());
516  Out.close();
517  if (Out.has_error())
518    return EC_IOError;
519
520  // Remove the old index file. It isn't relevant any more.
521  bool OldIndexExisted;
522  llvm::sys::fs::remove(IndexPath.str(), OldIndexExisted);
523
524  // Rename the newly-written index file to the proper name.
525  if (llvm::sys::fs::rename(IndexTmpPath.str(), IndexPath.str())) {
526    // Rename failed; just remove the
527    llvm::sys::fs::remove(IndexTmpPath.str(), OldIndexExisted);
528    return EC_IOError;
529  }
530
531  // We're done.
532  return EC_None;
533}
534