PTHLexer.cpp revision a4bd8eb4d6d4b625f6bbb62fc180b02eab6433ed
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
17#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/ADT/StringMap.h"
23#include "llvm/ADT/OwningPtr.h"
24#include "llvm/Support/Compiler.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/MemoryBuffer.h"
27#include "llvm/System/Host.h"
28using namespace clang;
29
30#define DISK_TOKEN_SIZE (1+1+2+4+4)
31
32//===----------------------------------------------------------------------===//
33// Utility methods for reading from the mmap'ed PTH file.
34//===----------------------------------------------------------------------===//
35
/// ReadUnalignedLE16 - Read a little-endian 16-bit value one byte at a time
/// from Data and advance Data by two bytes.  No alignment is required.
static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) {
  const uint16_t Lo = Data[0];
  const uint16_t Hi = Data[1];
  Data += 2;
  return (uint16_t)(Lo | (Hi << 8));
}
42
/// ReadUnalignedLE32 - Read a little-endian 32-bit value one byte at a time
/// from Data and advance Data by four bytes.  No alignment is required.
static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) {
  uint32_t Value = 0;
  // Byte i of the input supplies bits [8*i, 8*i+7] of the result.
  for (unsigned Byte = 0; Byte != 4; ++Byte)
    Value |= (uint32_t)Data[Byte] << (8 * Byte);
  Data += 4;
  return Value;
}
51
/// ReadLE32 - Read a little-endian 32-bit value from Data and advance Data
/// by four bytes.
///
/// The value is assembled one byte at a time, so the result is independent of
/// the host byte order and Data does not have to be 4-byte aligned.  Callers
/// in this file pass addresses with no alignment guarantee (for example the
/// version word that directly follows the 7-byte "cfe-pth" magic, and token
/// records that are DISK_TOKEN_SIZE bytes apart); loading through a casted
/// uint32_t* at such an address is undefined behavior and faults on hosts
/// that require aligned loads.
static inline uint32_t ReadLE32(const unsigned char *&Data) {
  uint32_t V = ((uint32_t)Data[0])       |
               ((uint32_t)Data[1] <<  8) |
               ((uint32_t)Data[2] << 16) |
               ((uint32_t)Data[3] << 24);
  Data += 4;
  return V;
}
61
// Bernstein hash function:
// This is basically copy-and-paste from StringMap.  This likely won't
// stay here, which is why I didn't bother to expose this function from
// StringMap.
//
// Hashes the NUL-terminated string 'x' (the terminator itself is not hashed).
static unsigned BernsteinHash(const char* x) {
  unsigned int Hash = 0;
  while (*x != '\0') {
    Hash = Hash * 33 + *x;
    ++x;
  }
  // Final mixing step: fold the high bits back into the result.
  return Hash + (Hash >> 5);
}
71
// Length-delimited variant of the Bernstein hash: hashes exactly 'n' bytes
// starting at 'x', including any embedded NUL bytes.
static unsigned BernsteinHash(const char* x, unsigned n) {
  unsigned int Hash = 0;
  for (const char* End = x + n; x != End; ++x)
    Hash = Hash * 33 + *x;
  // Final mixing step: fold the high bits back into the result.
  return Hash + (Hash >> 5);
}
77
78//===----------------------------------------------------------------------===//
79// PTHLexer methods.
80//===----------------------------------------------------------------------===//
81
82PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
83                   const unsigned char *ppcond, PTHManager &PM)
84  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
85    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
86
87  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
88}
89
90void PTHLexer::Lex(Token& Tok) {
91LexNextToken:
92
93  //===--------------------------------------==//
94  // Read the raw token data.
95  //===--------------------------------------==//
96
97  // Shadow CurPtr into an automatic variable.
98  const unsigned char *CurPtrShadow = CurPtr;
99
100  // Read in the data for the token.
101  unsigned Word0 = ReadLE32(CurPtrShadow);
102  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
103  uint32_t FileOffset = ReadLE32(CurPtrShadow);
104
105  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
106  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
107  uint32_t Len = Word0 >> 16;
108
109  CurPtr = CurPtrShadow;
110
111  //===--------------------------------------==//
112  // Construct the token itself.
113  //===--------------------------------------==//
114
115  Tok.startToken();
116  Tok.setKind(TKind);
117  Tok.setFlag(TFlags);
118  assert(!LexingRawMode);
119  Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
120  Tok.setLength(Len);
121
122  // Handle identifiers.
123  if (Tok.isLiteral()) {
124    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
125  }
126  else if (IdentifierID) {
127    MIOpt.ReadToken();
128    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
129
130    Tok.setIdentifierInfo(II);
131
132    // Change the kind of this identifier to the appropriate token kind, e.g.
133    // turning "for" into a keyword.
134    Tok.setKind(II->getTokenID());
135
136    if (II->isHandleIdentifierCase())
137      PP->HandleIdentifier(Tok);
138    return;
139  }
140
141  //===--------------------------------------==//
142  // Process the token.
143  //===--------------------------------------==//
144#if 0
145  SourceManager& SM = PP->getSourceManager();
146  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
147    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
148    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
149    << '\n';
150#endif
151
152  if (TKind == tok::eof) {
153    // Save the end-of-file token.
154    EofToken = Tok;
155
156    Preprocessor *PPCache = PP;
157
158    assert(!ParsingPreprocessorDirective);
159    assert(!LexingRawMode);
160
161    // FIXME: Issue diagnostics similar to Lexer.
162    if (PP->HandleEndOfFile(Tok, false))
163      return;
164
165    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
166    return PPCache->Lex(Tok);
167  }
168
169  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
170    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
171    assert(!LexingRawMode);
172    PP->HandleDirective(Tok);
173
174    if (PP->isCurrentLexer(this))
175      goto LexNextToken;
176
177    return PP->Lex(Tok);
178  }
179
180  if (TKind == tok::eom) {
181    assert(ParsingPreprocessorDirective);
182    ParsingPreprocessorDirective = false;
183    return;
184  }
185
186  MIOpt.ReadToken();
187}
188
189// FIXME: We can just grab the last token instead of storing a copy
190// into EofToken.
191void PTHLexer::getEOF(Token& Tok) {
192  assert(EofToken.is(tok::eof));
193  Tok = EofToken;
194}
195
196void PTHLexer::DiscardToEndOfLine() {
197  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
198         "Must be in a preprocessing directive!");
199
200  // We assume that if the preprocessor wishes to discard to the end of
201  // the line that it also means to end the current preprocessor directive.
202  ParsingPreprocessorDirective = false;
203
204  // Skip tokens by only peeking at their token kind and the flags.
205  // We don't need to actually reconstruct full tokens from the token buffer.
206  // This saves some copies and it also reduces IdentifierInfo* lookup.
207  const unsigned char* p = CurPtr;
208  while (1) {
209    // Read the token kind.  Are we at the end of the file?
210    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
211    if (x == tok::eof) break;
212
213    // Read the token flags.  Are we at the start of the next line?
214    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
215    if (y & Token::StartOfLine) break;
216
217    // Skip to the next token.
218    p += DISK_TOKEN_SIZE;
219  }
220
221  CurPtr = p;
222}
223
224/// SkipBlock - Used by Preprocessor to skip the current conditional block.
225bool PTHLexer::SkipBlock() {
226  assert(CurPPCondPtr && "No cached PP conditional information.");
227  assert(LastHashTokPtr && "No known '#' token.");
228
229  const unsigned char* HashEntryI = 0;
230  uint32_t Offset;
231  uint32_t TableIdx;
232
233  do {
234    // Read the token offset from the side-table.
235    Offset = ReadLE32(CurPPCondPtr);
236
237    // Read the target table index from the side-table.
238    TableIdx = ReadLE32(CurPPCondPtr);
239
240    // Compute the actual memory address of the '#' token data for this entry.
241    HashEntryI = TokBuf + Offset;
242
243    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
244    //  contain nested blocks.  In the side-table we can jump over these
245    //  nested blocks instead of doing a linear search if the next "sibling"
246    //  entry is not at a location greater than LastHashTokPtr.
247    if (HashEntryI < LastHashTokPtr && TableIdx) {
248      // In the side-table we are still at an entry for a '#' token that
249      // is earlier than the last one we saw.  Check if the location we would
250      // stride gets us closer.
251      const unsigned char* NextPPCondPtr =
252        PPCond + TableIdx*(sizeof(uint32_t)*2);
253      assert(NextPPCondPtr >= CurPPCondPtr);
254      // Read where we should jump to.
255      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
256      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
257
258      if (HashEntryJ <= LastHashTokPtr) {
259        // Jump directly to the next entry in the side table.
260        HashEntryI = HashEntryJ;
261        Offset = TmpOffset;
262        TableIdx = ReadLE32(NextPPCondPtr);
263        CurPPCondPtr = NextPPCondPtr;
264      }
265    }
266  }
267  while (HashEntryI < LastHashTokPtr);
268  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
269  assert(TableIdx && "No jumping from #endifs.");
270
271  // Update our side-table iterator.
272  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
273  assert(NextPPCondPtr >= CurPPCondPtr);
274  CurPPCondPtr = NextPPCondPtr;
275
276  // Read where we should jump to.
277  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
278  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
279
280  // By construction NextIdx will be zero if this is a #endif.  This is useful
281  // to know to obviate lexing another token.
282  bool isEndif = NextIdx == 0;
283
284  // This case can occur when we see something like this:
285  //
286  //  #if ...
287  //   /* a comment or nothing */
288  //  #elif
289  //
290  // If we are skipping the first #if block it will be the case that CurPtr
291  // already points 'elif'.  Just return.
292
293  if (CurPtr > HashEntryI) {
294    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
295    // Did we reach a #endif?  If so, go ahead and consume that token as well.
296    if (isEndif)
297      CurPtr += DISK_TOKEN_SIZE*2;
298    else
299      LastHashTokPtr = HashEntryI;
300
301    return isEndif;
302  }
303
304  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
305  CurPtr = HashEntryI;
306
307  // Update the location of the last observed '#'.  This is useful if we
308  // are skipping multiple blocks.
309  LastHashTokPtr = CurPtr;
310
311  // Skip the '#' token.
312  assert(((tok::TokenKind)*CurPtr) == tok::hash);
313  CurPtr += DISK_TOKEN_SIZE;
314
315  // Did we reach a #endif?  If so, go ahead and consume that token as well.
316  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
317
318  return isEndif;
319}
320
321SourceLocation PTHLexer::getSourceLocation() {
322  // getSourceLocation is not on the hot path.  It is used to get the location
323  // of the next token when transitioning back to this lexer when done
324  // handling a #included file.  Just read the necessary data from the token
325  // data buffer to construct the SourceLocation object.
326  // NOTE: This is a virtual function; hence it is defined out-of-line.
327  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
328  uint32_t Offset = ReadLE32(OffsetPtr);
329  return FileStartLoc.getFileLocWithOffset(Offset);
330}
331
332//===----------------------------------------------------------------------===//
333// OnDiskChainedHashTable
334//===----------------------------------------------------------------------===//
335
336template<typename Info>
337class OnDiskChainedHashTable {
338  const unsigned NumBuckets;
339  const unsigned NumEntries;
340  const unsigned char* const Buckets;
341  const unsigned char* const Base;
342public:
343  typedef typename Info::internal_key_type internal_key_type;
344  typedef typename Info::external_key_type external_key_type;
345  typedef typename Info::data_type         data_type;
346
347  OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
348                         const unsigned char* buckets,
349                         const unsigned char* base)
350    : NumBuckets(numBuckets), NumEntries(numEntries),
351      Buckets(buckets), Base(base) {
352        assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
353               "'buckets' must have a 4-byte alignment");
354      }
355
356
357  bool isEmpty() const { return NumEntries == 0; }
358
359  class iterator {
360    const unsigned char* const data;
361    const unsigned len;
362  public:
363    iterator() : data(0), len(0) {}
364    iterator(const unsigned char* d, unsigned l) : data(d), len(l) {}
365
366    data_type operator*() const { return Info::ReadData(data, len); }
367    bool operator==(const iterator& X) const { return X.data == data; }
368    bool operator!=(const iterator& X) const { return X.data != data; }
369  };
370
371  iterator find(const external_key_type& eKey) {
372    const internal_key_type& iKey = Info::GetInternalKey(eKey);
373    unsigned key_hash = Info::ComputeHash(iKey);
374
375    // Each bucket is just a 32-bit offset into the PTH file.
376    unsigned idx = key_hash & (NumBuckets - 1);
377    const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
378
379    unsigned offset = ReadLE32(Bucket);
380    if (offset == 0) return iterator(); // Empty bucket.
381    const unsigned char* Items = Base + offset;
382
383    // 'Items' starts with a 16-bit unsigned integer representing the
384    // number of items in this bucket.
385    unsigned len = ReadUnalignedLE16(Items);
386
387    for (unsigned i = 0; i < len; ++i) {
388      // Read the hash.
389      uint32_t item_hash = ReadUnalignedLE32(Items);
390
391      // Determine the length of the key and the data.
392      const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items);
393      unsigned item_len = L.first + L.second;
394
395      // Compare the hashes.  If they are not the same, skip the entry entirely.
396      if (item_hash != key_hash) {
397        Items += item_len;
398        continue;
399      }
400
401      // Read the key.
402      const internal_key_type& X =
403        Info::ReadKey((const unsigned char* const) Items, L.first);
404
405      // If the key doesn't match just skip reading the value.
406      if (!Info::EqualKey(X, iKey)) {
407        Items += item_len;
408        continue;
409      }
410
411      // The key matches!
412      return iterator(Items + L.first, L.second);
413    }
414
415    return iterator();
416  }
417
418  iterator end() const { return iterator(); }
419
420
421  static OnDiskChainedHashTable* Create(const unsigned char* buckets,
422                                        const unsigned char* const base) {
423
424    assert(buckets > base);
425    assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
426           "buckets should be 4-byte aligned.");
427
428    unsigned numBuckets = ReadLE32(buckets);
429    unsigned numEntries = ReadLE32(buckets);
430    return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
431                                            base);
432  }
433};
434
435//===----------------------------------------------------------------------===//
436// PTH file lookup: map from strings to file data.
437//===----------------------------------------------------------------------===//
438
439/// PTHFileLookup - This internal data structure is used by the PTHManager
440///  to map from FileEntry objects managed by FileManager to offsets within
441///  the PTH file.
442namespace {
443class VISIBILITY_HIDDEN PTHFileData {
444  const uint32_t TokenOff;
445  const uint32_t PPCondOff;
446public:
447  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
448    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
449
450  uint32_t getTokenOffset() const { return TokenOff; }
451  uint32_t getPPCondOffset() const { return PPCondOff; }
452};
453
454class VISIBILITY_HIDDEN PTHFileLookupTrait {
455public:
456  typedef PTHFileData      data_type;
457  typedef const FileEntry* external_key_type;
458  typedef const char*      internal_key_type;
459
460  static bool EqualKey(const char* a, const char* b) {
461    return strcmp(a, b) == 0;
462  }
463
464  static unsigned ComputeHash(const char* x) {
465    return BernsteinHash(x);
466  }
467
468  static const char* GetInternalKey(const FileEntry* FE) {
469    return FE->getName();
470  }
471
472  static std::pair<unsigned, unsigned>
473  ReadKeyDataLength(const unsigned char*& d) {
474    return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U);
475  }
476
477  static const char* ReadKey(const unsigned char* d, unsigned) {
478    return (const char*) d;
479  }
480
481  static PTHFileData ReadData(const unsigned char* d, unsigned) {
482    uint32_t x = ::ReadUnalignedLE32(d);
483    uint32_t y = ::ReadUnalignedLE32(d);
484    return PTHFileData(x, y);
485  }
486};
487
488class VISIBILITY_HIDDEN PTHStringLookupTrait {
489public:
490  typedef uint32_t
491          data_type;
492
493  typedef const std::pair<const char*, unsigned>
494          external_key_type;
495
496  typedef external_key_type internal_key_type;
497
498  static bool EqualKey(const internal_key_type& a,
499                       const internal_key_type& b) {
500    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
501                                  : false;
502  }
503
504  static unsigned ComputeHash(const internal_key_type& a) {
505    return BernsteinHash(a.first, a.second);
506  }
507
508  // This hopefully will just get inlined and removed by the optimizer.
509  static const internal_key_type&
510  GetInternalKey(const external_key_type& x) { return x; }
511
512  static std::pair<unsigned, unsigned>
513  ReadKeyDataLength(const unsigned char*& d) {
514    return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t));
515  }
516
517  static std::pair<const char*, unsigned>
518  ReadKey(const unsigned char* d, unsigned n) {
519      assert(n >= 2 && d[n-1] == '\0');
520      return std::make_pair((const char*) d, n-1);
521    }
522
523  static uint32_t ReadData(const unsigned char* d, unsigned) {
524    return ::ReadUnalignedLE32(d);
525  }
526};
527
528} // end anonymous namespace
529
530typedef OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
531typedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
532
533//===----------------------------------------------------------------------===//
534// PTHManager methods.
535//===----------------------------------------------------------------------===//
536
537PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
538                       const unsigned char* idDataTable,
539                       IdentifierInfo** perIDCache,
540                       void* stringIdLookup, unsigned numIds,
541                       const unsigned char* spellingBase)
542: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
543  IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
544  NumIds(numIds), PP(0), SpellingBase(spellingBase) {}
545
546PTHManager::~PTHManager() {
547  delete Buf;
548  delete (PTHFileLookup*) FileLookup;
549  delete (PTHStringIdLookup*) StringIdLookup;
550  free(PerIDCache);
551}
552
553static void InvalidPTH(Diagnostic *Diags, const char* Msg = 0) {
554  if (!Diags) return;
555  if (!Msg) Msg = "Invalid or corrupted PTH file";
556  unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note, Msg);
557  Diags->Report(FullSourceLoc(), DiagID);
558}
559
560PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) {
561  // Memory map the PTH file.
562  llvm::OwningPtr<llvm::MemoryBuffer>
563  File(llvm::MemoryBuffer::getFile(file.c_str()));
564
565  if (!File) {
566    if (Diags) {
567      unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note,
568                                               "PTH file %0 could not be read");
569      Diags->Report(FullSourceLoc(), DiagID) << file;
570    }
571
572    return 0;
573  }
574
575  // Get the buffer ranges and check if there are at least three 32-bit
576  // words at the end of the file.
577  const unsigned char* BufBeg = (unsigned char*)File->getBufferStart();
578  const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd();
579
580  // Check the prologue of the file.
581  if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) ||
582      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) {
583    InvalidPTH(Diags);
584    return 0;
585  }
586
587  // Read the PTH version.
588  const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1);
589  unsigned Version = ReadLE32(p);
590
591  if (Version != PTHManager::Version) {
592    InvalidPTH(Diags,
593        Version < PTHManager::Version
594        ? "PTH file uses an older PTH format that is no longer supported"
595        : "PTH file uses a newer PTH format that cannot be read");
596    return 0;
597  }
598
599  // Compute the address of the index table at the end of the PTH file.
600  const unsigned char *PrologueOffset = p;
601
602  if (PrologueOffset >= BufEnd) {
603    InvalidPTH(Diags);
604    return 0;
605  }
606
607  // Construct the file lookup table.  This will be used for mapping from
608  // FileEntry*'s to cached tokens.
609  const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
610  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
611
612  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
613    InvalidPTH(Diags);
614    return 0; // FIXME: Proper error diagnostic?
615  }
616
617  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
618  if (FL->isEmpty()) {
619    InvalidPTH(Diags, "PTH file contains no cached source data");
620    return 0;
621  }
622
623  // Get the location of the table mapping from persistent ids to the
624  // data needed to reconstruct identifiers.
625  const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
626  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
627
628  if (!(IData >= BufBeg && IData < BufEnd)) {
629    InvalidPTH(Diags);
630    return 0;
631  }
632
633  // Get the location of the hashtable mapping between strings and
634  // persistent IDs.
635  const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
636  const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset);
637  if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
638    InvalidPTH(Diags);
639    return 0;
640  }
641
642  llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable,
643                                                                  BufBeg));
644  if (SL->isEmpty()) {
645    InvalidPTH(Diags, "PTH file contains no identifiers.");
646    return 0;
647  }
648
649  // Get the location of the spelling cache.
650  const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
651  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
652  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
653    InvalidPTH(Diags);
654    return 0;
655  }
656
657  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
658  uint32_t NumIds = ReadLE32(IData);
659
660  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
661  // so that we in the best case only zero out memory once when the OS returns
662  // us new pages.
663  IdentifierInfo** PerIDCache = 0;
664
665  if (NumIds) {
666    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
667    if (!PerIDCache) {
668      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
669      return 0;
670    }
671  }
672
673  // Create the new PTHManager.
674  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
675                        SL.take(), NumIds, spellingBase);
676}
677IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
678  // Look in the PTH file for the string data for the IdentifierInfo object.
679  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
680  const unsigned char* IDData =
681    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
682  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
683
684  // Allocate the object.
685  std::pair<IdentifierInfo,const unsigned char*> *Mem =
686    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
687
688  Mem->second = IDData;
689  assert(IDData[0] != '\0');
690  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
691
692  // Store the new IdentifierInfo in the cache.
693  PerIDCache[PersistentID] = II;
694  assert(II->getName() && II->getName()[0] != '\0');
695  return II;
696}
697
698IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
699  PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
700  // Double check our assumption that the last character isn't '\0'.
701  assert(NameStart[NameEnd-NameStart-1] != '\0');
702  PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart,
703                                                         NameEnd - NameStart));
704  if (I == SL.end()) // No identifier found?
705    return 0;
706
707  // Match found.  Return the identifier!
708  assert(*I > 0);
709  return GetIdentifierInfo(*I-1);
710}
711
712PTHLexer *PTHManager::CreateLexer(FileID FID) {
713  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
714  if (!FE)
715    return 0;
716
717  // Lookup the FileEntry object in our file lookup data structure.  It will
718  // return a variant that indicates whether or not there is an offset within
719  // the PTH file that contains cached tokens.
720  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
721  PTHFileLookup::iterator I = PFL.find(FE);
722
723  if (I == PFL.end()) // No tokens available?
724    return 0;
725
726  const PTHFileData& FileData = *I;
727
728  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
729  // Compute the offset of the token data within the buffer.
730  const unsigned char* data = BufStart + FileData.getTokenOffset();
731
732  // Get the location of pp-conditional table.
733  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
734  uint32_t Len = ReadLE32(ppcond);
735  if (Len == 0) ppcond = 0;
736
737  assert(PP && "No preprocessor set yet!");
738  return new PTHLexer(*PP, FID, data, ppcond, *this);
739}
740