PTHLexer.cpp revision d8c02929fe70f03111be73e7b8c402c724238ee9
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
17#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/ADT/StringMap.h"
23#include "llvm/ADT/OwningPtr.h"
24#include "llvm/Support/Compiler.h"
25#include "llvm/Support/MathExtras.h"
26#include "llvm/Support/MemoryBuffer.h"
27#include "llvm/System/Host.h"
28using namespace clang;
29
30#define DISK_TOKEN_SIZE (1+1+2+4+4)
31
32//===----------------------------------------------------------------------===//
33// Utility methods for reading from the mmap'ed PTH file.
34//===----------------------------------------------------------------------===//
35
36static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) {
37  uint16_t V = ((uint16_t)Data[0]) |
38               ((uint16_t)Data[1] <<  8);
39  Data += 2;
40  return V;
41}
42
43static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) {
44  uint32_t V = ((uint32_t)Data[0])  |
45               ((uint32_t)Data[1] << 8)  |
46               ((uint32_t)Data[2] << 16) |
47               ((uint32_t)Data[3] << 24);
48  Data += 4;
49  return V;
50}
51
52static inline uint32_t ReadLE32(const unsigned char *&Data) {
53  // Hosts that directly support little-endian 32-bit loads can just
54  // use them.  Big-endian hosts need a bswap.
55  uint32_t V = *((uint32_t*)Data);
56  if (llvm::sys::isBigEndianHost())
57    V = llvm::ByteSwap_32(V);
58  Data += 4;
59  return V;
60}
61
62
63//===----------------------------------------------------------------------===//
64// PTHLexer methods.
65//===----------------------------------------------------------------------===//
66
67PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
68                   const unsigned char *ppcond, PTHManager &PM)
69  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
70    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
71
72  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
73}
74
75void PTHLexer::Lex(Token& Tok) {
76LexNextToken:
77
78  //===--------------------------------------==//
79  // Read the raw token data.
80  //===--------------------------------------==//
81
82  // Shadow CurPtr into an automatic variable.
83  const unsigned char *CurPtrShadow = CurPtr;
84
85  // Read in the data for the token.
86  unsigned Word0 = ReadLE32(CurPtrShadow);
87  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
88  uint32_t FileOffset = ReadLE32(CurPtrShadow);
89
90  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
91  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
92  uint32_t Len = Word0 >> 16;
93
94  CurPtr = CurPtrShadow;
95
96  //===--------------------------------------==//
97  // Construct the token itself.
98  //===--------------------------------------==//
99
100  Tok.startToken();
101  Tok.setKind(TKind);
102  Tok.setFlag(TFlags);
103  assert(!LexingRawMode);
104  Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
105  Tok.setLength(Len);
106
107  // Handle identifiers.
108  if (Tok.isLiteral()) {
109    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
110  }
111  else if (IdentifierID) {
112    MIOpt.ReadToken();
113    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
114
115    Tok.setIdentifierInfo(II);
116
117    // Change the kind of this identifier to the appropriate token kind, e.g.
118    // turning "for" into a keyword.
119    Tok.setKind(II->getTokenID());
120
121    if (II->isHandleIdentifierCase())
122      PP->HandleIdentifier(Tok);
123    return;
124  }
125
126  //===--------------------------------------==//
127  // Process the token.
128  //===--------------------------------------==//
129#if 0
130  SourceManager& SM = PP->getSourceManager();
131  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
132    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
133    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
134    << '\n';
135#endif
136
137  if (TKind == tok::eof) {
138    // Save the end-of-file token.
139    EofToken = Tok;
140
141    Preprocessor *PPCache = PP;
142
143    assert(!ParsingPreprocessorDirective);
144    assert(!LexingRawMode);
145
146    // FIXME: Issue diagnostics similar to Lexer.
147    if (PP->HandleEndOfFile(Tok, false))
148      return;
149
150    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
151    return PPCache->Lex(Tok);
152  }
153
154  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
155    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
156    assert(!LexingRawMode);
157    PP->HandleDirective(Tok);
158
159    if (PP->isCurrentLexer(this))
160      goto LexNextToken;
161
162    return PP->Lex(Tok);
163  }
164
165  if (TKind == tok::eom) {
166    assert(ParsingPreprocessorDirective);
167    ParsingPreprocessorDirective = false;
168    return;
169  }
170
171  MIOpt.ReadToken();
172}
173
174// FIXME: We can just grab the last token instead of storing a copy
175// into EofToken.
176void PTHLexer::getEOF(Token& Tok) {
177  assert(EofToken.is(tok::eof));
178  Tok = EofToken;
179}
180
181void PTHLexer::DiscardToEndOfLine() {
182  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
183         "Must be in a preprocessing directive!");
184
185  // We assume that if the preprocessor wishes to discard to the end of
186  // the line that it also means to end the current preprocessor directive.
187  ParsingPreprocessorDirective = false;
188
189  // Skip tokens by only peeking at their token kind and the flags.
190  // We don't need to actually reconstruct full tokens from the token buffer.
191  // This saves some copies and it also reduces IdentifierInfo* lookup.
192  const unsigned char* p = CurPtr;
193  while (1) {
194    // Read the token kind.  Are we at the end of the file?
195    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
196    if (x == tok::eof) break;
197
198    // Read the token flags.  Are we at the start of the next line?
199    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
200    if (y & Token::StartOfLine) break;
201
202    // Skip to the next token.
203    p += DISK_TOKEN_SIZE;
204  }
205
206  CurPtr = p;
207}
208
209/// SkipBlock - Used by Preprocessor to skip the current conditional block.
210bool PTHLexer::SkipBlock() {
211  assert(CurPPCondPtr && "No cached PP conditional information.");
212  assert(LastHashTokPtr && "No known '#' token.");
213
214  const unsigned char* HashEntryI = 0;
215  uint32_t Offset;
216  uint32_t TableIdx;
217
218  do {
219    // Read the token offset from the side-table.
220    Offset = ReadLE32(CurPPCondPtr);
221
222    // Read the target table index from the side-table.
223    TableIdx = ReadLE32(CurPPCondPtr);
224
225    // Compute the actual memory address of the '#' token data for this entry.
226    HashEntryI = TokBuf + Offset;
227
228    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
229    //  contain nested blocks.  In the side-table we can jump over these
230    //  nested blocks instead of doing a linear search if the next "sibling"
231    //  entry is not at a location greater than LastHashTokPtr.
232    if (HashEntryI < LastHashTokPtr && TableIdx) {
233      // In the side-table we are still at an entry for a '#' token that
234      // is earlier than the last one we saw.  Check if the location we would
235      // stride gets us closer.
236      const unsigned char* NextPPCondPtr =
237        PPCond + TableIdx*(sizeof(uint32_t)*2);
238      assert(NextPPCondPtr >= CurPPCondPtr);
239      // Read where we should jump to.
240      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
241      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
242
243      if (HashEntryJ <= LastHashTokPtr) {
244        // Jump directly to the next entry in the side table.
245        HashEntryI = HashEntryJ;
246        Offset = TmpOffset;
247        TableIdx = ReadLE32(NextPPCondPtr);
248        CurPPCondPtr = NextPPCondPtr;
249      }
250    }
251  }
252  while (HashEntryI < LastHashTokPtr);
253  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
254  assert(TableIdx && "No jumping from #endifs.");
255
256  // Update our side-table iterator.
257  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
258  assert(NextPPCondPtr >= CurPPCondPtr);
259  CurPPCondPtr = NextPPCondPtr;
260
261  // Read where we should jump to.
262  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
263  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
264
265  // By construction NextIdx will be zero if this is a #endif.  This is useful
266  // to know to obviate lexing another token.
267  bool isEndif = NextIdx == 0;
268
269  // This case can occur when we see something like this:
270  //
271  //  #if ...
272  //   /* a comment or nothing */
273  //  #elif
274  //
275  // If we are skipping the first #if block it will be the case that CurPtr
276  // already points 'elif'.  Just return.
277
278  if (CurPtr > HashEntryI) {
279    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
280    // Did we reach a #endif?  If so, go ahead and consume that token as well.
281    if (isEndif)
282      CurPtr += DISK_TOKEN_SIZE*2;
283    else
284      LastHashTokPtr = HashEntryI;
285
286    return isEndif;
287  }
288
289  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
290  CurPtr = HashEntryI;
291
292  // Update the location of the last observed '#'.  This is useful if we
293  // are skipping multiple blocks.
294  LastHashTokPtr = CurPtr;
295
296  // Skip the '#' token.
297  assert(((tok::TokenKind)*CurPtr) == tok::hash);
298  CurPtr += DISK_TOKEN_SIZE;
299
300  // Did we reach a #endif?  If so, go ahead and consume that token as well.
301  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
302
303  return isEndif;
304}
305
306SourceLocation PTHLexer::getSourceLocation() {
307  // getSourceLocation is not on the hot path.  It is used to get the location
308  // of the next token when transitioning back to this lexer when done
309  // handling a #included file.  Just read the necessary data from the token
310  // data buffer to construct the SourceLocation object.
311  // NOTE: This is a virtual function; hence it is defined out-of-line.
312  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
313  uint32_t Offset = ReadLE32(OffsetPtr);
314  return FileStartLoc.getFileLocWithOffset(Offset);
315}
316
317//===----------------------------------------------------------------------===//
318// OnDiskChainedHashTable
319//===----------------------------------------------------------------------===//
320
321template<typename Info>
322class OnDiskChainedHashTable {
323  const unsigned NumBuckets;
324  const unsigned NumEntries;
325  const unsigned char* const Buckets;
326  const unsigned char* const Base;
327public:
328  typedef typename Info::internal_key_type internal_key_type;
329  typedef typename Info::external_key_type external_key_type;
330  typedef typename Info::data_type         data_type;
331
332  OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
333                         const unsigned char* buckets,
334                         const unsigned char* base)
335    : NumBuckets(numBuckets), NumEntries(numEntries),
336      Buckets(buckets), Base(base) {
337        assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
338               "'buckets' must have a 4-byte alignment");
339      }
340
341
342  bool isEmpty() const { return NumEntries == 0; }
343
344  class iterator {
345    const unsigned char* const data;
346    const unsigned len;
347  public:
348    iterator() : data(0), len(0) {}
349    iterator(const unsigned char* d, unsigned l) : data(d), len(l) {}
350
351    data_type operator*() const { return Info::ReadData(data, len); }
352    bool operator==(const iterator& X) const { return X.data == data; }
353    bool operator!=(const iterator& X) const { return X.data != data; }
354  };
355
356  iterator find(const external_key_type& eKey) {
357    const internal_key_type& iKey = Info::GetInternalKey(eKey);
358    unsigned key_hash = Info::ComputeHash(iKey);
359
360    // Each bucket is just a 32-bit offset into the PTH file.
361    unsigned idx = key_hash & (NumBuckets - 1);
362    const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
363
364    unsigned offset = ReadLE32(Bucket);
365    if (offset == 0) return iterator(); // Empty bucket.
366    const unsigned char* Items = Base + offset;
367
368    // 'Items' starts with a 16-bit unsigned integer representing the
369    // number of items in this bucket.
370    unsigned len = ReadUnalignedLE16(Items);
371
372    for (unsigned i = 0; i < len; ++i) {
373      // Read the hash.
374      uint32_t item_hash = ReadUnalignedLE32(Items);
375
376      // Determine the length of the key and the data.
377      const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items);
378      unsigned item_len = L.first + L.second;
379
380      // Compare the hashes.  If they are not the same, skip the entry entirely.
381      if (item_hash != key_hash) {
382        Items += item_len;
383        continue;
384      }
385
386      // Read the key.
387      const internal_key_type& X =
388        Info::ReadKey((const unsigned char* const) Items, L.first);
389
390      // If the key doesn't match just skip reading the value.
391      if (!Info::EqualKey(X, iKey)) {
392        Items += item_len;
393        continue;
394      }
395
396      // The key matches!
397      return iterator(Items + L.first, L.second);
398    }
399
400    return iterator();
401  }
402
403  iterator end() const { return iterator(); }
404
405
406  static OnDiskChainedHashTable* Create(const unsigned char* buckets,
407                                        const unsigned char* const base) {
408
409    assert(buckets > base);
410    assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
411           "buckets should be 4-byte aligned.");
412
413    unsigned numBuckets = ReadLE32(buckets);
414    unsigned numEntries = ReadLE32(buckets);
415    return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
416                                            base);
417  }
418};
419
420//===----------------------------------------------------------------------===//
421// PTH file lookup: map from strings to file data.
422//===----------------------------------------------------------------------===//
423
424/// PTHFileLookup - This internal data structure is used by the PTHManager
425///  to map from FileEntry objects managed by FileManager to offsets within
426///  the PTH file.
427namespace {
428class VISIBILITY_HIDDEN PTHFileData {
429  const uint32_t TokenOff;
430  const uint32_t PPCondOff;
431public:
432  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
433    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
434
435  uint32_t getTokenOffset() const { return TokenOff; }
436  uint32_t getPPCondOffset() const { return PPCondOff; }
437};
438
439class VISIBILITY_HIDDEN PTHFileLookupTrait {
440public:
441  typedef PTHFileData      data_type;
442  typedef const FileEntry* external_key_type;
443  typedef const char*      internal_key_type;
444
445  static bool EqualKey(const char* a, const char* b) {
446    return strcmp(a, b) == 0;
447  }
448
449  static unsigned ComputeHash(const char* x) {
450    // More copy-paste nonsense.  Will refactor.
451    unsigned int R = 0;
452    for (; *x != '\0' ; ++x) R = R * 33 + *x;
453    return R + (R >> 5);
454  }
455
456  static const char* GetInternalKey(const FileEntry* FE) {
457    return FE->getName();
458  }
459
460  static std::pair<unsigned, unsigned>
461  ReadKeyDataLength(const unsigned char*& d) {
462    return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U);
463  }
464
465  static const char* ReadKey(const unsigned char* d, unsigned) {
466    return (const char*) d;
467  }
468
469  static PTHFileData ReadData(const unsigned char* d, unsigned) {
470    uint32_t x = ::ReadUnalignedLE32(d);
471    uint32_t y = ::ReadUnalignedLE32(d);
472    return PTHFileData(x, y);
473  }
474};
475} // end anonymous namespace
476
477typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup;
478
479//===----------------------------------------------------------------------===//
480// PTHManager methods.
481//===----------------------------------------------------------------------===//
482
483PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
484                       const unsigned char* idDataTable,
485                       IdentifierInfo** perIDCache,
486                       const unsigned char* sortedIdTable, unsigned numIds,
487                       const unsigned char* spellingBase)
488: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
489  IdDataTable(idDataTable), SortedIdTable(sortedIdTable),
490  NumIds(numIds), PP(0), SpellingBase(spellingBase) {}
491
492PTHManager::~PTHManager() {
493  delete Buf;
494  delete (PTHFileLookup*) FileLookup;
495  free(PerIDCache);
496}
497
498static void InvalidPTH(Diagnostic *Diags, const char* Msg = 0) {
499  if (!Diags) return;
500  if (!Msg) Msg = "Invalid or corrupted PTH file";
501  unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note, Msg);
502  Diags->Report(FullSourceLoc(), DiagID);
503}
504
505PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) {
506  // Memory map the PTH file.
507  llvm::OwningPtr<llvm::MemoryBuffer>
508  File(llvm::MemoryBuffer::getFile(file.c_str()));
509
510  if (!File) {
511    if (Diags) {
512      unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note,
513                                               "PTH file %0 could not be read");
514      Diags->Report(FullSourceLoc(), DiagID) << file;
515    }
516
517    return 0;
518  }
519
520  // Get the buffer ranges and check if there are at least three 32-bit
521  // words at the end of the file.
522  const unsigned char* BufBeg = (unsigned char*)File->getBufferStart();
523  const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd();
524
525  // Check the prologue of the file.
526  if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) ||
527      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) {
528    InvalidPTH(Diags);
529    return 0;
530  }
531
532  // Read the PTH version.
533  const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1);
534  unsigned Version = ReadLE32(p);
535
536  if (Version != PTHManager::Version) {
537    InvalidPTH(Diags,
538        Version < PTHManager::Version
539        ? "PTH file uses an older PTH format that is no longer supported"
540        : "PTH file uses a newer PTH format that cannot be read");
541    return 0;
542  }
543
544  // Compute the address of the index table at the end of the PTH file.
545  const unsigned char *EndTable = BufBeg + ReadLE32(p);
546
547  if (EndTable >= BufEnd) {
548    InvalidPTH(Diags);
549    return 0;
550  }
551
552  // Construct the file lookup table.  This will be used for mapping from
553  // FileEntry*'s to cached tokens.
554  const unsigned char* FileTableOffset = EndTable + sizeof(uint32_t)*3;
555  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
556
557  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
558    InvalidPTH(Diags);
559    return 0; // FIXME: Proper error diagnostic?
560  }
561
562  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
563  if (FL->isEmpty()) {
564    InvalidPTH(Diags, "PTH file contains no cached source data");
565    return 0;
566  }
567
568  // Get the location of the table mapping from persistent ids to the
569  // data needed to reconstruct identifiers.
570  const unsigned char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
571  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
572
573  if (!(IData >= BufBeg && IData < BufEnd)) {
574    InvalidPTH(Diags);
575    return 0;
576  }
577
578  // Get the location of the lexigraphically-sorted table of persistent IDs.
579  const unsigned char* SortedIdTableOffset = EndTable + sizeof(uint32_t)*2;
580  const unsigned char* SortedIdTable = BufBeg + ReadLE32(SortedIdTableOffset);
581  if (!(SortedIdTable >= BufBeg && SortedIdTable < BufEnd)) {
582    InvalidPTH(Diags);
583    return 0;
584  }
585
586  // Get the location of the spelling cache.
587  const unsigned char* spellingBaseOffset = EndTable + sizeof(uint32_t)*4;
588  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
589  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
590    InvalidPTH(Diags);
591    return 0;
592  }
593
594  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
595  uint32_t NumIds = ReadLE32(IData);
596
597  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
598  // so that we in the best case only zero out memory once when the OS returns
599  // us new pages.
600  IdentifierInfo** PerIDCache = 0;
601
602  if (NumIds) {
603    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
604    if (!PerIDCache) {
605      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
606      return 0;
607    }
608  }
609
610  // Create the new PTHManager.
611  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
612                        SortedIdTable, NumIds, spellingBase);
613}
614IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
615  // Look in the PTH file for the string data for the IdentifierInfo object.
616  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
617  const unsigned char* IDData =
618    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
619  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
620
621  // Allocate the object.
622  std::pair<IdentifierInfo,const unsigned char*> *Mem =
623    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
624
625  Mem->second = IDData;
626  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
627
628  // Store the new IdentifierInfo in the cache.
629  PerIDCache[PersistentID] = II;
630  return II;
631}
632
633IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
634  unsigned min = 0;
635  unsigned max = NumIds;
636  unsigned Len = NameEnd - NameStart;
637
638  do {
639    unsigned i = (max - min) / 2 + min;
640    const unsigned char *Ptr = SortedIdTable + (i * 4);
641
642    // Read the persistentID.
643    unsigned perID = ReadLE32(Ptr);
644
645    // Get the IdentifierInfo.
646    IdentifierInfo* II = GetIdentifierInfo(perID);
647
648    // First compare the lengths.
649    unsigned IILen = II->getLength();
650    if (Len < IILen) goto IsLess;
651    if (Len > IILen) goto IsGreater;
652
653    // Now compare the strings!
654    {
655      signed comp = strncmp(NameStart, II->getName(), Len);
656      if (comp < 0) goto IsLess;
657      if (comp > 0) goto IsGreater;
658    }
659    // We found a match!
660    return II;
661
662  IsGreater:
663    if (i == min) break;
664    min = i;
665    continue;
666
667  IsLess:
668    max = i;
669    assert(!(max == min) || (min == i));
670  }
671  while (min != max);
672
673  return 0;
674}
675
676
677PTHLexer *PTHManager::CreateLexer(FileID FID) {
678  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
679  if (!FE)
680    return 0;
681
682  // Lookup the FileEntry object in our file lookup data structure.  It will
683  // return a variant that indicates whether or not there is an offset within
684  // the PTH file that contains cached tokens.
685  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
686  PTHFileLookup::iterator I = PFL.find(FE);
687
688  if (I == PFL.end()) // No tokens available?
689    return 0;
690
691  const PTHFileData& FileData = *I;
692
693  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
694  // Compute the offset of the token data within the buffer.
695  const unsigned char* data = BufStart + FileData.getTokenOffset();
696
697  // Get the location of pp-conditional table.
698  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
699  uint32_t Len = ReadLE32(ppcond);
700  if (Len == 0) ppcond = 0;
701
702  assert(PP && "No preprocessor set yet!");
703  return new PTHLexer(*PP, FID, data, ppcond, *this);
704}
705