PTHLexer.cpp revision b60d7999d621fce608e03d39e82c0e7eda750054
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
17#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
26
27using namespace clang;
28
// Size in bytes of one serialized token record: a 1-byte token kind, a 1-byte
// flag set, and three 32-bit words (persistent identifier ID, source offset,
// and token length) -- the same sequence PTHLexer::ReadToken consumes.
#define DISK_TOKEN_SIZE (1 + 1 + 3 * 4)
30
31PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
32                   const char* ppcond, PTHManager& PM)
33  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
34    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) {
35    // Make sure the EofToken is completely clean.
36    EofToken.startToken();
37  }
38
39Token PTHLexer::GetToken() {
40  // Read the next token, or if we haven't advanced yet, get the last
41  // token read.
42  if (NeedsFetching) {
43    NeedsFetching = false;
44    ReadToken(LastFetched);
45  }
46
47  Token Tok = LastFetched;
48
49  // If we are in raw mode, zero out identifier pointers.  This is
50  // needed for 'pragma poison'.  Note that this requires that the Preprocessor
51  // can go back to the original source when it calls getSpelling().
52  if (LexingRawMode && Tok.is(tok::identifier))
53    Tok.setIdentifierInfo(0);
54
55  return Tok;
56}
57
58void PTHLexer::Lex(Token& Tok) {
59LexNextToken:
60  Tok = GetToken();
61
62  if (AtLastToken()) {
63    Preprocessor *PPCache = PP;
64
65    if (LexEndOfFile(Tok))
66      return;
67
68    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
69    return PPCache->Lex(Tok);
70  }
71
72  // Don't advance to the next token yet.  Check if we are at the
73  // start of a new line and we're processing a directive.  If so, we
74  // consume this token twice, once as an tok::eom.
75  if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
76    ParsingPreprocessorDirective = false;
77    Tok.setKind(tok::eom);
78    MIOpt.ReadToken();
79    return;
80  }
81
82  // Advance to the next token.
83  AdvanceToken();
84
85  if (Tok.is(tok::hash)) {
86    if (Tok.isAtStartOfLine()) {
87      LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
88      if (!LexingRawMode) {
89        PP->HandleDirective(Tok);
90
91        if (PP->isCurrentLexer(this))
92          goto LexNextToken;
93
94        return PP->Lex(Tok);
95      }
96    }
97  }
98
99  MIOpt.ReadToken();
100
101  if (Tok.is(tok::identifier)) {
102    if (LexingRawMode) return;
103    return PP->HandleIdentifier(Tok);
104  }
105}
106
107bool PTHLexer::LexEndOfFile(Token &Tok) {
108
109  if (ParsingPreprocessorDirective) {
110    ParsingPreprocessorDirective = false;
111    Tok.setKind(tok::eom);
112    MIOpt.ReadToken();
113    return true; // Have a token.
114  }
115
116  if (LexingRawMode) {
117    MIOpt.ReadToken();
118    return true;  // Have an eof token.
119  }
120
121  // FIXME: Issue diagnostics similar to Lexer.
122  return PP->HandleEndOfFile(Tok, false);
123}
124
125void PTHLexer::setEOF(Token& Tok) {
126  assert(!EofToken.is(tok::eof));
127  Tok = EofToken;
128}
129
130void PTHLexer::DiscardToEndOfLine() {
131  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
132         "Must be in a preprocessing directive!");
133
134  // Already at end-of-file?
135  if (AtLastToken())
136    return;
137
138  // Find the first token that is not the start of the *current* line.
139  Token T;
140  for (Lex(T); !AtLastToken(); Lex(T))
141    if (GetToken().isAtStartOfLine())
142      return;
143}
144
145//===----------------------------------------------------------------------===//
146// Utility methods for reading from the mmap'ed PTH file.
147//===----------------------------------------------------------------------===//
148
/// Read8 - Return the byte at \p data as an unsigned value and advance
/// \p data past it.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = static_cast<uint8_t>(*data);
  ++data;
  return Byte;
}

/// Read32 - Assemble a 32-bit value from the four bytes at \p data, least
/// significant byte first, and advance \p data past them.
static inline uint32_t Read32(const char*& data) {
  uint32_t Value = 0;
  for (unsigned Shift = 0; Shift != 32; Shift += 8)
    Value |= static_cast<uint32_t>(Read8(data)) << Shift;
  return Value;
}
160
161/// SkipBlock - Used by Preprocessor to skip the current conditional block.
162bool PTHLexer::SkipBlock() {
163  assert(CurPPCondPtr && "No cached PP conditional information.");
164  assert(LastHashTokPtr && "No known '#' token.");
165
166  const char* HashEntryI = 0;
167  uint32_t Offset;
168  uint32_t TableIdx;
169
170  do {
171    // Read the token offset from the side-table.
172    Offset = Read32(CurPPCondPtr);
173
174    // Read the target table index from the side-table.
175    TableIdx = Read32(CurPPCondPtr);
176
177    // Compute the actual memory address of the '#' token data for this entry.
178    HashEntryI = TokBuf + Offset;
179
180    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
181    //  contain nested blocks.  In the side-table we can jump over these
182    //  nested blocks instead of doing a linear search if the next "sibling"
183    //  entry is not at a location greater than LastHashTokPtr.
184    if (HashEntryI < LastHashTokPtr && TableIdx) {
185      // In the side-table we are still at an entry for a '#' token that
186      // is earlier than the last one we saw.  Check if the location we would
187      // stride gets us closer.
188      const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
189      assert(NextPPCondPtr >= CurPPCondPtr);
190      // Read where we should jump to.
191      uint32_t TmpOffset = Read32(NextPPCondPtr);
192      const char* HashEntryJ = TokBuf + TmpOffset;
193
194      if (HashEntryJ <= LastHashTokPtr) {
195        // Jump directly to the next entry in the side table.
196        HashEntryI = HashEntryJ;
197        Offset = TmpOffset;
198        TableIdx = Read32(NextPPCondPtr);
199        CurPPCondPtr = NextPPCondPtr;
200      }
201    }
202  }
203  while (HashEntryI < LastHashTokPtr);
204  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
205  assert(TableIdx && "No jumping from #endifs.");
206
207  // Update our side-table iterator.
208  const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
209  assert(NextPPCondPtr >= CurPPCondPtr);
210  CurPPCondPtr = NextPPCondPtr;
211
212  // Read where we should jump to.
213  HashEntryI = TokBuf + Read32(NextPPCondPtr);
214  uint32_t NextIdx = Read32(NextPPCondPtr);
215
216  // By construction NextIdx will be zero if this is a #endif.  This is useful
217  // to know to obviate lexing another token.
218  bool isEndif = NextIdx == 0;
219  NeedsFetching = true;
220
221  // This case can occur when we see something like this:
222  //
223  //  #if ...
224  //   /* a comment or nothing */
225  //  #elif
226  //
227  // If we are skipping the first #if block it will be the case that CurPtr
228  // already points 'elif'.  Just return.
229
230  if (CurPtr > HashEntryI) {
231    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
232    // Did we reach a #endif?  If so, go ahead and consume that token as well.
233    if (isEndif)
234      CurPtr += DISK_TOKEN_SIZE;
235    else
236      LastHashTokPtr = HashEntryI;
237
238    return isEndif;
239  }
240
241  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
242  CurPtr = HashEntryI;
243
244  // Update the location of the last observed '#'.  This is useful if we
245  // are skipping multiple blocks.
246  LastHashTokPtr = CurPtr;
247
248#ifndef DEBUG
249  // In a debug build we should verify that the token is really a '#' that
250  // appears at the start of the line.
251  Token Tok;
252  ReadToken(Tok);
253  assert(Tok.isAtStartOfLine() && Tok.is(tok::hash));
254#else
255  // In a full release build we can just skip the token entirely.
256  CurPtr += DISK_TOKEN_SIZE;
257#endif
258
259  // Did we reach a #endif?  If so, go ahead and consume that token as well.
260  if (isEndif) { CurPtr += DISK_TOKEN_SIZE; }
261
262  return isEndif;
263}
264
265//===----------------------------------------------------------------------===//
266// Token reconstruction from the PTH file.
267//===----------------------------------------------------------------------===//
268
269void PTHLexer::ReadToken(Token& T) {
270  // Clear the token.
271  // FIXME: Setting the flags directly should obviate this step.
272  T.startToken();
273
274  // Shadow CurPtr into an automatic variable so that Read8 doesn't load and
275  // store back into the instance variable.
276  const char *CurPtrShadow = CurPtr;
277
278  // Read the type of the token.
279  T.setKind((tok::TokenKind) Read8(CurPtrShadow));
280
281  // Set flags.  This is gross, since we are really setting multiple flags.
282  T.setFlag((Token::TokenFlags) Read8(CurPtrShadow));
283
284  // Set the IdentifierInfo* (if any).
285  T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow));
286
287  // Set the SourceLocation.  Since all tokens are constructed using a
288  // raw lexer, they will all be offseted from the same FileID.
289  T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow)));
290
291  // Finally, read and set the length of the token.
292  T.setLength(Read32(CurPtrShadow));
293
294  CurPtr = CurPtrShadow;
295}
296
297//===----------------------------------------------------------------------===//
298// Internal Data Structures for PTH file lookup and resolving identifiers.
299//===----------------------------------------------------------------------===//
300
301
302/// PTHFileLookup - This internal data structure is used by the PTHManager
303///  to map from FileEntry objects managed by FileManager to offsets within
304///  the PTH file.
305namespace {
306class VISIBILITY_HIDDEN PTHFileLookup {
307public:
308  class Val {
309    uint32_t TokenOff;
310    uint32_t PPCondOff;
311
312  public:
313    Val() : TokenOff(~0) {}
314    Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {}
315
316    uint32_t getTokenOffset() const {
317      assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
318      return TokenOff;
319    }
320
321    uint32_t gettPPCondOffset() const {
322      assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
323      return PPCondOff;
324    }
325
326    bool isValid() const { return TokenOff != ~((uint32_t)0); }
327  };
328
329private:
330  llvm::StringMap<Val> FileMap;
331
332public:
333  PTHFileLookup() {};
334
335  Val Lookup(const FileEntry* FE) {
336    const char* s = FE->getName();
337    unsigned size = strlen(s);
338    return FileMap.GetOrCreateValue(s, s+size).getValue();
339  }
340
341  void ReadTable(const char* D) {
342    uint32_t N = Read32(D);     // Read the length of the table.
343
344    for ( ; N > 0; --N) {       // The rest of the data is the table itself.
345      uint32_t len = Read32(D);
346      const char* s = D;
347      D += len;
348      uint32_t TokenOff = Read32(D);
349      FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D));
350    }
351  }
352};
353} // end anonymous namespace
354
355//===----------------------------------------------------------------------===//
356// PTHManager methods.
357//===----------------------------------------------------------------------===//
358
359PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
360                       const char* idDataTable, IdentifierInfo** perIDCache,
361                       Preprocessor& pp)
362: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
363  IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
364
365PTHManager::~PTHManager() {
366  delete Buf;
367  delete (PTHFileLookup*) FileLookup;
368  free(PerIDCache);
369}
370
371PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
372
373  // Memory map the PTH file.
374  llvm::OwningPtr<llvm::MemoryBuffer>
375  File(llvm::MemoryBuffer::getFile(file.c_str()));
376
377  if (!File)
378    return 0;
379
380  // Get the buffer ranges and check if there are at least three 32-bit
381  // words at the end of the file.
382  const char* BufBeg = File->getBufferStart();
383  const char* BufEnd = File->getBufferEnd();
384
385  if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
386    assert(false && "Invalid PTH file.");
387    return 0; // FIXME: Proper error diagnostic?
388  }
389
390  // Compute the address of the index table at the end of the PTH file.
391  // This table contains the offset of the file lookup table, the
392  // persistent ID -> identifer data table.
393  const char* EndTable = BufEnd - sizeof(uint32_t)*3;
394
395  // Construct the file lookup table.  This will be used for mapping from
396  // FileEntry*'s to cached tokens.
397  const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
398  const char* FileTable = BufBeg + Read32(FileTableOffset);
399
400  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
401    assert(false && "Invalid PTH file.");
402    return 0; // FIXME: Proper error diagnostic?
403  }
404
405  llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
406  FL->ReadTable(FileTable);
407
408  // Get the location of the table mapping from persistent ids to the
409  // data needed to reconstruct identifiers.
410  const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
411  const char* IData = BufBeg + Read32(IDTableOffset);
412  if (!(IData > BufBeg && IData < BufEnd)) {
413    assert(false && "Invalid PTH file.");
414    return 0; // FIXME: Proper error diagnostic?
415  }
416
417  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
418  uint32_t NumIds = Read32(IData);
419
420  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
421  // so that we in the best case only zero out memory once when the OS returns
422  // us new pages.
423  IdentifierInfo** PerIDCache =
424    (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
425
426  if (!PerIDCache) {
427    assert(false && "Could not allocate Persistent ID cache.");
428    return 0;
429  }
430
431  // Create the new lexer.
432  return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
433}
434
435IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
436  // Read the persistent ID from the PTH file.
437  uint32_t persistentID = Read32(D);
438
439  // A persistent ID of '0' always maps to NULL.
440  if (!persistentID)
441    return 0;
442
443  // Adjust the persistent ID by subtracting '1' so that it can be used
444  // as an index within a table in the PTH file.
445  --persistentID;
446
447  // Check if the IdentifierInfo has already been resolved.
448  IdentifierInfo*& II = PerIDCache[persistentID];
449  if (II) return II;
450
451  // Look in the PTH file for the string data for the IdentifierInfo object.
452  const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
453  const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
454  assert(IDData < Buf->getBufferEnd());
455
456  // Read the length of the string.
457  uint32_t len = Read32(IDData);
458
459  // Get the IdentifierInfo* with the specified string.
460  II = &ITable.get(IDData, IDData+len);
461  return II;
462}
463
464PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
465
466  if (!FE)
467    return 0;
468
469  // Lookup the FileEntry object in our file lookup data structure.  It will
470  // return a variant that indicates whether or not there is an offset within
471  // the PTH file that contains cached tokens.
472  PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE);
473
474  if (!FileData.isValid()) // No tokens available.
475    return 0;
476
477  // Compute the offset of the token data within the buffer.
478  const char* data = Buf->getBufferStart() + FileData.getTokenOffset();
479
480  // Get the location of pp-conditional table.
481  const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset();
482  uint32_t len = Read32(ppcond);
483  if (len == 0) ppcond = 0;
484
485  assert(data < Buf->getBufferEnd());
486  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
487                      *this);
488}
489