PTHLexer.cpp revision 0c6a77bc1f52f282a969538f139ebde429076ed3
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
17#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
26#include "llvm/ADT/DenseMap.h"
27
28using namespace clang;
29
30PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
31                   PTHManager& PM)
32  : TokBuf(D), PreprocessorLexer(&pp, fileloc), CurTokenIdx(0), PTHMgr(PM),
33    NeedsFetching(true) {
34    // Make sure the EofToken is completely clean.
35    EofToken.startToken();
36  }
37
38Token PTHLexer::GetToken() {
39  // Read the next token, or if we haven't advanced yet, get the last
40  // token read.
41  if (NeedsFetching) {
42    NeedsFetching = false;
43    ReadToken(LastFetched);
44  }
45
46  Token Tok = LastFetched;
47
48  // If we are in raw mode, zero out identifier pointers.  This is
49  // needed for 'pragma poison'.  Note that this requires that the Preprocessor
50  // can go back to the original source when it calls getSpelling().
51  if (LexingRawMode && Tok.is(tok::identifier))
52    Tok.setIdentifierInfo(0);
53
54  return Tok;
55}
56
57void PTHLexer::Lex(Token& Tok) {
58LexNextToken:
59  Tok = GetToken();
60
61  if (AtLastToken()) {
62    Preprocessor *PPCache = PP;
63
64    if (LexEndOfFile(Tok))
65      return;
66
67    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
68    return PPCache->Lex(Tok);
69  }
70
71  // Don't advance to the next token yet.  Check if we are at the
72  // start of a new line and we're processing a directive.  If so, we
73  // consume this token twice, once as an tok::eom.
74  if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
75    ParsingPreprocessorDirective = false;
76    Tok.setKind(tok::eom);
77    MIOpt.ReadToken();
78    return;
79  }
80
81  // Advance to the next token.
82  AdvanceToken();
83
84  if (Tok.is(tok::hash)) {
85    if (Tok.isAtStartOfLine() && !LexingRawMode) {
86      PP->HandleDirective(Tok);
87
88      if (PP->isCurrentLexer(this))
89        goto LexNextToken;
90
91      return PP->Lex(Tok);
92    }
93  }
94
95  MIOpt.ReadToken();
96
97  if (Tok.is(tok::identifier)) {
98    if (LexingRawMode) return;
99    return PP->HandleIdentifier(Tok);
100  }
101}
102
103bool PTHLexer::LexEndOfFile(Token &Tok) {
104
105  if (ParsingPreprocessorDirective) {
106    ParsingPreprocessorDirective = false;
107    Tok.setKind(tok::eom);
108    MIOpt.ReadToken();
109    return true; // Have a token.
110  }
111
112  if (LexingRawMode) {
113    MIOpt.ReadToken();
114    return true;  // Have an eof token.
115  }
116
117  // FIXME: Issue diagnostics similar to Lexer.
118  return PP->HandleEndOfFile(Tok, false);
119}
120
121void PTHLexer::setEOF(Token& Tok) {
122  assert(!EofToken.is(tok::eof));
123  Tok = EofToken;
124}
125
126void PTHLexer::DiscardToEndOfLine() {
127  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
128         "Must be in a preprocessing directive!");
129
130  // Already at end-of-file?
131  if (AtLastToken())
132    return;
133
134  // Find the first token that is not the start of the *current* line.
135  Token T;
136  for (Lex(T); !AtLastToken(); Lex(T))
137    if (GetToken().isAtStartOfLine())
138      return;
139}
140
141//===----------------------------------------------------------------------===//
142// Utility methods for reading from the mmap'ed PTH file.
143//===----------------------------------------------------------------------===//
144
/// Read8 - Return the byte that \p data points at as an unsigned 8-bit value
/// and advance \p data by one byte.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = static_cast<uint8_t>(*data);
  ++data;
  return Byte;
}
148
149static inline uint32_t Read32(const char*& data) {
150  uint32_t V = (uint32_t) Read8(data);
151  V |= (((uint32_t) Read8(data)) << 8);
152  V |= (((uint32_t) Read8(data)) << 16);
153  V |= (((uint32_t) Read8(data)) << 24);
154  return V;
155}
156
157//===----------------------------------------------------------------------===//
158// Token reconstruction from the PTH file.
159//===----------------------------------------------------------------------===//
160
161void PTHLexer::ReadToken(Token& T) {
162  // Clear the token.
163  // FIXME: Setting the flags directly should obviate this step.
164  T.startToken();
165
166  // Read the type of the token.
167  T.setKind((tok::TokenKind) Read8(TokBuf));
168
169  // Set flags.  This is gross, since we are really setting multiple flags.
170  T.setFlag((Token::TokenFlags) Read8(TokBuf));
171
172  // Set the IdentifierInfo* (if any).
173  T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(TokBuf));
174
175  // Set the SourceLocation.  Since all tokens are constructed using a
176  // raw lexer, they will all be offseted from the same FileID.
177  T.setLocation(SourceLocation::getFileLoc(FileID, Read32(TokBuf)));
178
179  // Finally, read and set the length of the token.
180  T.setLength(Read32(TokBuf));
181}
182
183//===----------------------------------------------------------------------===//
184// Internal Data Structures for PTH file lookup and resolving identifiers.
185//===----------------------------------------------------------------------===//
186
/// IDCache - Maps a persistent identifier ID (already decremented to be
/// zero-based) to the IdentifierInfo it has been resolved to.
typedef llvm::DenseMap<uint32_t, IdentifierInfo*> IDCache;
188
189/// PTHFileLookup - This internal data structure is used by the PTHManager
190///  to map from FileEntry objects managed by FileManager to offsets within
191///  the PTH file.
192namespace {
193class VISIBILITY_HIDDEN PTHFileLookup {
194public:
195  class Val {
196    uint32_t v;
197
198  public:
199    Val() : v(~0) {}
200    Val(uint32_t x) : v(x) {}
201
202    operator uint32_t() const {
203      assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
204      return v;
205    }
206
207    Val& operator=(uint32_t x) { v = x; return *this; }
208    bool isValid() const { return v != ~((uint32_t)0); }
209  };
210
211private:
212  llvm::StringMap<Val> FileMap;
213
214public:
215  PTHFileLookup() {};
216
217  Val Lookup(const FileEntry* FE) {
218    const char* s = FE->getName();
219    unsigned size = strlen(s);
220    return FileMap.GetOrCreateValue(s, s+size).getValue();
221  }
222
223  void ReadTable(const char* D) {
224    uint32_t N = Read32(D);     // Read the length of the table.
225
226    for ( ; N > 0; --N) {       // The rest of the data is the table itself.
227      uint32_t len = Read32(D);
228      const char* s = D;
229      D += len;
230      FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D);
231    }
232  }
233};
234} // end anonymous namespace
235
236//===----------------------------------------------------------------------===//
237// PTHManager methods.
238//===----------------------------------------------------------------------===//
239
240PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
241                       const char* idDataTable, Preprocessor& pp)
242: Buf(buf), PersistentIDCache(0), FileLookup(fileLookup),
243IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
244
245PTHManager::~PTHManager() {
246  delete Buf;
247  delete (PTHFileLookup*) FileLookup;
248  delete (IDCache*) PersistentIDCache;
249}
250
251PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
252
253  // Memory map the PTH file.
254  llvm::OwningPtr<llvm::MemoryBuffer>
255  File(llvm::MemoryBuffer::getFile(file.c_str()));
256
257  if (!File)
258    return 0;
259
260  // Get the buffer ranges and check if there are at least three 32-bit
261  // words at the end of the file.
262  const char* BufBeg = File->getBufferStart();
263  const char* BufEnd = File->getBufferEnd();
264
265  if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
266    assert(false && "Invalid PTH file.");
267    return 0; // FIXME: Proper error diagnostic?
268  }
269
270  // Compute the address of the index table at the end of the PTH file.
271  // This table contains the offset of the file lookup table, the
272  // persistent ID -> identifer data table.
273  const char* EndTable = BufEnd - sizeof(uint32_t)*3;
274
275  // Construct the file lookup table.  This will be used for mapping from
276  // FileEntry*'s to cached tokens.
277  const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
278  const char* FileTable = BufBeg + Read32(FileTableOffset);
279
280  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
281    assert(false && "Invalid PTH file.");
282    return 0; // FIXME: Proper error diagnostic?
283  }
284
285  llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
286  FL->ReadTable(FileTable);
287
288  // Get the location of the table mapping from persistent ids to the
289  // data needed to reconstruct identifiers.
290  const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
291  const char* IData = BufBeg + Read32(IDTableOffset);
292  if (!(IData > BufBeg && IData < BufEnd)) {
293    assert(false && "Invalid PTH file.");
294    return 0; // FIXME: Proper error diagnostic?
295  }
296
297  return new PTHManager(File.take(), FL.take(), IData, PP);
298}
299
300IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
301  // Read the persistent ID from the PTH file.
302  uint32_t persistentID = Read32(D);
303
304  // A persistent ID of '0' always maps to NULL.
305  if (!persistentID)
306    return 0;
307
308  // Adjust the persistent ID by subtracting '1' so that it can be used
309  // as an index within a table in the PTH file.
310  --persistentID;
311
312  // Check if the IdentifierInfo has already been resolved.
313  if (!PersistentIDCache)
314    PersistentIDCache = new IDCache();
315
316  // FIXME: We can make this an array, but what is the performance tradeoff?
317  IdentifierInfo*& II = (*((IDCache*) PersistentIDCache))[persistentID];
318  if (II) return II;
319
320  // Look in the PTH file for the string data for the IdentifierInfo object.
321  const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
322  const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
323  assert(IDData < Buf->getBufferEnd());
324
325  // Read the length of the string.
326  uint32_t len = Read32(IDData);
327
328  // Get the IdentifierInfo* with the specified string.
329  II = &ITable.get(IDData, IDData+len);
330  return II;
331}
332
333PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
334
335  if (!FE)
336    return 0;
337
338  // Lookup the FileEntry object in our file lookup data structure.  It will
339  // return a variant that indicates whether or not there is an offset within
340  // the PTH file that contains cached tokens.
341  PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE);
342
343  if (!Off.isValid()) // No tokens available.
344    return 0;
345
346  // Compute the offset of the token data within the buffer.
347  const char* data = Buf->getBufferStart() + Off;
348  assert(data < Buf->getBufferEnd());
349  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
350}
351