PTHLexer.cpp revision cd223444d1680290efe11da657faafc9a1ac14ba
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the PTHLexer interface.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/Basic/TokenKinds.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Basic/IdentifierTable.h"
17#include "clang/Lex/PTHLexer.h"
18#include "clang/Lex/Preprocessor.h"
19#include "clang/Lex/PTHManager.h"
20#include "clang/Lex/Token.h"
21#include "clang/Lex/Preprocessor.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/ADT/StringMap.h"
25#include "llvm/ADT/OwningPtr.h"
26
27using namespace clang;
28
29PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D,
30                   PTHManager& PM)
31  : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
32    PTHMgr(PM),
33    NeedsFetching(true) {
34    // Make sure the EofToken is completely clean.
35    EofToken.startToken();
36  }
37
38Token PTHLexer::GetToken() {
39  // Read the next token, or if we haven't advanced yet, get the last
40  // token read.
41  if (NeedsFetching) {
42    NeedsFetching = false;
43    ReadToken(LastFetched);
44  }
45
46  Token Tok = LastFetched;
47
48  // If we are in raw mode, zero out identifier pointers.  This is
49  // needed for 'pragma poison'.  Note that this requires that the Preprocessor
50  // can go back to the original source when it calls getSpelling().
51  if (LexingRawMode && Tok.is(tok::identifier))
52    Tok.setIdentifierInfo(0);
53
54  return Tok;
55}
56
57void PTHLexer::Lex(Token& Tok) {
58LexNextToken:
59  Tok = GetToken();
60
61  if (AtLastToken()) {
62    Preprocessor *PPCache = PP;
63
64    if (LexEndOfFile(Tok))
65      return;
66
67    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
68    return PPCache->Lex(Tok);
69  }
70
71  // Don't advance to the next token yet.  Check if we are at the
72  // start of a new line and we're processing a directive.  If so, we
73  // consume this token twice, once as an tok::eom.
74  if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) {
75    ParsingPreprocessorDirective = false;
76    Tok.setKind(tok::eom);
77    MIOpt.ReadToken();
78    return;
79  }
80
81  // Advance to the next token.
82  AdvanceToken();
83
84  if (Tok.is(tok::hash)) {
85    if (Tok.isAtStartOfLine() && !LexingRawMode) {
86      LastHashTokPtr = CurPtr;
87
88      PP->HandleDirective(Tok);
89
90      if (PP->isCurrentLexer(this))
91        goto LexNextToken;
92
93      return PP->Lex(Tok);
94    }
95  }
96
97  MIOpt.ReadToken();
98
99  if (Tok.is(tok::identifier)) {
100    if (LexingRawMode) return;
101    return PP->HandleIdentifier(Tok);
102  }
103}
104
105bool PTHLexer::LexEndOfFile(Token &Tok) {
106
107  if (ParsingPreprocessorDirective) {
108    ParsingPreprocessorDirective = false;
109    Tok.setKind(tok::eom);
110    MIOpt.ReadToken();
111    return true; // Have a token.
112  }
113
114  if (LexingRawMode) {
115    MIOpt.ReadToken();
116    return true;  // Have an eof token.
117  }
118
119  // FIXME: Issue diagnostics similar to Lexer.
120  return PP->HandleEndOfFile(Tok, false);
121}
122
123void PTHLexer::setEOF(Token& Tok) {
124  assert(!EofToken.is(tok::eof));
125  Tok = EofToken;
126}
127
128void PTHLexer::DiscardToEndOfLine() {
129  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
130         "Must be in a preprocessing directive!");
131
132  // Already at end-of-file?
133  if (AtLastToken())
134    return;
135
136  // Find the first token that is not the start of the *current* line.
137  Token T;
138  for (Lex(T); !AtLastToken(); Lex(T))
139    if (GetToken().isAtStartOfLine())
140      return;
141}
142
143//===----------------------------------------------------------------------===//
144// Utility methods for reading from the mmap'ed PTH file.
145//===----------------------------------------------------------------------===//
146
/// Read8 - Return the byte at 'data' as an unsigned 8-bit value and advance
///  'data' past it.
static inline uint8_t Read8(const char*& data) {
  const uint8_t Byte = static_cast<uint8_t>(*data);
  ++data;
  return Byte;
}
150
/// Read32 - Decode a 32-bit value stored least-significant byte first at
///  'data' and advance 'data' by four bytes.
static inline uint32_t Read32(const char*& data) {
  uint32_t Value = 0;

  // Byte i of the stream supplies bits [8*i, 8*i+7] of the result.
  for (unsigned Shift = 0; Shift != 32; Shift += 8) {
    const uint8_t Byte = static_cast<uint8_t>(*data);
    ++data;
    Value |= static_cast<uint32_t>(Byte) << Shift;
  }

  return Value;
}
158
159//===----------------------------------------------------------------------===//
160// Token reconstruction from the PTH file.
161//===----------------------------------------------------------------------===//
162
163void PTHLexer::ReadToken(Token& T) {
164  // Clear the token.
165  // FIXME: Setting the flags directly should obviate this step.
166  T.startToken();
167
168  // Read the type of the token.
169  T.setKind((tok::TokenKind) Read8(CurPtr));
170
171  // Set flags.  This is gross, since we are really setting multiple flags.
172  T.setFlag((Token::TokenFlags) Read8(CurPtr));
173
174  // Set the IdentifierInfo* (if any).
175  T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtr));
176
177  // Set the SourceLocation.  Since all tokens are constructed using a
178  // raw lexer, they will all be offseted from the same FileID.
179  T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr)));
180
181  // Finally, read and set the length of the token.
182  T.setLength(Read32(CurPtr));
183}
184
185//===----------------------------------------------------------------------===//
186// Internal Data Structures for PTH file lookup and resolving identifiers.
187//===----------------------------------------------------------------------===//
188
189
190/// PTHFileLookup - This internal data structure is used by the PTHManager
191///  to map from FileEntry objects managed by FileManager to offsets within
192///  the PTH file.
193namespace {
194class VISIBILITY_HIDDEN PTHFileLookup {
195public:
196  class Val {
197    uint32_t v;
198
199  public:
200    Val() : v(~0) {}
201    Val(uint32_t x) : v(x) {}
202
203    operator uint32_t() const {
204      assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized.");
205      return v;
206    }
207
208    Val& operator=(uint32_t x) { v = x; return *this; }
209    bool isValid() const { return v != ~((uint32_t)0); }
210  };
211
212private:
213  llvm::StringMap<Val> FileMap;
214
215public:
216  PTHFileLookup() {};
217
218  Val Lookup(const FileEntry* FE) {
219    const char* s = FE->getName();
220    unsigned size = strlen(s);
221    return FileMap.GetOrCreateValue(s, s+size).getValue();
222  }
223
224  void ReadTable(const char* D) {
225    uint32_t N = Read32(D);     // Read the length of the table.
226
227    for ( ; N > 0; --N) {       // The rest of the data is the table itself.
228      uint32_t len = Read32(D);
229      const char* s = D;
230      D += len;
231      FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D);
232    }
233  }
234};
235} // end anonymous namespace
236
237//===----------------------------------------------------------------------===//
238// PTHManager methods.
239//===----------------------------------------------------------------------===//
240
241PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
242                       const char* idDataTable, IdentifierInfo** perIDCache,
243                       Preprocessor& pp)
244: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
245  IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
246
247PTHManager::~PTHManager() {
248  delete Buf;
249  delete (PTHFileLookup*) FileLookup;
250  free(PerIDCache);
251}
252
253PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
254
255  // Memory map the PTH file.
256  llvm::OwningPtr<llvm::MemoryBuffer>
257  File(llvm::MemoryBuffer::getFile(file.c_str()));
258
259  if (!File)
260    return 0;
261
262  // Get the buffer ranges and check if there are at least three 32-bit
263  // words at the end of the file.
264  const char* BufBeg = File->getBufferStart();
265  const char* BufEnd = File->getBufferEnd();
266
267  if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) {
268    assert(false && "Invalid PTH file.");
269    return 0; // FIXME: Proper error diagnostic?
270  }
271
272  // Compute the address of the index table at the end of the PTH file.
273  // This table contains the offset of the file lookup table, the
274  // persistent ID -> identifer data table.
275  const char* EndTable = BufEnd - sizeof(uint32_t)*3;
276
277  // Construct the file lookup table.  This will be used for mapping from
278  // FileEntry*'s to cached tokens.
279  const char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
280  const char* FileTable = BufBeg + Read32(FileTableOffset);
281
282  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
283    assert(false && "Invalid PTH file.");
284    return 0; // FIXME: Proper error diagnostic?
285  }
286
287  llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup());
288  FL->ReadTable(FileTable);
289
290  // Get the location of the table mapping from persistent ids to the
291  // data needed to reconstruct identifiers.
292  const char* IDTableOffset = EndTable + sizeof(uint32_t)*1;
293  const char* IData = BufBeg + Read32(IDTableOffset);
294  if (!(IData > BufBeg && IData < BufEnd)) {
295    assert(false && "Invalid PTH file.");
296    return 0; // FIXME: Proper error diagnostic?
297  }
298
299  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
300  uint32_t NumIds = Read32(IData);
301
302  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
303  // so that we in the best case only zero out memory once when the OS returns
304  // us new pages.
305  IdentifierInfo** PerIDCache =
306    (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache));
307
308  if (!PerIDCache) {
309    assert(false && "Could not allocate Persistent ID cache.");
310    return 0;
311  }
312
313  // Create the new lexer.
314  return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
315}
316
317IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) {
318  // Read the persistent ID from the PTH file.
319  uint32_t persistentID = Read32(D);
320
321  // A persistent ID of '0' always maps to NULL.
322  if (!persistentID)
323    return 0;
324
325  // Adjust the persistent ID by subtracting '1' so that it can be used
326  // as an index within a table in the PTH file.
327  --persistentID;
328
329  // Check if the IdentifierInfo has already been resolved.
330  IdentifierInfo*& II = PerIDCache[persistentID];
331  if (II) return II;
332
333  // Look in the PTH file for the string data for the IdentifierInfo object.
334  const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID;
335  const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
336  assert(IDData < Buf->getBufferEnd());
337
338  // Read the length of the string.
339  uint32_t len = Read32(IDData);
340
341  // Get the IdentifierInfo* with the specified string.
342  II = &ITable.get(IDData, IDData+len);
343  return II;
344}
345
346PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
347
348  if (!FE)
349    return 0;
350
351  // Lookup the FileEntry object in our file lookup data structure.  It will
352  // return a variant that indicates whether or not there is an offset within
353  // the PTH file that contains cached tokens.
354  PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE);
355
356  if (!Off.isValid()) // No tokens available.
357    return 0;
358
359  // Compute the offset of the token data within the buffer.
360  const char* data = Buf->getBufferStart() + Off;
361  assert(data < Buf->getBufferEnd());
362  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this);
363}
364