PTHLexer.cpp revision 41a2660377d215d004fe413c03874bd066b5384c
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26 27using namespace clang; 28 29#define DISK_TOKEN_SIZE (2+3*4) 30 31PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 32 const char* ppcond, PTHManager& PM) 33 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 34 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) { 35 // Make sure the EofToken is completely clean. 36 EofToken.startToken(); 37 } 38 39Token PTHLexer::GetToken() { 40 // Read the next token, or if we haven't advanced yet, get the last 41 // token read. 42 if (NeedsFetching) { 43 NeedsFetching = false; 44 ReadToken(LastFetched); 45 } 46 47 Token Tok = LastFetched; 48 49 // If we are in raw mode, zero out identifier pointers. This is 50 // needed for 'pragma poison'. Note that this requires that the Preprocessor 51 // can go back to the original source when it calls getSpelling(). 52 if (LexingRawMode && Tok.is(tok::identifier)) 53 Tok.setIdentifierInfo(0); 54 55 return Tok; 56} 57 58void PTHLexer::Lex(Token& Tok) { 59LexNextToken: 60 Tok = GetToken(); 61 62 if (AtLastToken()) { 63 Preprocessor *PPCache = PP; 64 65 if (LexEndOfFile(Tok)) 66 return; 67 68 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 69 return PPCache->Lex(Tok); 70 } 71 72 // Don't advance to the next token yet. Check if we are at the 73 // start of a new line and we're processing a directive. If so, we 74 // consume this token twice, once as an tok::eom. 75 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) { 76 ParsingPreprocessorDirective = false; 77 Tok.setKind(tok::eom); 78 MIOpt.ReadToken(); 79 return; 80 } 81 82 // Advance to the next token. 83 AdvanceToken(); 84 85 if (Tok.is(tok::hash)) { 86 if (Tok.isAtStartOfLine()) { 87 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 88 if (!LexingRawMode) { 89 PP->HandleDirective(Tok); 90 91 if (PP->isCurrentLexer(this)) 92 goto LexNextToken; 93 94 return PP->Lex(Tok); 95 } 96 } 97 } 98 99 MIOpt.ReadToken(); 100 101 if (Tok.is(tok::identifier)) { 102 if (LexingRawMode) return; 103 return PP->HandleIdentifier(Tok); 104 } 105} 106 107bool PTHLexer::LexEndOfFile(Token &Tok) { 108 109 if (ParsingPreprocessorDirective) { 110 ParsingPreprocessorDirective = false; 111 Tok.setKind(tok::eom); 112 MIOpt.ReadToken(); 113 return true; // Have a token. 114 } 115 116 if (LexingRawMode) { 117 MIOpt.ReadToken(); 118 return true; // Have an eof token. 119 } 120 121 // FIXME: Issue diagnostics similar to Lexer. 122 return PP->HandleEndOfFile(Tok, false); 123} 124 125void PTHLexer::setEOF(Token& Tok) { 126 assert(!EofToken.is(tok::eof)); 127 Tok = EofToken; 128} 129 130void PTHLexer::DiscardToEndOfLine() { 131 assert(ParsingPreprocessorDirective && ParsingFilename == false && 132 "Must be in a preprocessing directive!"); 133 134 // Already at end-of-file? 135 if (AtLastToken()) 136 return; 137 138 // Find the first token that is not the start of the *current* line. 139 Token T; 140 for (Lex(T); !AtLastToken(); Lex(T)) 141 if (GetToken().isAtStartOfLine()) 142 return; 143} 144 145//===----------------------------------------------------------------------===// 146// Utility methods for reading from the mmap'ed PTH file. 147//===----------------------------------------------------------------------===// 148 149static inline uint8_t Read8(const char*& data) { 150 return (uint8_t) *(data++); 151} 152 153static inline uint32_t Read32(const char*& data) { 154 uint32_t V = (uint32_t) Read8(data); 155 V |= (((uint32_t) Read8(data)) << 8); 156 V |= (((uint32_t) Read8(data)) << 16); 157 V |= (((uint32_t) Read8(data)) << 24); 158 return V; 159} 160 161/// SkipBlock - Used by Preprocessor to skip the current conditional block. 162bool PTHLexer::SkipBlock() { 163 assert(CurPPCondPtr && "No cached PP conditional information."); 164 assert(LastHashTokPtr && "No known '#' token."); 165 166 const char* HashEntryI = 0; 167 uint32_t Offset; 168 uint32_t TableIdx; 169 170 do { 171 // Read the token offset from the side-table. 172 Offset = Read32(CurPPCondPtr); 173 174 // Read the target table index from the side-table. 175 TableIdx = Read32(CurPPCondPtr); 176 177 // Compute the actual memory address of the '#' token data for this entry. 178 HashEntryI = TokBuf + Offset; 179 180 // Optmization: "Sibling jumping". #if...#else...#endif blocks can 181 // contain nested blocks. In the side-table we can jump over these 182 // nested blocks instead of doing a linear search if the next "sibling" 183 // entry is not at a location greater than LastHashTokPtr. 184 if (HashEntryI < LastHashTokPtr && TableIdx) { 185 // In the side-table we are still at an entry for a '#' token that 186 // is earlier than the last one we saw. Check if the location we would 187 // stride gets us closer. 188 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 189 assert(NextPPCondPtr >= CurPPCondPtr); 190 // Read where we should jump to. 191 uint32_t TmpOffset = Read32(NextPPCondPtr); 192 const char* HashEntryJ = TokBuf + TmpOffset; 193 194 if (HashEntryJ <= LastHashTokPtr) { 195 // Jump directly to the next entry in the side table. 196 HashEntryI = HashEntryJ; 197 Offset = TmpOffset; 198 TableIdx = Read32(NextPPCondPtr); 199 CurPPCondPtr = NextPPCondPtr; 200 } 201 } 202 } 203 while (HashEntryI < LastHashTokPtr); 204 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 205 assert(TableIdx && "No jumping from #endifs."); 206 207 // Update our side-table iterator. 208 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 209 assert(NextPPCondPtr >= CurPPCondPtr); 210 CurPPCondPtr = NextPPCondPtr; 211 212 // Read where we should jump to. 213 HashEntryI = TokBuf + Read32(NextPPCondPtr); 214 uint32_t NextIdx = Read32(NextPPCondPtr); 215 216 // By construction NextIdx will be zero if this is a #endif. This is useful 217 // to know to obviate lexing another token. 218 bool isEndif = NextIdx == 0; 219 NeedsFetching = true; 220 221 // This case can occur when we see something like this: 222 // 223 // #if ... 224 // /* a comment or nothing */ 225 // #elif 226 // 227 // If we are skipping the first #if block it will be the case that CurPtr 228 // already points 'elif'. Just return. 229 230 if (CurPtr > HashEntryI) { 231 assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 232 // Did we reach a #endif? If so, go ahead and consume that token as well. 233 if (isEndif) 234 CurPtr += DISK_TOKEN_SIZE; 235 else 236 LastHashTokPtr = HashEntryI; 237 238 return isEndif; 239 } 240 241 // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 242 CurPtr = HashEntryI; 243 244 // Update the location of the last observed '#'. This is useful if we 245 // are skipping multiple blocks. 246 LastHashTokPtr = CurPtr; 247 248#ifndef DEBUG 249 // In a debug build we should verify that the token is really a '#' that 250 // appears at the start of the line. 251 Token Tok; 252 ReadToken(Tok); 253 assert(Tok.isAtStartOfLine() && Tok.is(tok::hash)); 254#else 255 // In a full release build we can just skip the token entirely. 256 CurPtr += DISK_TOKEN_SIZE; 257#endif 258 259 // Did we reach a #endif? If so, go ahead and consume that token as well. 260 if (isEndif) { CurPtr += DISK_TOKEN_SIZE; } 261 262 return isEndif; 263} 264 265//===----------------------------------------------------------------------===// 266// Token reconstruction from the PTH file. 267//===----------------------------------------------------------------------===// 268 269void PTHLexer::ReadToken(Token& T) { 270 // Clear the token. 271 // FIXME: Setting the flags directly should obviate this step. 272 T.startToken(); 273 274 // Read the type of the token. 275 T.setKind((tok::TokenKind) Read8(CurPtr)); 276 277 // Set flags. This is gross, since we are really setting multiple flags. 278 T.setFlag((Token::TokenFlags) Read8(CurPtr)); 279 280 // Set the IdentifierInfo* (if any). 281 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtr)); 282 283 // Set the SourceLocation. Since all tokens are constructed using a 284 // raw lexer, they will all be offseted from the same FileID. 285 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr))); 286 287 // Finally, read and set the length of the token. 288 T.setLength(Read32(CurPtr)); 289} 290 291//===----------------------------------------------------------------------===// 292// Internal Data Structures for PTH file lookup and resolving identifiers. 293//===----------------------------------------------------------------------===// 294 295 296/// PTHFileLookup - This internal data structure is used by the PTHManager 297/// to map from FileEntry objects managed by FileManager to offsets within 298/// the PTH file. 299namespace { 300class VISIBILITY_HIDDEN PTHFileLookup { 301public: 302 class Val { 303 uint32_t TokenOff; 304 uint32_t PPCondOff; 305 306 public: 307 Val() : TokenOff(~0) {} 308 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {} 309 310 uint32_t getTokenOffset() const { 311 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 312 return TokenOff; 313 } 314 315 uint32_t gettPPCondOffset() const { 316 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 317 return PPCondOff; 318 } 319 320 bool isValid() const { return TokenOff != ~((uint32_t)0); } 321 }; 322 323private: 324 llvm::StringMap<Val> FileMap; 325 326public: 327 PTHFileLookup() {}; 328 329 Val Lookup(const FileEntry* FE) { 330 const char* s = FE->getName(); 331 unsigned size = strlen(s); 332 return FileMap.GetOrCreateValue(s, s+size).getValue(); 333 } 334 335 void ReadTable(const char* D) { 336 uint32_t N = Read32(D); // Read the length of the table. 337 338 for ( ; N > 0; --N) { // The rest of the data is the table itself. 339 uint32_t len = Read32(D); 340 const char* s = D; 341 D += len; 342 uint32_t TokenOff = Read32(D); 343 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D)); 344 } 345 } 346}; 347} // end anonymous namespace 348 349//===----------------------------------------------------------------------===// 350// PTHManager methods. 351//===----------------------------------------------------------------------===// 352 353PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 354 const char* idDataTable, IdentifierInfo** perIDCache, 355 Preprocessor& pp) 356: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 357 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 358 359PTHManager::~PTHManager() { 360 delete Buf; 361 delete (PTHFileLookup*) FileLookup; 362 free(PerIDCache); 363} 364 365PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 366 367 // Memory map the PTH file. 368 llvm::OwningPtr<llvm::MemoryBuffer> 369 File(llvm::MemoryBuffer::getFile(file.c_str())); 370 371 if (!File) 372 return 0; 373 374 // Get the buffer ranges and check if there are at least three 32-bit 375 // words at the end of the file. 376 const char* BufBeg = File->getBufferStart(); 377 const char* BufEnd = File->getBufferEnd(); 378 379 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 380 assert(false && "Invalid PTH file."); 381 return 0; // FIXME: Proper error diagnostic? 382 } 383 384 // Compute the address of the index table at the end of the PTH file. 385 // This table contains the offset of the file lookup table, the 386 // persistent ID -> identifer data table. 387 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 388 389 // Construct the file lookup table. This will be used for mapping from 390 // FileEntry*'s to cached tokens. 391 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 392 const char* FileTable = BufBeg + Read32(FileTableOffset); 393 394 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 395 assert(false && "Invalid PTH file."); 396 return 0; // FIXME: Proper error diagnostic? 397 } 398 399 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 400 FL->ReadTable(FileTable); 401 402 // Get the location of the table mapping from persistent ids to the 403 // data needed to reconstruct identifiers. 404 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 405 const char* IData = BufBeg + Read32(IDTableOffset); 406 if (!(IData > BufBeg && IData < BufEnd)) { 407 assert(false && "Invalid PTH file."); 408 return 0; // FIXME: Proper error diagnostic? 409 } 410 411 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 412 uint32_t NumIds = Read32(IData); 413 414 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 415 // so that we in the best case only zero out memory once when the OS returns 416 // us new pages. 417 IdentifierInfo** PerIDCache = 418 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache)); 419 420 if (!PerIDCache) { 421 assert(false && "Could not allocate Persistent ID cache."); 422 return 0; 423 } 424 425 // Create the new lexer. 426 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); 427} 428 429IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) { 430 // Read the persistent ID from the PTH file. 431 uint32_t persistentID = Read32(D); 432 433 // A persistent ID of '0' always maps to NULL. 434 if (!persistentID) 435 return 0; 436 437 // Adjust the persistent ID by subtracting '1' so that it can be used 438 // as an index within a table in the PTH file. 439 --persistentID; 440 441 // Check if the IdentifierInfo has already been resolved. 442 IdentifierInfo*& II = PerIDCache[persistentID]; 443 if (II) return II; 444 445 // Look in the PTH file for the string data for the IdentifierInfo object. 446 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 447 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 448 assert(IDData < Buf->getBufferEnd()); 449 450 // Read the length of the string. 451 uint32_t len = Read32(IDData); 452 453 // Get the IdentifierInfo* with the specified string. 454 II = &ITable.get(IDData, IDData+len); 455 return II; 456} 457 458PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 459 460 if (!FE) 461 return 0; 462 463 // Lookup the FileEntry object in our file lookup data structure. It will 464 // return a variant that indicates whether or not there is an offset within 465 // the PTH file that contains cached tokens. 466 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE); 467 468 if (!FileData.isValid()) // No tokens available. 469 return 0; 470 471 // Compute the offset of the token data within the buffer. 472 const char* data = Buf->getBufferStart() + FileData.getTokenOffset(); 473 474 // Get the location of pp-conditional table. 475 const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset(); 476 uint32_t len = Read32(ppcond); 477 if (len == 0) ppcond = 0; 478 479 assert(data < Buf->getBufferEnd()); 480 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, 481 *this); 482} 483