PTHLexer.cpp revision b60d7999d621fce608e03d39e82c0e7eda750054
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26 27using namespace clang; 28 29#define DISK_TOKEN_SIZE (2+3*4) 30 31PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 32 const char* ppcond, PTHManager& PM) 33 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 34 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM), NeedsFetching(true) { 35 // Make sure the EofToken is completely clean. 36 EofToken.startToken(); 37 } 38 39Token PTHLexer::GetToken() { 40 // Read the next token, or if we haven't advanced yet, get the last 41 // token read. 42 if (NeedsFetching) { 43 NeedsFetching = false; 44 ReadToken(LastFetched); 45 } 46 47 Token Tok = LastFetched; 48 49 // If we are in raw mode, zero out identifier pointers. This is 50 // needed for 'pragma poison'. Note that this requires that the Preprocessor 51 // can go back to the original source when it calls getSpelling(). 52 if (LexingRawMode && Tok.is(tok::identifier)) 53 Tok.setIdentifierInfo(0); 54 55 return Tok; 56} 57 58void PTHLexer::Lex(Token& Tok) { 59LexNextToken: 60 Tok = GetToken(); 61 62 if (AtLastToken()) { 63 Preprocessor *PPCache = PP; 64 65 if (LexEndOfFile(Tok)) 66 return; 67 68 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 69 return PPCache->Lex(Tok); 70 } 71 72 // Don't advance to the next token yet. Check if we are at the 73 // start of a new line and we're processing a directive. If so, we 74 // consume this token twice, once as an tok::eom. 75 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) { 76 ParsingPreprocessorDirective = false; 77 Tok.setKind(tok::eom); 78 MIOpt.ReadToken(); 79 return; 80 } 81 82 // Advance to the next token. 83 AdvanceToken(); 84 85 if (Tok.is(tok::hash)) { 86 if (Tok.isAtStartOfLine()) { 87 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 88 if (!LexingRawMode) { 89 PP->HandleDirective(Tok); 90 91 if (PP->isCurrentLexer(this)) 92 goto LexNextToken; 93 94 return PP->Lex(Tok); 95 } 96 } 97 } 98 99 MIOpt.ReadToken(); 100 101 if (Tok.is(tok::identifier)) { 102 if (LexingRawMode) return; 103 return PP->HandleIdentifier(Tok); 104 } 105} 106 107bool PTHLexer::LexEndOfFile(Token &Tok) { 108 109 if (ParsingPreprocessorDirective) { 110 ParsingPreprocessorDirective = false; 111 Tok.setKind(tok::eom); 112 MIOpt.ReadToken(); 113 return true; // Have a token. 114 } 115 116 if (LexingRawMode) { 117 MIOpt.ReadToken(); 118 return true; // Have an eof token. 119 } 120 121 // FIXME: Issue diagnostics similar to Lexer. 122 return PP->HandleEndOfFile(Tok, false); 123} 124 125void PTHLexer::setEOF(Token& Tok) { 126 assert(!EofToken.is(tok::eof)); 127 Tok = EofToken; 128} 129 130void PTHLexer::DiscardToEndOfLine() { 131 assert(ParsingPreprocessorDirective && ParsingFilename == false && 132 "Must be in a preprocessing directive!"); 133 134 // Already at end-of-file? 135 if (AtLastToken()) 136 return; 137 138 // Find the first token that is not the start of the *current* line. 139 Token T; 140 for (Lex(T); !AtLastToken(); Lex(T)) 141 if (GetToken().isAtStartOfLine()) 142 return; 143} 144 145//===----------------------------------------------------------------------===// 146// Utility methods for reading from the mmap'ed PTH file. 147//===----------------------------------------------------------------------===// 148 149static inline uint8_t Read8(const char*& data) { 150 return (uint8_t) *(data++); 151} 152 153static inline uint32_t Read32(const char*& data) { 154 uint32_t V = (uint32_t) Read8(data); 155 V |= (((uint32_t) Read8(data)) << 8); 156 V |= (((uint32_t) Read8(data)) << 16); 157 V |= (((uint32_t) Read8(data)) << 24); 158 return V; 159} 160 161/// SkipBlock - Used by Preprocessor to skip the current conditional block. 162bool PTHLexer::SkipBlock() { 163 assert(CurPPCondPtr && "No cached PP conditional information."); 164 assert(LastHashTokPtr && "No known '#' token."); 165 166 const char* HashEntryI = 0; 167 uint32_t Offset; 168 uint32_t TableIdx; 169 170 do { 171 // Read the token offset from the side-table. 172 Offset = Read32(CurPPCondPtr); 173 174 // Read the target table index from the side-table. 175 TableIdx = Read32(CurPPCondPtr); 176 177 // Compute the actual memory address of the '#' token data for this entry. 178 HashEntryI = TokBuf + Offset; 179 180 // Optmization: "Sibling jumping". #if...#else...#endif blocks can 181 // contain nested blocks. In the side-table we can jump over these 182 // nested blocks instead of doing a linear search if the next "sibling" 183 // entry is not at a location greater than LastHashTokPtr. 184 if (HashEntryI < LastHashTokPtr && TableIdx) { 185 // In the side-table we are still at an entry for a '#' token that 186 // is earlier than the last one we saw. Check if the location we would 187 // stride gets us closer. 188 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 189 assert(NextPPCondPtr >= CurPPCondPtr); 190 // Read where we should jump to. 191 uint32_t TmpOffset = Read32(NextPPCondPtr); 192 const char* HashEntryJ = TokBuf + TmpOffset; 193 194 if (HashEntryJ <= LastHashTokPtr) { 195 // Jump directly to the next entry in the side table. 196 HashEntryI = HashEntryJ; 197 Offset = TmpOffset; 198 TableIdx = Read32(NextPPCondPtr); 199 CurPPCondPtr = NextPPCondPtr; 200 } 201 } 202 } 203 while (HashEntryI < LastHashTokPtr); 204 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 205 assert(TableIdx && "No jumping from #endifs."); 206 207 // Update our side-table iterator. 208 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 209 assert(NextPPCondPtr >= CurPPCondPtr); 210 CurPPCondPtr = NextPPCondPtr; 211 212 // Read where we should jump to. 213 HashEntryI = TokBuf + Read32(NextPPCondPtr); 214 uint32_t NextIdx = Read32(NextPPCondPtr); 215 216 // By construction NextIdx will be zero if this is a #endif. This is useful 217 // to know to obviate lexing another token. 218 bool isEndif = NextIdx == 0; 219 NeedsFetching = true; 220 221 // This case can occur when we see something like this: 222 // 223 // #if ... 224 // /* a comment or nothing */ 225 // #elif 226 // 227 // If we are skipping the first #if block it will be the case that CurPtr 228 // already points 'elif'. Just return. 229 230 if (CurPtr > HashEntryI) { 231 assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 232 // Did we reach a #endif? If so, go ahead and consume that token as well. 233 if (isEndif) 234 CurPtr += DISK_TOKEN_SIZE; 235 else 236 LastHashTokPtr = HashEntryI; 237 238 return isEndif; 239 } 240 241 // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 242 CurPtr = HashEntryI; 243 244 // Update the location of the last observed '#'. This is useful if we 245 // are skipping multiple blocks. 246 LastHashTokPtr = CurPtr; 247 248#ifndef DEBUG 249 // In a debug build we should verify that the token is really a '#' that 250 // appears at the start of the line. 251 Token Tok; 252 ReadToken(Tok); 253 assert(Tok.isAtStartOfLine() && Tok.is(tok::hash)); 254#else 255 // In a full release build we can just skip the token entirely. 256 CurPtr += DISK_TOKEN_SIZE; 257#endif 258 259 // Did we reach a #endif? If so, go ahead and consume that token as well. 260 if (isEndif) { CurPtr += DISK_TOKEN_SIZE; } 261 262 return isEndif; 263} 264 265//===----------------------------------------------------------------------===// 266// Token reconstruction from the PTH file. 267//===----------------------------------------------------------------------===// 268 269void PTHLexer::ReadToken(Token& T) { 270 // Clear the token. 271 // FIXME: Setting the flags directly should obviate this step. 272 T.startToken(); 273 274 // Shadow CurPtr into an automatic variable so that Read8 doesn't load and 275 // store back into the instance variable. 276 const char *CurPtrShadow = CurPtr; 277 278 // Read the type of the token. 279 T.setKind((tok::TokenKind) Read8(CurPtrShadow)); 280 281 // Set flags. This is gross, since we are really setting multiple flags. 282 T.setFlag((Token::TokenFlags) Read8(CurPtrShadow)); 283 284 // Set the IdentifierInfo* (if any). 285 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtrShadow)); 286 287 // Set the SourceLocation. Since all tokens are constructed using a 288 // raw lexer, they will all be offseted from the same FileID. 289 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtrShadow))); 290 291 // Finally, read and set the length of the token. 292 T.setLength(Read32(CurPtrShadow)); 293 294 CurPtr = CurPtrShadow; 295} 296 297//===----------------------------------------------------------------------===// 298// Internal Data Structures for PTH file lookup and resolving identifiers. 299//===----------------------------------------------------------------------===// 300 301 302/// PTHFileLookup - This internal data structure is used by the PTHManager 303/// to map from FileEntry objects managed by FileManager to offsets within 304/// the PTH file. 305namespace { 306class VISIBILITY_HIDDEN PTHFileLookup { 307public: 308 class Val { 309 uint32_t TokenOff; 310 uint32_t PPCondOff; 311 312 public: 313 Val() : TokenOff(~0) {} 314 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {} 315 316 uint32_t getTokenOffset() const { 317 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 318 return TokenOff; 319 } 320 321 uint32_t gettPPCondOffset() const { 322 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 323 return PPCondOff; 324 } 325 326 bool isValid() const { return TokenOff != ~((uint32_t)0); } 327 }; 328 329private: 330 llvm::StringMap<Val> FileMap; 331 332public: 333 PTHFileLookup() {}; 334 335 Val Lookup(const FileEntry* FE) { 336 const char* s = FE->getName(); 337 unsigned size = strlen(s); 338 return FileMap.GetOrCreateValue(s, s+size).getValue(); 339 } 340 341 void ReadTable(const char* D) { 342 uint32_t N = Read32(D); // Read the length of the table. 343 344 for ( ; N > 0; --N) { // The rest of the data is the table itself. 345 uint32_t len = Read32(D); 346 const char* s = D; 347 D += len; 348 uint32_t TokenOff = Read32(D); 349 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D)); 350 } 351 } 352}; 353} // end anonymous namespace 354 355//===----------------------------------------------------------------------===// 356// PTHManager methods. 357//===----------------------------------------------------------------------===// 358 359PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 360 const char* idDataTable, IdentifierInfo** perIDCache, 361 Preprocessor& pp) 362: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 363 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 364 365PTHManager::~PTHManager() { 366 delete Buf; 367 delete (PTHFileLookup*) FileLookup; 368 free(PerIDCache); 369} 370 371PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 372 373 // Memory map the PTH file. 374 llvm::OwningPtr<llvm::MemoryBuffer> 375 File(llvm::MemoryBuffer::getFile(file.c_str())); 376 377 if (!File) 378 return 0; 379 380 // Get the buffer ranges and check if there are at least three 32-bit 381 // words at the end of the file. 382 const char* BufBeg = File->getBufferStart(); 383 const char* BufEnd = File->getBufferEnd(); 384 385 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 386 assert(false && "Invalid PTH file."); 387 return 0; // FIXME: Proper error diagnostic? 388 } 389 390 // Compute the address of the index table at the end of the PTH file. 391 // This table contains the offset of the file lookup table, the 392 // persistent ID -> identifer data table. 393 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 394 395 // Construct the file lookup table. This will be used for mapping from 396 // FileEntry*'s to cached tokens. 397 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 398 const char* FileTable = BufBeg + Read32(FileTableOffset); 399 400 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 401 assert(false && "Invalid PTH file."); 402 return 0; // FIXME: Proper error diagnostic? 403 } 404 405 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 406 FL->ReadTable(FileTable); 407 408 // Get the location of the table mapping from persistent ids to the 409 // data needed to reconstruct identifiers. 410 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 411 const char* IData = BufBeg + Read32(IDTableOffset); 412 if (!(IData > BufBeg && IData < BufEnd)) { 413 assert(false && "Invalid PTH file."); 414 return 0; // FIXME: Proper error diagnostic? 415 } 416 417 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 418 uint32_t NumIds = Read32(IData); 419 420 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 421 // so that we in the best case only zero out memory once when the OS returns 422 // us new pages. 423 IdentifierInfo** PerIDCache = 424 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache)); 425 426 if (!PerIDCache) { 427 assert(false && "Could not allocate Persistent ID cache."); 428 return 0; 429 } 430 431 // Create the new lexer. 432 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); 433} 434 435IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) { 436 // Read the persistent ID from the PTH file. 437 uint32_t persistentID = Read32(D); 438 439 // A persistent ID of '0' always maps to NULL. 440 if (!persistentID) 441 return 0; 442 443 // Adjust the persistent ID by subtracting '1' so that it can be used 444 // as an index within a table in the PTH file. 445 --persistentID; 446 447 // Check if the IdentifierInfo has already been resolved. 448 IdentifierInfo*& II = PerIDCache[persistentID]; 449 if (II) return II; 450 451 // Look in the PTH file for the string data for the IdentifierInfo object. 452 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 453 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 454 assert(IDData < Buf->getBufferEnd()); 455 456 // Read the length of the string. 457 uint32_t len = Read32(IDData); 458 459 // Get the IdentifierInfo* with the specified string. 460 II = &ITable.get(IDData, IDData+len); 461 return II; 462} 463 464PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 465 466 if (!FE) 467 return 0; 468 469 // Lookup the FileEntry object in our file lookup data structure. It will 470 // return a variant that indicates whether or not there is an offset within 471 // the PTH file that contains cached tokens. 472 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE); 473 474 if (!FileData.isValid()) // No tokens available. 475 return 0; 476 477 // Compute the offset of the token data within the buffer. 478 const char* data = Buf->getBufferStart() + FileData.getTokenOffset(); 479 480 // Get the location of pp-conditional table. 481 const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset(); 482 uint32_t len = Read32(ppcond); 483 if (len == 0) ppcond = 0; 484 485 assert(data < Buf->getBufferEnd()); 486 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, 487 *this); 488} 489