PTHLexer.cpp revision 18d9afb815bd8aff885dd64c5078760b3398d7be
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26 27using namespace clang; 28 29#define DISK_TOKEN_SIZE (1+1+3+4+2) 30 31//===----------------------------------------------------------------------===// 32// Utility methods for reading from the mmap'ed PTH file. 33//===----------------------------------------------------------------------===// 34 35static inline uint8_t Read8(const char*& data) { 36 return (uint8_t) *(data++); 37} 38 39static inline uint32_t Read32(const char*& data) { 40 uint32_t V = (uint32_t) Read8(data); 41 V |= (((uint32_t) Read8(data)) << 8); 42 V |= (((uint32_t) Read8(data)) << 16); 43 V |= (((uint32_t) Read8(data)) << 24); 44 return V; 45} 46 47//===----------------------------------------------------------------------===// 48// PTHLexer methods. 49//===----------------------------------------------------------------------===// 50 51PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 52 const char* ppcond, PTHManager& PM) 53 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 54 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {} 55 56void PTHLexer::Lex(Token& Tok) { 57LexNextToken: 58 59 //===--------------------------------------==// 60 // Read the raw token data. 61 //===--------------------------------------==// 62 63 // Shadow CurPtr into an automatic variable. 64 const unsigned char *CurPtrShadow = (const unsigned char*) CurPtr; 65 66 // Read in the data for the token. 14 bytes in total. 67 tok::TokenKind k = (tok::TokenKind) CurPtrShadow[0]; 68 Token::TokenFlags flags = (Token::TokenFlags) CurPtrShadow[1]; 69 70 uint32_t perID = ((uint32_t) CurPtrShadow[2]) 71 | (((uint32_t) CurPtrShadow[3]) << 8) 72 | (((uint32_t) CurPtrShadow[4]) << 16); 73 74 uint32_t FileOffset = ((uint32_t) CurPtrShadow[5]) 75 | (((uint32_t) CurPtrShadow[6]) << 8) 76 | (((uint32_t) CurPtrShadow[7]) << 16) 77 | (((uint32_t) CurPtrShadow[8]) << 24); 78 79 uint32_t Len = ((uint32_t) CurPtrShadow[9]) 80 | (((uint32_t) CurPtrShadow[10]) << 8); 81 82 CurPtr = (const char*) (CurPtrShadow + DISK_TOKEN_SIZE); 83 84 //===--------------------------------------==// 85 // Construct the token itself. 86 //===--------------------------------------==// 87 88 Tok.startToken(); 89 Tok.setKind(k); 90 Tok.setFlag(flags); 91 Tok.setIdentifierInfo(perID ? PTHMgr.GetIdentifierInfo(perID-1) : 0); 92 Tok.setLocation(SourceLocation::getFileLoc(FileID, FileOffset)); 93 Tok.setLength(Len); 94 95 //===--------------------------------------==// 96 // Process the token. 97 //===--------------------------------------==// 98 99 if (Tok.is(tok::eof)) { 100 // Save the end-of-file token. 101 EofToken = Tok; 102 103 Preprocessor *PPCache = PP; 104 105 if (LexEndOfFile(Tok)) 106 return; 107 108 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 109 return PPCache->Lex(Tok); 110 } 111 112 MIOpt.ReadToken(); 113 114 if (Tok.is(tok::eom)) { 115 ParsingPreprocessorDirective = false; 116 return; 117 } 118 119#if 0 120 SourceManager& SM = PP->getSourceManager(); 121 SourceLocation L = Tok.getLocation(); 122 123 static const char* last = 0; 124 const char* next = SM.getContentCacheForLoc(L)->Entry->getName(); 125 if (next != last) { 126 last = next; 127 llvm::cerr << next << '\n'; 128 } 129 130 llvm::cerr << "line " << SM.getLogicalLineNumber(L) << " col " << 131 SM.getLogicalColumnNumber(L) << '\n'; 132#endif 133 134 if (Tok.is(tok::hash)) { 135 if (Tok.isAtStartOfLine()) { 136 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 137 if (!LexingRawMode) { 138 PP->HandleDirective(Tok); 139 140 if (PP->isCurrentLexer(this)) 141 goto LexNextToken; 142 143 return PP->Lex(Tok); 144 } 145 } 146 } 147 148 if (Tok.is(tok::identifier)) { 149 if (LexingRawMode) { 150 Tok.setIdentifierInfo(0); 151 return; 152 } 153 154 return PP->HandleIdentifier(Tok); 155 } 156 157 158 assert(!Tok.is(tok::eom) || ParsingPreprocessorDirective); 159} 160 161// FIXME: This method can just be inlined into Lex(). 162bool PTHLexer::LexEndOfFile(Token &Tok) { 163 assert(!ParsingPreprocessorDirective); 164 assert(!LexingRawMode); 165 166 // FIXME: Issue diagnostics similar to Lexer. 167 return PP->HandleEndOfFile(Tok, false); 168} 169 170// FIXME: We can just grab the last token instead of storing a copy 171// into EofToken. 172void PTHLexer::setEOF(Token& Tok) { 173 assert(!EofToken.is(tok::eof)); 174 Tok = EofToken; 175} 176 177void PTHLexer::DiscardToEndOfLine() { 178 assert(ParsingPreprocessorDirective && ParsingFilename == false && 179 "Must be in a preprocessing directive!"); 180 181 // We assume that if the preprocessor wishes to discard to the end of 182 // the line that it also means to end the current preprocessor directive. 183 ParsingPreprocessorDirective = false; 184 185 // Skip tokens by only peeking at their token kind and the flags. 186 // We don't need to actually reconstruct full tokens from the token buffer. 187 // This saves some copies and it also reduces IdentifierInfo* lookup. 188 const char* p = CurPtr; 189 while (1) { 190 // Read the token kind. Are we at the end of the file? 191 tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; 192 if (x == tok::eof) break; 193 194 // Read the token flags. Are we at the start of the next line? 195 Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; 196 if (y & Token::StartOfLine) break; 197 198 // Skip to the next token. 199 p += DISK_TOKEN_SIZE; 200 } 201 202 CurPtr = p; 203} 204 205/// SkipBlock - Used by Preprocessor to skip the current conditional block. 206bool PTHLexer::SkipBlock() { 207 assert(CurPPCondPtr && "No cached PP conditional information."); 208 assert(LastHashTokPtr && "No known '#' token."); 209 210 const char* HashEntryI = 0; 211 uint32_t Offset; 212 uint32_t TableIdx; 213 214 do { 215 // Read the token offset from the side-table. 216 Offset = Read32(CurPPCondPtr); 217 218 // Read the target table index from the side-table. 219 TableIdx = Read32(CurPPCondPtr); 220 221 // Compute the actual memory address of the '#' token data for this entry. 222 HashEntryI = TokBuf + Offset; 223 224 // Optmization: "Sibling jumping". #if...#else...#endif blocks can 225 // contain nested blocks. In the side-table we can jump over these 226 // nested blocks instead of doing a linear search if the next "sibling" 227 // entry is not at a location greater than LastHashTokPtr. 228 if (HashEntryI < LastHashTokPtr && TableIdx) { 229 // In the side-table we are still at an entry for a '#' token that 230 // is earlier than the last one we saw. Check if the location we would 231 // stride gets us closer. 232 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 233 assert(NextPPCondPtr >= CurPPCondPtr); 234 // Read where we should jump to. 235 uint32_t TmpOffset = Read32(NextPPCondPtr); 236 const char* HashEntryJ = TokBuf + TmpOffset; 237 238 if (HashEntryJ <= LastHashTokPtr) { 239 // Jump directly to the next entry in the side table. 240 HashEntryI = HashEntryJ; 241 Offset = TmpOffset; 242 TableIdx = Read32(NextPPCondPtr); 243 CurPPCondPtr = NextPPCondPtr; 244 } 245 } 246 } 247 while (HashEntryI < LastHashTokPtr); 248 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 249 assert(TableIdx && "No jumping from #endifs."); 250 251 // Update our side-table iterator. 252 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 253 assert(NextPPCondPtr >= CurPPCondPtr); 254 CurPPCondPtr = NextPPCondPtr; 255 256 // Read where we should jump to. 257 HashEntryI = TokBuf + Read32(NextPPCondPtr); 258 uint32_t NextIdx = Read32(NextPPCondPtr); 259 260 // By construction NextIdx will be zero if this is a #endif. This is useful 261 // to know to obviate lexing another token. 262 bool isEndif = NextIdx == 0; 263 264 // This case can occur when we see something like this: 265 // 266 // #if ... 267 // /* a comment or nothing */ 268 // #elif 269 // 270 // If we are skipping the first #if block it will be the case that CurPtr 271 // already points 'elif'. Just return. 272 273 if (CurPtr > HashEntryI) { 274 assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 275 // Did we reach a #endif? If so, go ahead and consume that token as well. 276 if (isEndif) 277 CurPtr += DISK_TOKEN_SIZE*2; 278 else 279 LastHashTokPtr = HashEntryI; 280 281 return isEndif; 282 } 283 284 // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 285 CurPtr = HashEntryI; 286 287 // Update the location of the last observed '#'. This is useful if we 288 // are skipping multiple blocks. 289 LastHashTokPtr = CurPtr; 290 291 // Skip the '#' token. 292 assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash); 293 CurPtr += DISK_TOKEN_SIZE; 294 295 // Did we reach a #endif? If so, go ahead and consume that token as well. 296 if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } 297 298 return isEndif; 299} 300 301SourceLocation PTHLexer::getSourceLocation() { 302 // getLocation is not on the hot path. It is used to get the location of 303 // the next token when transitioning back to this lexer when done 304 // handling a #included file. Just read the necessary data from the token 305 // data buffer to construct the SourceLocation object. 306 // NOTE: This is a virtual function; hence it is defined out-of-line. 307 const char* p = CurPtr + (1 + 1 + 4); 308 uint32_t offset = 309 ((uint32_t) ((uint8_t) p[0])) 310 | (((uint32_t) ((uint8_t) p[1])) << 8) 311 | (((uint32_t) ((uint8_t) p[2])) << 16) 312 | (((uint32_t) ((uint8_t) p[3])) << 24); 313 return SourceLocation::getFileLoc(FileID, offset); 314} 315 316//===----------------------------------------------------------------------===// 317// Internal Data Structures for PTH file lookup and resolving identifiers. 318//===----------------------------------------------------------------------===// 319 320 321/// PTHFileLookup - This internal data structure is used by the PTHManager 322/// to map from FileEntry objects managed by FileManager to offsets within 323/// the PTH file. 324namespace { 325class VISIBILITY_HIDDEN PTHFileLookup { 326public: 327 class Val { 328 uint32_t TokenOff; 329 uint32_t PPCondOff; 330 331 public: 332 Val() : TokenOff(~0) {} 333 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {} 334 335 uint32_t getTokenOffset() const { 336 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 337 return TokenOff; 338 } 339 340 uint32_t gettPPCondOffset() const { 341 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 342 return PPCondOff; 343 } 344 345 bool isValid() const { return TokenOff != ~((uint32_t)0); } 346 }; 347 348private: 349 llvm::StringMap<Val> FileMap; 350 351public: 352 PTHFileLookup() {}; 353 354 Val Lookup(const FileEntry* FE) { 355 const char* s = FE->getName(); 356 unsigned size = strlen(s); 357 return FileMap.GetOrCreateValue(s, s+size).getValue(); 358 } 359 360 void ReadTable(const char* D) { 361 uint32_t N = Read32(D); // Read the length of the table. 362 363 for ( ; N > 0; --N) { // The rest of the data is the table itself. 364 uint32_t len = Read32(D); 365 const char* s = D; 366 D += len; 367 uint32_t TokenOff = Read32(D); 368 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D)); 369 } 370 } 371}; 372} // end anonymous namespace 373 374//===----------------------------------------------------------------------===// 375// PTHManager methods. 376//===----------------------------------------------------------------------===// 377 378PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 379 const char* idDataTable, IdentifierInfo** perIDCache, 380 Preprocessor& pp) 381: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 382 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 383 384PTHManager::~PTHManager() { 385 delete Buf; 386 delete (PTHFileLookup*) FileLookup; 387 free(PerIDCache); 388} 389 390PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 391 392 // Memory map the PTH file. 393 llvm::OwningPtr<llvm::MemoryBuffer> 394 File(llvm::MemoryBuffer::getFile(file.c_str())); 395 396 if (!File) 397 return 0; 398 399 // Get the buffer ranges and check if there are at least three 32-bit 400 // words at the end of the file. 401 const char* BufBeg = File->getBufferStart(); 402 const char* BufEnd = File->getBufferEnd(); 403 404 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 405 assert(false && "Invalid PTH file."); 406 return 0; // FIXME: Proper error diagnostic? 407 } 408 409 // Compute the address of the index table at the end of the PTH file. 410 // This table contains the offset of the file lookup table, the 411 // persistent ID -> identifer data table. 412 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 413 414 // Construct the file lookup table. This will be used for mapping from 415 // FileEntry*'s to cached tokens. 416 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 417 const char* FileTable = BufBeg + Read32(FileTableOffset); 418 419 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 420 assert(false && "Invalid PTH file."); 421 return 0; // FIXME: Proper error diagnostic? 422 } 423 424 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 425 FL->ReadTable(FileTable); 426 427 // Get the location of the table mapping from persistent ids to the 428 // data needed to reconstruct identifiers. 429 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 430 const char* IData = BufBeg + Read32(IDTableOffset); 431 if (!(IData > BufBeg && IData < BufEnd)) { 432 assert(false && "Invalid PTH file."); 433 return 0; // FIXME: Proper error diagnostic? 434 } 435 436 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 437 uint32_t NumIds = Read32(IData); 438 439 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 440 // so that we in the best case only zero out memory once when the OS returns 441 // us new pages. 442 IdentifierInfo** PerIDCache = 443 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache)); 444 445 if (!PerIDCache) { 446 assert(false && "Could not allocate Persistent ID cache."); 447 return 0; 448 } 449 450 // Create the new lexer. 451 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); 452} 453 454IdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) { 455 456 // Check if the IdentifierInfo has already been resolved. 457 IdentifierInfo*& II = PerIDCache[persistentID]; 458 if (II) return II; 459 460 // Look in the PTH file for the string data for the IdentifierInfo object. 461 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 462 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 463 assert(IDData < Buf->getBufferEnd()); 464 465 // Read the length of the string. 466 uint32_t len = Read32(IDData); 467 468 // Get the IdentifierInfo* with the specified string. 469 II = &ITable.get(IDData, IDData+len); 470 return II; 471} 472 473PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 474 475 if (!FE) 476 return 0; 477 478 // Lookup the FileEntry object in our file lookup data structure. It will 479 // return a variant that indicates whether or not there is an offset within 480 // the PTH file that contains cached tokens. 481 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE); 482 483 if (!FileData.isValid()) // No tokens available. 484 return 0; 485 486 // Compute the offset of the token data within the buffer. 487 const char* data = Buf->getBufferStart() + FileData.getTokenOffset(); 488 489 // Get the location of pp-conditional table. 490 const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset(); 491 uint32_t len = Read32(ppcond); 492 if (len == 0) ppcond = 0; 493 494 assert(data < Buf->getBufferEnd()); 495 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, 496 *this); 497} 498