// PTHLexer.cpp revision 59d08cb672136322375e5400578ee1fbd0947de2
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26 27using namespace clang; 28 29#define DISK_TOKEN_SIZE (1+1+3+4+2) 30 31//===----------------------------------------------------------------------===// 32// Utility methods for reading from the mmap'ed PTH file. 33//===----------------------------------------------------------------------===// 34 35static inline uint8_t Read8(const char*& data) { 36 return (uint8_t) *(data++); 37} 38 39static inline uint32_t Read32(const char*& data) { 40 uint32_t V = (uint32_t) Read8(data); 41 V |= (((uint32_t) Read8(data)) << 8); 42 V |= (((uint32_t) Read8(data)) << 16); 43 V |= (((uint32_t) Read8(data)) << 24); 44 return V; 45} 46 47//===----------------------------------------------------------------------===// 48// PTHLexer methods. 
49//===----------------------------------------------------------------------===// 50 51PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 52 const char* ppcond, PTHManager& PM) 53 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 54 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {} 55 56void PTHLexer::Lex(Token& Tok) { 57LexNextToken: 58 59 //===--------------------------------------==// 60 // Read the raw token data. 61 //===--------------------------------------==// 62 63 // Shadow CurPtr into an automatic variable. 64 const unsigned char *CurPtrShadow = (const unsigned char*) CurPtr; 65 66 // Read in the data for the token. 14 bytes in total. 67 tok::TokenKind k = (tok::TokenKind) CurPtrShadow[0]; 68 Token::TokenFlags flags = (Token::TokenFlags) CurPtrShadow[1]; 69 70 uint32_t perID = ((uint32_t) CurPtrShadow[2]) 71 | (((uint32_t) CurPtrShadow[3]) << 8) 72 | (((uint32_t) CurPtrShadow[4]) << 16); 73 74 uint32_t FileOffset = ((uint32_t) CurPtrShadow[5]) 75 | (((uint32_t) CurPtrShadow[6]) << 8) 76 | (((uint32_t) CurPtrShadow[7]) << 16) 77 | (((uint32_t) CurPtrShadow[8]) << 24); 78 79 uint32_t Len = ((uint32_t) CurPtrShadow[9]) 80 | (((uint32_t) CurPtrShadow[10]) << 8); 81 82 CurPtr = (const char*) (CurPtrShadow + DISK_TOKEN_SIZE); 83 84 //===--------------------------------------==// 85 // Construct the token itself. 86 //===--------------------------------------==// 87 88 Tok.startToken(); 89 Tok.setKind(k); 90 Tok.setFlag(flags); 91 assert(!LexingRawMode); 92 Tok.setIdentifierInfo(perID ? PTHMgr.GetIdentifierInfo(perID-1) : 0); 93 Tok.setLocation(SourceLocation::getFileLoc(FileID, FileOffset)); 94 Tok.setLength(Len); 95 96 //===--------------------------------------==// 97 // Process the token. 98 //===--------------------------------------==// 99 100 if (k == tok::identifier) { 101 MIOpt.ReadToken(); 102 return PP->HandleIdentifier(Tok); 103 } 104 105 if (k == tok::eof) { 106 // Save the end-of-file token. 
107 EofToken = Tok; 108 109 Preprocessor *PPCache = PP; 110 111 assert(!ParsingPreprocessorDirective); 112 assert(!LexingRawMode); 113 114 // FIXME: Issue diagnostics similar to Lexer. 115 if (PP->HandleEndOfFile(Tok, false)) 116 return; 117 118 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 119 return PPCache->Lex(Tok); 120 } 121 122 if (k == tok::hash && Tok.isAtStartOfLine()) { 123 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 124 assert(!LexingRawMode); 125 PP->HandleDirective(Tok); 126 127 if (PP->isCurrentLexer(this)) 128 goto LexNextToken; 129 130 return PP->Lex(Tok); 131 } 132 133 if (k == tok::eom) { 134 assert(ParsingPreprocessorDirective); 135 ParsingPreprocessorDirective = false; 136 return; 137 } 138 139 MIOpt.ReadToken(); 140} 141 142// FIXME: We can just grab the last token instead of storing a copy 143// into EofToken. 144void PTHLexer::getEOF(Token& Tok) { 145 assert(!EofToken.is(tok::eof)); 146 Tok = EofToken; 147} 148 149void PTHLexer::DiscardToEndOfLine() { 150 assert(ParsingPreprocessorDirective && ParsingFilename == false && 151 "Must be in a preprocessing directive!"); 152 153 // We assume that if the preprocessor wishes to discard to the end of 154 // the line that it also means to end the current preprocessor directive. 155 ParsingPreprocessorDirective = false; 156 157 // Skip tokens by only peeking at their token kind and the flags. 158 // We don't need to actually reconstruct full tokens from the token buffer. 159 // This saves some copies and it also reduces IdentifierInfo* lookup. 160 const char* p = CurPtr; 161 while (1) { 162 // Read the token kind. Are we at the end of the file? 163 tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; 164 if (x == tok::eof) break; 165 166 // Read the token flags. Are we at the start of the next line? 167 Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; 168 if (y & Token::StartOfLine) break; 169 170 // Skip to the next token. 
171 p += DISK_TOKEN_SIZE; 172 } 173 174 CurPtr = p; 175} 176 177/// SkipBlock - Used by Preprocessor to skip the current conditional block. 178bool PTHLexer::SkipBlock() { 179 assert(CurPPCondPtr && "No cached PP conditional information."); 180 assert(LastHashTokPtr && "No known '#' token."); 181 182 const char* HashEntryI = 0; 183 uint32_t Offset; 184 uint32_t TableIdx; 185 186 do { 187 // Read the token offset from the side-table. 188 Offset = Read32(CurPPCondPtr); 189 190 // Read the target table index from the side-table. 191 TableIdx = Read32(CurPPCondPtr); 192 193 // Compute the actual memory address of the '#' token data for this entry. 194 HashEntryI = TokBuf + Offset; 195 196 // Optmization: "Sibling jumping". #if...#else...#endif blocks can 197 // contain nested blocks. In the side-table we can jump over these 198 // nested blocks instead of doing a linear search if the next "sibling" 199 // entry is not at a location greater than LastHashTokPtr. 200 if (HashEntryI < LastHashTokPtr && TableIdx) { 201 // In the side-table we are still at an entry for a '#' token that 202 // is earlier than the last one we saw. Check if the location we would 203 // stride gets us closer. 204 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 205 assert(NextPPCondPtr >= CurPPCondPtr); 206 // Read where we should jump to. 207 uint32_t TmpOffset = Read32(NextPPCondPtr); 208 const char* HashEntryJ = TokBuf + TmpOffset; 209 210 if (HashEntryJ <= LastHashTokPtr) { 211 // Jump directly to the next entry in the side table. 212 HashEntryI = HashEntryJ; 213 Offset = TmpOffset; 214 TableIdx = Read32(NextPPCondPtr); 215 CurPPCondPtr = NextPPCondPtr; 216 } 217 } 218 } 219 while (HashEntryI < LastHashTokPtr); 220 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 221 assert(TableIdx && "No jumping from #endifs."); 222 223 // Update our side-table iterator. 
224 const char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 225 assert(NextPPCondPtr >= CurPPCondPtr); 226 CurPPCondPtr = NextPPCondPtr; 227 228 // Read where we should jump to. 229 HashEntryI = TokBuf + Read32(NextPPCondPtr); 230 uint32_t NextIdx = Read32(NextPPCondPtr); 231 232 // By construction NextIdx will be zero if this is a #endif. This is useful 233 // to know to obviate lexing another token. 234 bool isEndif = NextIdx == 0; 235 236 // This case can occur when we see something like this: 237 // 238 // #if ... 239 // /* a comment or nothing */ 240 // #elif 241 // 242 // If we are skipping the first #if block it will be the case that CurPtr 243 // already points 'elif'. Just return. 244 245 if (CurPtr > HashEntryI) { 246 assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 247 // Did we reach a #endif? If so, go ahead and consume that token as well. 248 if (isEndif) 249 CurPtr += DISK_TOKEN_SIZE*2; 250 else 251 LastHashTokPtr = HashEntryI; 252 253 return isEndif; 254 } 255 256 // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 257 CurPtr = HashEntryI; 258 259 // Update the location of the last observed '#'. This is useful if we 260 // are skipping multiple blocks. 261 LastHashTokPtr = CurPtr; 262 263 // Skip the '#' token. 264 assert(((tok::TokenKind) (unsigned char) *CurPtr) == tok::hash); 265 CurPtr += DISK_TOKEN_SIZE; 266 267 // Did we reach a #endif? If so, go ahead and consume that token as well. 268 if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } 269 270 return isEndif; 271} 272 273SourceLocation PTHLexer::getSourceLocation() { 274 // getLocation is not on the hot path. It is used to get the location of 275 // the next token when transitioning back to this lexer when done 276 // handling a #included file. Just read the necessary data from the token 277 // data buffer to construct the SourceLocation object. 278 // NOTE: This is a virtual function; hence it is defined out-of-line. 
279 const char* p = CurPtr + (1 + 1 + 3); 280 uint32_t offset = 281 ((uint32_t) ((uint8_t) p[0])) 282 | (((uint32_t) ((uint8_t) p[1])) << 8) 283 | (((uint32_t) ((uint8_t) p[2])) << 16) 284 | (((uint32_t) ((uint8_t) p[3])) << 24); 285 return SourceLocation::getFileLoc(FileID, offset); 286} 287 288//===----------------------------------------------------------------------===// 289// Internal Data Structures for PTH file lookup and resolving identifiers. 290//===----------------------------------------------------------------------===// 291 292 293/// PTHFileLookup - This internal data structure is used by the PTHManager 294/// to map from FileEntry objects managed by FileManager to offsets within 295/// the PTH file. 296namespace { 297class VISIBILITY_HIDDEN PTHFileLookup { 298public: 299 class Val { 300 uint32_t TokenOff; 301 uint32_t PPCondOff; 302 303 public: 304 Val() : TokenOff(~0) {} 305 Val(uint32_t toff, uint32_t poff) : TokenOff(toff), PPCondOff(poff) {} 306 307 uint32_t getTokenOffset() const { 308 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 309 return TokenOff; 310 } 311 312 uint32_t gettPPCondOffset() const { 313 assert(TokenOff != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 314 return PPCondOff; 315 } 316 317 bool isValid() const { return TokenOff != ~((uint32_t)0); } 318 }; 319 320private: 321 llvm::StringMap<Val> FileMap; 322 323public: 324 PTHFileLookup() {}; 325 326 Val Lookup(const FileEntry* FE) { 327 const char* s = FE->getName(); 328 unsigned size = strlen(s); 329 return FileMap.GetOrCreateValue(s, s+size).getValue(); 330 } 331 332 void ReadTable(const char* D) { 333 uint32_t N = Read32(D); // Read the length of the table. 334 335 for ( ; N > 0; --N) { // The rest of the data is the table itself. 
336 uint32_t len = Read32(D); 337 const char* s = D; 338 D += len; 339 uint32_t TokenOff = Read32(D); 340 FileMap.GetOrCreateValue(s, s+len).getValue() = Val(TokenOff, Read32(D)); 341 } 342 } 343}; 344} // end anonymous namespace 345 346//===----------------------------------------------------------------------===// 347// PTHManager methods. 348//===----------------------------------------------------------------------===// 349 350PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 351 const char* idDataTable, IdentifierInfo** perIDCache, 352 Preprocessor& pp) 353: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 354 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 355 356PTHManager::~PTHManager() { 357 delete Buf; 358 delete (PTHFileLookup*) FileLookup; 359 free(PerIDCache); 360} 361 362PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 363 364 // Memory map the PTH file. 365 llvm::OwningPtr<llvm::MemoryBuffer> 366 File(llvm::MemoryBuffer::getFile(file.c_str())); 367 368 if (!File) 369 return 0; 370 371 // Get the buffer ranges and check if there are at least three 32-bit 372 // words at the end of the file. 373 const char* BufBeg = File->getBufferStart(); 374 const char* BufEnd = File->getBufferEnd(); 375 376 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 377 assert(false && "Invalid PTH file."); 378 return 0; // FIXME: Proper error diagnostic? 379 } 380 381 // Compute the address of the index table at the end of the PTH file. 382 // This table contains the offset of the file lookup table, the 383 // persistent ID -> identifer data table. 384 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 385 386 // Construct the file lookup table. This will be used for mapping from 387 // FileEntry*'s to cached tokens. 
388 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 389 const char* FileTable = BufBeg + Read32(FileTableOffset); 390 391 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 392 assert(false && "Invalid PTH file."); 393 return 0; // FIXME: Proper error diagnostic? 394 } 395 396 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 397 FL->ReadTable(FileTable); 398 399 // Get the location of the table mapping from persistent ids to the 400 // data needed to reconstruct identifiers. 401 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 402 const char* IData = BufBeg + Read32(IDTableOffset); 403 if (!(IData > BufBeg && IData < BufEnd)) { 404 assert(false && "Invalid PTH file."); 405 return 0; // FIXME: Proper error diagnostic? 406 } 407 408 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 409 uint32_t NumIds = Read32(IData); 410 411 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 412 // so that we in the best case only zero out memory once when the OS returns 413 // us new pages. 414 IdentifierInfo** PerIDCache = 415 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache)); 416 417 if (!PerIDCache) { 418 assert(false && "Could not allocate Persistent ID cache."); 419 return 0; 420 } 421 422 // Create the new lexer. 423 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); 424} 425 426IdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) { 427 428 // Check if the IdentifierInfo has already been resolved. 429 IdentifierInfo*& II = PerIDCache[persistentID]; 430 if (II) return II; 431 432 // Look in the PTH file for the string data for the IdentifierInfo object. 433 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 434 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 435 assert(IDData < Buf->getBufferEnd()); 436 437 // Read the length of the string. 
438 uint32_t len = Read32(IDData); 439 440 // Get the IdentifierInfo* with the specified string. 441 II = &ITable.get(IDData, IDData+len); 442 return II; 443} 444 445PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 446 447 if (!FE) 448 return 0; 449 450 // Lookup the FileEntry object in our file lookup data structure. It will 451 // return a variant that indicates whether or not there is an offset within 452 // the PTH file that contains cached tokens. 453 PTHFileLookup::Val FileData = ((PTHFileLookup*) FileLookup)->Lookup(FE); 454 455 if (!FileData.isValid()) // No tokens available. 456 return 0; 457 458 // Compute the offset of the token data within the buffer. 459 const char* data = Buf->getBufferStart() + FileData.getTokenOffset(); 460 461 // Get the location of pp-conditional table. 462 const char* ppcond = Buf->getBufferStart() + FileData.gettPPCondOffset(); 463 uint32_t len = Read32(ppcond); 464 if (len == 0) ppcond = 0; 465 466 assert(data < Buf->getBufferEnd()); 467 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond, 468 *this); 469} 470