PTHLexer.cpp revision 8fbc44d540eb7b9dabef71ba616c9102a2eaa71a
1b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 281362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// 381362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// The LLVM Compiler Infrastructure 481362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// 581362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// This file is distributed under the University of Illinois Open Source 681362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// License. See LICENSE.TXT for details. 781362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// 881362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen//===----------------------------------------------------------------------===// 981362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// 1081362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// This file implements the PTHLexer interface. 1181362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// 1281362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen//===----------------------------------------------------------------------===// 1381362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen 1481362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Basic/TokenKinds.h" 1581362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Basic/FileManager.h" 1681362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Basic/IdentifierTable.h" 1781362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Lex/PTHLexer.h" 1881362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Lex/Preprocessor.h" 1981362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Lex/PTHManager.h" 2081362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Lex/Token.h" 2181362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "clang/Lex/Preprocessor.h" 2281362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include 
"llvm/ADT/StringMap.h" 2381362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "llvm/ADT/OwningPtr.h" 2481362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "llvm/Support/Compiler.h" 2581362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "llvm/Support/MathExtras.h" 2681362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "llvm/Support/MemoryBuffer.h" 2781362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen#include "llvm/System/Host.h" 28b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#include <sys/stat.h> 2981362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsenusing namespace clang; 30b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 31b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch#define DISK_TOKEN_SIZE (1+1+2+4+4) 32b8a8cc1952d61a2f3a2568848933943a543b5d3eBen Murdoch 33b0fe1620dcb4135ac3ab2d66ff93072373911299Ben Murdoch//===----------------------------------------------------------------------===// 3481362e16c30e9e970af6b17592f627ad8cdee4d8Kristian Monsen// Utility methods for reading from the mmap'ed PTH file. 
//===----------------------------------------------------------------------===//

/// ReadUnalignedLE16 - Decode the little-endian 16-bit value stored in the two
/// bytes at \p Data and advance \p Data past them.  The value is assembled one
/// byte at a time, so \p Data does not need any particular alignment.
static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) {
  uint16_t V = ((uint16_t)Data[0]) |
               ((uint16_t)Data[1] <<  8);
  Data += 2;
  return V;
}

/// ReadUnalignedLE32 - Decode the little-endian 32-bit value stored in the
/// four bytes at \p Data and advance \p Data past them.  No alignment is
/// required.
static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) {
  uint32_t V = ((uint32_t)Data[0])  |
               ((uint32_t)Data[1] <<  8) |
               ((uint32_t)Data[2] << 16) |
               ((uint32_t)Data[3] << 24);
  Data += 4;
  return V;
}

/// ReadUnalignedLE64 - Decode the little-endian 64-bit value stored in the
/// eight bytes at \p Data and advance \p Data past them.  No alignment is
/// required.
static inline uint64_t ReadUnalignedLE64(const unsigned char *&Data) {
  uint64_t V = ((uint64_t)Data[0])  |
               ((uint64_t)Data[1] <<  8) |
               ((uint64_t)Data[2] << 16) |
               ((uint64_t)Data[3] << 24) |
               ((uint64_t)Data[4] << 32) |
               ((uint64_t)Data[5] << 40) |
               ((uint64_t)Data[6] << 48) |
               ((uint64_t)Data[7] << 56);
  Data += 8;
  return V;
}

/// ReadLE32 - Decode the little-endian 32-bit value at \p Data and advance
/// \p Data by four bytes.
///
/// This used to load through a 'uint32_t*' obtained by casting \p Data and
/// then byte-swap on big-endian hosts.  That cast drops the 'const'
/// qualifier, reads 'unsigned char' storage through an incompatible lvalue
/// type, and is undefined when \p Data is not 4-byte aligned.  Assembling the
/// value from its bytes yields the same result on every host without any of
/// those problems, so simply share the byte-wise decoder.
static inline uint32_t ReadLE32(const unsigned char *&Data) {
  return ReadUnalignedLE32(Data);
}

// Bernstein hash function:
//   This is basically copy-and-paste from StringMap.  This likely won't
//   stay here, which is why I didn't bother to expose this function from
//   String Map.
80static unsigned BernsteinHash(const char* x) { 81 unsigned int R = 0; 82 for ( ; *x != '\0' ; ++x) R = R * 33 + *x; 83 return R + (R >> 5); 84} 85 86static unsigned BernsteinHash(const char* x, unsigned n) { 87 unsigned int R = 0; 88 for (unsigned i = 0 ; i < n ; ++i, ++x) R = R * 33 + *x; 89 return R + (R >> 5); 90} 91 92//===----------------------------------------------------------------------===// 93// PTHLexer methods. 94//===----------------------------------------------------------------------===// 95 96PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, 97 const unsigned char *ppcond, PTHManager &PM) 98 : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 99 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { 100 101 FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); 102} 103 104void PTHLexer::Lex(Token& Tok) { 105LexNextToken: 106 107 //===--------------------------------------==// 108 // Read the raw token data. 109 //===--------------------------------------==// 110 111 // Shadow CurPtr into an automatic variable. 112 const unsigned char *CurPtrShadow = CurPtr; 113 114 // Read in the data for the token. 115 unsigned Word0 = ReadLE32(CurPtrShadow); 116 uint32_t IdentifierID = ReadLE32(CurPtrShadow); 117 uint32_t FileOffset = ReadLE32(CurPtrShadow); 118 119 tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF); 120 Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF); 121 uint32_t Len = Word0 >> 16; 122 123 CurPtr = CurPtrShadow; 124 125 //===--------------------------------------==// 126 // Construct the token itself. 127 //===--------------------------------------==// 128 129 Tok.startToken(); 130 Tok.setKind(TKind); 131 Tok.setFlag(TFlags); 132 assert(!LexingRawMode); 133 Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset)); 134 Tok.setLength(Len); 135 136 // Handle identifiers. 
137 if (Tok.isLiteral()) { 138 Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID)); 139 } 140 else if (IdentifierID) { 141 MIOpt.ReadToken(); 142 IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1); 143 144 Tok.setIdentifierInfo(II); 145 146 // Change the kind of this identifier to the appropriate token kind, e.g. 147 // turning "for" into a keyword. 148 Tok.setKind(II->getTokenID()); 149 150 if (II->isHandleIdentifierCase()) 151 PP->HandleIdentifier(Tok); 152 return; 153 } 154 155 //===--------------------------------------==// 156 // Process the token. 157 //===--------------------------------------==// 158#if 0 159 SourceManager& SM = PP->getSourceManager(); 160 llvm::cerr << SM.getFileEntryForID(FileID)->getName() 161 << ':' << SM.getLogicalLineNumber(Tok.getLocation()) 162 << ':' << SM.getLogicalColumnNumber(Tok.getLocation()) 163 << '\n'; 164#endif 165 166 if (TKind == tok::eof) { 167 // Save the end-of-file token. 168 EofToken = Tok; 169 170 Preprocessor *PPCache = PP; 171 172 assert(!ParsingPreprocessorDirective); 173 assert(!LexingRawMode); 174 175 // FIXME: Issue diagnostics similar to Lexer. 176 if (PP->HandleEndOfFile(Tok, false)) 177 return; 178 179 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 180 return PPCache->Lex(Tok); 181 } 182 183 if (TKind == tok::hash && Tok.isAtStartOfLine()) { 184 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 185 assert(!LexingRawMode); 186 PP->HandleDirective(Tok); 187 188 if (PP->isCurrentLexer(this)) 189 goto LexNextToken; 190 191 return PP->Lex(Tok); 192 } 193 194 if (TKind == tok::eom) { 195 assert(ParsingPreprocessorDirective); 196 ParsingPreprocessorDirective = false; 197 return; 198 } 199 200 MIOpt.ReadToken(); 201} 202 203// FIXME: We can just grab the last token instead of storing a copy 204// into EofToken. 
205void PTHLexer::getEOF(Token& Tok) { 206 assert(EofToken.is(tok::eof)); 207 Tok = EofToken; 208} 209 210void PTHLexer::DiscardToEndOfLine() { 211 assert(ParsingPreprocessorDirective && ParsingFilename == false && 212 "Must be in a preprocessing directive!"); 213 214 // We assume that if the preprocessor wishes to discard to the end of 215 // the line that it also means to end the current preprocessor directive. 216 ParsingPreprocessorDirective = false; 217 218 // Skip tokens by only peeking at their token kind and the flags. 219 // We don't need to actually reconstruct full tokens from the token buffer. 220 // This saves some copies and it also reduces IdentifierInfo* lookup. 221 const unsigned char* p = CurPtr; 222 while (1) { 223 // Read the token kind. Are we at the end of the file? 224 tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; 225 if (x == tok::eof) break; 226 227 // Read the token flags. Are we at the start of the next line? 228 Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; 229 if (y & Token::StartOfLine) break; 230 231 // Skip to the next token. 232 p += DISK_TOKEN_SIZE; 233 } 234 235 CurPtr = p; 236} 237 238/// SkipBlock - Used by Preprocessor to skip the current conditional block. 239bool PTHLexer::SkipBlock() { 240 assert(CurPPCondPtr && "No cached PP conditional information."); 241 assert(LastHashTokPtr && "No known '#' token."); 242 243 const unsigned char* HashEntryI = 0; 244 uint32_t Offset; 245 uint32_t TableIdx; 246 247 do { 248 // Read the token offset from the side-table. 249 Offset = ReadLE32(CurPPCondPtr); 250 251 // Read the target table index from the side-table. 252 TableIdx = ReadLE32(CurPPCondPtr); 253 254 // Compute the actual memory address of the '#' token data for this entry. 255 HashEntryI = TokBuf + Offset; 256 257 // Optmization: "Sibling jumping". #if...#else...#endif blocks can 258 // contain nested blocks. 
In the side-table we can jump over these 259 // nested blocks instead of doing a linear search if the next "sibling" 260 // entry is not at a location greater than LastHashTokPtr. 261 if (HashEntryI < LastHashTokPtr && TableIdx) { 262 // In the side-table we are still at an entry for a '#' token that 263 // is earlier than the last one we saw. Check if the location we would 264 // stride gets us closer. 265 const unsigned char* NextPPCondPtr = 266 PPCond + TableIdx*(sizeof(uint32_t)*2); 267 assert(NextPPCondPtr >= CurPPCondPtr); 268 // Read where we should jump to. 269 uint32_t TmpOffset = ReadLE32(NextPPCondPtr); 270 const unsigned char* HashEntryJ = TokBuf + TmpOffset; 271 272 if (HashEntryJ <= LastHashTokPtr) { 273 // Jump directly to the next entry in the side table. 274 HashEntryI = HashEntryJ; 275 Offset = TmpOffset; 276 TableIdx = ReadLE32(NextPPCondPtr); 277 CurPPCondPtr = NextPPCondPtr; 278 } 279 } 280 } 281 while (HashEntryI < LastHashTokPtr); 282 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 283 assert(TableIdx && "No jumping from #endifs."); 284 285 // Update our side-table iterator. 286 const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 287 assert(NextPPCondPtr >= CurPPCondPtr); 288 CurPPCondPtr = NextPPCondPtr; 289 290 // Read where we should jump to. 291 HashEntryI = TokBuf + ReadLE32(NextPPCondPtr); 292 uint32_t NextIdx = ReadLE32(NextPPCondPtr); 293 294 // By construction NextIdx will be zero if this is a #endif. This is useful 295 // to know to obviate lexing another token. 296 bool isEndif = NextIdx == 0; 297 298 // This case can occur when we see something like this: 299 // 300 // #if ... 301 // /* a comment or nothing */ 302 // #elif 303 // 304 // If we are skipping the first #if block it will be the case that CurPtr 305 // already points 'elif'. Just return. 306 307 if (CurPtr > HashEntryI) { 308 assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 309 // Did we reach a #endif? 
If so, go ahead and consume that token as well. 310 if (isEndif) 311 CurPtr += DISK_TOKEN_SIZE*2; 312 else 313 LastHashTokPtr = HashEntryI; 314 315 return isEndif; 316 } 317 318 // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 319 CurPtr = HashEntryI; 320 321 // Update the location of the last observed '#'. This is useful if we 322 // are skipping multiple blocks. 323 LastHashTokPtr = CurPtr; 324 325 // Skip the '#' token. 326 assert(((tok::TokenKind)*CurPtr) == tok::hash); 327 CurPtr += DISK_TOKEN_SIZE; 328 329 // Did we reach a #endif? If so, go ahead and consume that token as well. 330 if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } 331 332 return isEndif; 333} 334 335SourceLocation PTHLexer::getSourceLocation() { 336 // getSourceLocation is not on the hot path. It is used to get the location 337 // of the next token when transitioning back to this lexer when done 338 // handling a #included file. Just read the necessary data from the token 339 // data buffer to construct the SourceLocation object. 340 // NOTE: This is a virtual function; hence it is defined out-of-line. 
341 const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4); 342 uint32_t Offset = ReadLE32(OffsetPtr); 343 return FileStartLoc.getFileLocWithOffset(Offset); 344} 345 346//===----------------------------------------------------------------------===// 347// OnDiskChainedHashTable 348//===----------------------------------------------------------------------===// 349 350template<typename Info> 351class OnDiskChainedHashTable { 352 const unsigned NumBuckets; 353 const unsigned NumEntries; 354 const unsigned char* const Buckets; 355 const unsigned char* const Base; 356public: 357 typedef typename Info::internal_key_type internal_key_type; 358 typedef typename Info::external_key_type external_key_type; 359 typedef typename Info::data_type data_type; 360 361 OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries, 362 const unsigned char* buckets, 363 const unsigned char* base) 364 : NumBuckets(numBuckets), NumEntries(numEntries), 365 Buckets(buckets), Base(base) { 366 assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 && 367 "'buckets' must have a 4-byte alignment"); 368 } 369 370 unsigned getNumBuckets() const { return NumBuckets; } 371 unsigned getNumEntries() const { return NumEntries; } 372 const unsigned char* getBase() const { return Base; } 373 const unsigned char* getBuckets() const { return Buckets; } 374 375 bool isEmpty() const { return NumEntries == 0; } 376 377 class iterator { 378 internal_key_type key; 379 const unsigned char* const data; 380 const unsigned len; 381 public: 382 iterator() : data(0), len(0) {} 383 iterator(const internal_key_type k, const unsigned char* d, unsigned l) 384 : key(k), data(d), len(l) {} 385 386 data_type operator*() const { return Info::ReadData(key, data, len); } 387 bool operator==(const iterator& X) const { return X.data == data; } 388 bool operator!=(const iterator& X) const { return X.data != data; } 389 }; 390 391 iterator find(const external_key_type& eKey) { 392 const internal_key_type& iKey = 
Info::GetInternalKey(eKey); 393 unsigned key_hash = Info::ComputeHash(iKey); 394 395 // Each bucket is just a 32-bit offset into the PTH file. 396 unsigned idx = key_hash & (NumBuckets - 1); 397 const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx; 398 399 unsigned offset = ReadLE32(Bucket); 400 if (offset == 0) return iterator(); // Empty bucket. 401 const unsigned char* Items = Base + offset; 402 403 // 'Items' starts with a 16-bit unsigned integer representing the 404 // number of items in this bucket. 405 unsigned len = ReadUnalignedLE16(Items); 406 407 for (unsigned i = 0; i < len; ++i) { 408 // Read the hash. 409 uint32_t item_hash = ReadUnalignedLE32(Items); 410 411 // Determine the length of the key and the data. 412 const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items); 413 unsigned item_len = L.first + L.second; 414 415 // Compare the hashes. If they are not the same, skip the entry entirely. 416 if (item_hash != key_hash) { 417 Items += item_len; 418 continue; 419 } 420 421 // Read the key. 422 const internal_key_type& X = 423 Info::ReadKey((const unsigned char* const) Items, L.first); 424 425 // If the key doesn't match just skip reading the value. 426 if (!Info::EqualKey(X, iKey)) { 427 Items += item_len; 428 continue; 429 } 430 431 // The key matches! 
432 return iterator(X, Items + L.first, L.second); 433 } 434 435 return iterator(); 436 } 437 438 iterator end() const { return iterator(); } 439 440 441 static OnDiskChainedHashTable* Create(const unsigned char* buckets, 442 const unsigned char* const base) { 443 444 assert(buckets > base); 445 assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 && 446 "buckets should be 4-byte aligned."); 447 448 unsigned numBuckets = ReadLE32(buckets); 449 unsigned numEntries = ReadLE32(buckets); 450 return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets, 451 base); 452 } 453}; 454 455//===----------------------------------------------------------------------===// 456// PTH file lookup: map from strings to file data. 457//===----------------------------------------------------------------------===// 458 459/// PTHFileLookup - This internal data structure is used by the PTHManager 460/// to map from FileEntry objects managed by FileManager to offsets within 461/// the PTH file. 462namespace { 463class VISIBILITY_HIDDEN PTHFileData { 464 const uint32_t TokenOff; 465 const uint32_t PPCondOff; 466public: 467 PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) 468 : TokenOff(tokenOff), PPCondOff(ppCondOff) {} 469 470 uint32_t getTokenOffset() const { return TokenOff; } 471 uint32_t getPPCondOffset() const { return PPCondOff; } 472}; 473 474 475class VISIBILITY_HIDDEN PTHFileLookupCommonTrait { 476public: 477 typedef std::pair<unsigned char, const char*> internal_key_type; 478 479 static unsigned ComputeHash(internal_key_type x) { 480 return BernsteinHash(x.second); 481 } 482 483 static std::pair<unsigned, unsigned> 484 ReadKeyDataLength(const unsigned char*& d) { 485 unsigned keyLen = (unsigned) ReadUnalignedLE16(d); 486 unsigned dataLen = (unsigned) *(d++); 487 return std::make_pair(keyLen, dataLen); 488 } 489 490 static internal_key_type ReadKey(const unsigned char* d, unsigned) { 491 unsigned char k = *(d++); // Read the entry kind. 
492 return std::make_pair(k, (const char*) d); 493 } 494}; 495 496class VISIBILITY_HIDDEN PTHFileLookupTrait : public PTHFileLookupCommonTrait { 497public: 498 typedef const FileEntry* external_key_type; 499 typedef PTHFileData data_type; 500 501 static internal_key_type GetInternalKey(const FileEntry* FE) { 502 return std::make_pair((unsigned char) 0x1, FE->getName()); 503 } 504 505 static bool EqualKey(internal_key_type a, internal_key_type b) { 506 return a.first == b.first && strcmp(a.second, b.second) == 0; 507 } 508 509 static PTHFileData ReadData(const internal_key_type& k, 510 const unsigned char* d, unsigned) { 511 assert(k.first == 0x1 && "Only file lookups can match!"); 512 uint32_t x = ::ReadUnalignedLE32(d); 513 uint32_t y = ::ReadUnalignedLE32(d); 514 return PTHFileData(x, y); 515 } 516}; 517 518class VISIBILITY_HIDDEN PTHStringLookupTrait { 519public: 520 typedef uint32_t 521 data_type; 522 523 typedef const std::pair<const char*, unsigned> 524 external_key_type; 525 526 typedef external_key_type internal_key_type; 527 528 static bool EqualKey(const internal_key_type& a, 529 const internal_key_type& b) { 530 return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0 531 : false; 532 } 533 534 static unsigned ComputeHash(const internal_key_type& a) { 535 return BernsteinHash(a.first, a.second); 536 } 537 538 // This hopefully will just get inlined and removed by the optimizer. 
539 static const internal_key_type& 540 GetInternalKey(const external_key_type& x) { return x; } 541 542 static std::pair<unsigned, unsigned> 543 ReadKeyDataLength(const unsigned char*& d) { 544 return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t)); 545 } 546 547 static std::pair<const char*, unsigned> 548 ReadKey(const unsigned char* d, unsigned n) { 549 assert(n >= 2 && d[n-1] == '\0'); 550 return std::make_pair((const char*) d, n-1); 551 } 552 553 static uint32_t ReadData(const internal_key_type& k, const unsigned char* d, 554 unsigned) { 555 return ::ReadUnalignedLE32(d); 556 } 557}; 558 559} // end anonymous namespace 560 561typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup; 562typedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup; 563 564//===----------------------------------------------------------------------===// 565// PTHManager methods. 566//===----------------------------------------------------------------------===// 567 568PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 569 const unsigned char* idDataTable, 570 IdentifierInfo** perIDCache, 571 void* stringIdLookup, unsigned numIds, 572 const unsigned char* spellingBase, 573 const char* originalSourceFile) 574: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 575 IdDataTable(idDataTable), StringIdLookup(stringIdLookup), 576 NumIds(numIds), PP(0), SpellingBase(spellingBase), 577 OriginalSourceFile(originalSourceFile) {} 578 579PTHManager::~PTHManager() { 580 delete Buf; 581 delete (PTHFileLookup*) FileLookup; 582 delete (PTHStringIdLookup*) StringIdLookup; 583 free(PerIDCache); 584} 585 586static void InvalidPTH(Diagnostic *Diags, const char* Msg = 0) { 587 if (!Diags) return; 588 if (!Msg) Msg = "Invalid or corrupted PTH file"; 589 unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Warning, Msg); 590 Diags->Report(FullSourceLoc(), DiagID); 591} 592 593PTHManager* PTHManager::Create(const std::string& file, 
Diagnostic* Diags) { 594 // Memory map the PTH file. 595 llvm::OwningPtr<llvm::MemoryBuffer> 596 File(llvm::MemoryBuffer::getFile(file.c_str())); 597 598 if (!File) { 599 if (Diags) { 600 unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Warning, 601 "PTH file %0 could not be read"); 602 Diags->Report(FullSourceLoc(), DiagID) << file; 603 } 604 605 return 0; 606 } 607 608 // Get the buffer ranges and check if there are at least three 32-bit 609 // words at the end of the file. 610 const unsigned char* BufBeg = (unsigned char*)File->getBufferStart(); 611 const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd(); 612 613 // Check the prologue of the file. 614 if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) || 615 memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) { 616 InvalidPTH(Diags); 617 return 0; 618 } 619 620 // Read the PTH version. 621 const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1); 622 unsigned Version = ReadLE32(p); 623 624 if (Version != PTHManager::Version) { 625 InvalidPTH(Diags, 626 Version < PTHManager::Version 627 ? "PTH file uses an older PTH format that is no longer supported" 628 : "PTH file uses a newer PTH format that cannot be read"); 629 return 0; 630 } 631 632 // Compute the address of the index table at the end of the PTH file. 633 const unsigned char *PrologueOffset = p; 634 635 if (PrologueOffset >= BufEnd) { 636 InvalidPTH(Diags); 637 return 0; 638 } 639 640 // Construct the file lookup table. This will be used for mapping from 641 // FileEntry*'s to cached tokens. 642 const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2; 643 const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset); 644 645 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 646 InvalidPTH(Diags); 647 return 0; // FIXME: Proper error diagnostic? 648 } 649 650 llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); 651 652 // Warn if the PTH file is empty. 
We still want to create a PTHManager 653 // as the PTH could be used with -include-pth. 654 if (FL->isEmpty()) 655 InvalidPTH(Diags, "PTH file contains no cached source data"); 656 657 // Get the location of the table mapping from persistent ids to the 658 // data needed to reconstruct identifiers. 659 const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0; 660 const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset); 661 662 if (!(IData >= BufBeg && IData < BufEnd)) { 663 InvalidPTH(Diags); 664 return 0; 665 } 666 667 // Get the location of the hashtable mapping between strings and 668 // persistent IDs. 669 const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1; 670 const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset); 671 if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) { 672 InvalidPTH(Diags); 673 return 0; 674 } 675 676 llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, 677 BufBeg)); 678 // Get the location of the spelling cache. 679 const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3; 680 const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset); 681 if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) { 682 InvalidPTH(Diags); 683 return 0; 684 } 685 686 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 687 uint32_t NumIds = ReadLE32(IData); 688 689 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 690 // so that we in the best case only zero out memory once when the OS returns 691 // us new pages. 692 IdentifierInfo** PerIDCache = 0; 693 694 if (NumIds) { 695 PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache)); 696 if (!PerIDCache) { 697 InvalidPTH(Diags, "Could not allocate memory for processing PTH file"); 698 return 0; 699 } 700 } 701 702 // Compute the address of the original source file. 
703 const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4; 704 unsigned len = ReadUnalignedLE16(originalSourceBase); 705 if (!len) originalSourceBase = 0; 706 707 // Create the new PTHManager. 708 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, 709 SL.take(), NumIds, spellingBase, 710 (const char*) originalSourceBase); 711} 712 713IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { 714 // Look in the PTH file for the string data for the IdentifierInfo object. 715 const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID; 716 const unsigned char* IDData = 717 (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry); 718 assert(IDData < (const unsigned char*)Buf->getBufferEnd()); 719 720 // Allocate the object. 721 std::pair<IdentifierInfo,const unsigned char*> *Mem = 722 Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >(); 723 724 Mem->second = IDData; 725 assert(IDData[0] != '\0'); 726 IdentifierInfo *II = new ((void*) Mem) IdentifierInfo(); 727 728 // Store the new IdentifierInfo in the cache. 729 PerIDCache[PersistentID] = II; 730 assert(II->getName() && II->getName()[0] != '\0'); 731 return II; 732} 733 734IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) { 735 PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup); 736 // Double check our assumption that the last character isn't '\0'. 737 assert(NameEnd==NameStart || NameStart[NameEnd-NameStart-1] != '\0'); 738 PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart, 739 NameEnd - NameStart)); 740 if (I == SL.end()) // No identifier found? 741 return 0; 742 743 // Match found. Return the identifier! 
744 assert(*I > 0); 745 return GetIdentifierInfo(*I-1); 746} 747 748PTHLexer *PTHManager::CreateLexer(FileID FID) { 749 const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID); 750 if (!FE) 751 return 0; 752 753 // Lookup the FileEntry object in our file lookup data structure. It will 754 // return a variant that indicates whether or not there is an offset within 755 // the PTH file that contains cached tokens. 756 PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup); 757 PTHFileLookup::iterator I = PFL.find(FE); 758 759 if (I == PFL.end()) // No tokens available? 760 return 0; 761 762 const PTHFileData& FileData = *I; 763 764 const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart(); 765 // Compute the offset of the token data within the buffer. 766 const unsigned char* data = BufStart + FileData.getTokenOffset(); 767 768 // Get the location of pp-conditional table. 769 const unsigned char* ppcond = BufStart + FileData.getPPCondOffset(); 770 uint32_t Len = ReadLE32(ppcond); 771 if (Len == 0) ppcond = 0; 772 773 assert(PP && "No preprocessor set yet!"); 774 return new PTHLexer(*PP, FID, data, ppcond, *this); 775} 776 777//===----------------------------------------------------------------------===// 778// 'stat' caching. 
779//===----------------------------------------------------------------------===// 780 781namespace { 782class VISIBILITY_HIDDEN PTHStatData { 783public: 784 const bool hasStat; 785 const ino_t ino; 786 const dev_t dev; 787 const mode_t mode; 788 const time_t mtime; 789 const off_t size; 790 791 PTHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s) 792 : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {} 793 794 PTHStatData() 795 : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {} 796}; 797 798class VISIBILITY_HIDDEN PTHStatLookupTrait : public PTHFileLookupCommonTrait { 799public: 800 typedef const char* external_key_type; // const char* 801 typedef PTHStatData data_type; 802 803 static internal_key_type GetInternalKey(const char *path) { 804 // The key 'kind' doesn't matter here because it is ignored in EqualKey. 805 return std::make_pair((unsigned char) 0x0, path); 806 } 807 808 static bool EqualKey(internal_key_type a, internal_key_type b) { 809 // When doing 'stat' lookups we don't care about the kind of 'a' and 'b', 810 // just the paths. 811 return strcmp(a.second, b.second) == 0; 812 } 813 814 static data_type ReadData(const internal_key_type& k, const unsigned char* d, 815 unsigned) { 816 817 if (k.first /* File or Directory */) { 818 if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words. 819 ino_t ino = (ino_t) ReadUnalignedLE32(d); 820 dev_t dev = (dev_t) ReadUnalignedLE32(d); 821 mode_t mode = (mode_t) ReadUnalignedLE16(d); 822 time_t mtime = (time_t) ReadUnalignedLE64(d); 823 return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d)); 824 } 825 826 // Negative stat. Don't read anything. 
827 return data_type(); 828 } 829}; 830 831class VISIBILITY_HIDDEN PTHStatCache : public StatSysCallCache { 832 typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy; 833 CacheTy Cache; 834 835public: 836 PTHStatCache(PTHFileLookup &FL) : 837 Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(), 838 FL.getBase()) {} 839 840 ~PTHStatCache() {} 841 842 int stat(const char *path, struct stat *buf) { 843 // Do the lookup for the file's data in the PTH file. 844 CacheTy::iterator I = Cache.find(path); 845 846 // If we don't get a hit in the PTH file just forward to 'stat'. 847 if (I == Cache.end()) return ::stat(path, buf); 848 849 const PTHStatData& Data = *I; 850 851 if (!Data.hasStat) 852 return 1; 853 854 buf->st_ino = Data.ino; 855 buf->st_dev = Data.dev; 856 buf->st_mtime = Data.mtime; 857 buf->st_mode = Data.mode; 858 buf->st_size = Data.size; 859 return 0; 860 } 861}; 862} // end anonymous namespace 863 864StatSysCallCache *PTHManager::createStatCache() { 865 return new PTHStatCache(*((PTHFileLookup*) FileLookup)); 866} 867