PTHLexer.cpp revision a4bd8eb4d6d4b625f6bbb62fc180b02eab6433ed
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/ADT/StringMap.h" 23#include "llvm/ADT/OwningPtr.h" 24#include "llvm/Support/Compiler.h" 25#include "llvm/Support/MathExtras.h" 26#include "llvm/Support/MemoryBuffer.h" 27#include "llvm/System/Host.h" 28using namespace clang; 29 30#define DISK_TOKEN_SIZE (1+1+2+4+4) 31 32//===----------------------------------------------------------------------===// 33// Utility methods for reading from the mmap'ed PTH file. 34//===----------------------------------------------------------------------===// 35 36static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) { 37 uint16_t V = ((uint16_t)Data[0]) | 38 ((uint16_t)Data[1] << 8); 39 Data += 2; 40 return V; 41} 42 43static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) { 44 uint32_t V = ((uint32_t)Data[0]) | 45 ((uint32_t)Data[1] << 8) | 46 ((uint32_t)Data[2] << 16) | 47 ((uint32_t)Data[3] << 24); 48 Data += 4; 49 return V; 50} 51 52static inline uint32_t ReadLE32(const unsigned char *&Data) { 53 // Hosts that directly support little-endian 32-bit loads can just 54 // use them. Big-endian hosts need a bswap. 55 uint32_t V = *((uint32_t*)Data); 56 if (llvm::sys::isBigEndianHost()) 57 V = llvm::ByteSwap_32(V); 58 Data += 4; 59 return V; 60} 61 62// Bernstein hash function: 63// This is basically copy-and-paste from StringMap. This likely won't 64// stay here, which is why I didn't both to expose this function from 65// String Map. 66static unsigned BernsteinHash(const char* x) { 67 unsigned int R = 0; 68 for ( ; *x != '\0' ; ++x) R = R * 33 + *x; 69 return R + (R >> 5); 70} 71 72static unsigned BernsteinHash(const char* x, unsigned n) { 73 unsigned int R = 0; 74 for (unsigned i = 0 ; i < n ; ++i, ++x) R = R * 33 + *x; 75 return R + (R >> 5); 76} 77 78//===----------------------------------------------------------------------===// 79// PTHLexer methods. 80//===----------------------------------------------------------------------===// 81 82PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D, 83 const unsigned char *ppcond, PTHManager &PM) 84 : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 85 PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) { 86 87 FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID); 88} 89 90void PTHLexer::Lex(Token& Tok) { 91LexNextToken: 92 93 //===--------------------------------------==// 94 // Read the raw token data. 95 //===--------------------------------------==// 96 97 // Shadow CurPtr into an automatic variable. 98 const unsigned char *CurPtrShadow = CurPtr; 99 100 // Read in the data for the token. 101 unsigned Word0 = ReadLE32(CurPtrShadow); 102 uint32_t IdentifierID = ReadLE32(CurPtrShadow); 103 uint32_t FileOffset = ReadLE32(CurPtrShadow); 104 105 tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF); 106 Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF); 107 uint32_t Len = Word0 >> 16; 108 109 CurPtr = CurPtrShadow; 110 111 //===--------------------------------------==// 112 // Construct the token itself. 113 //===--------------------------------------==// 114 115 Tok.startToken(); 116 Tok.setKind(TKind); 117 Tok.setFlag(TFlags); 118 assert(!LexingRawMode); 119 Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset)); 120 Tok.setLength(Len); 121 122 // Handle identifiers. 123 if (Tok.isLiteral()) { 124 Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID)); 125 } 126 else if (IdentifierID) { 127 MIOpt.ReadToken(); 128 IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1); 129 130 Tok.setIdentifierInfo(II); 131 132 // Change the kind of this identifier to the appropriate token kind, e.g. 133 // turning "for" into a keyword. 134 Tok.setKind(II->getTokenID()); 135 136 if (II->isHandleIdentifierCase()) 137 PP->HandleIdentifier(Tok); 138 return; 139 } 140 141 //===--------------------------------------==// 142 // Process the token. 143 //===--------------------------------------==// 144#if 0 145 SourceManager& SM = PP->getSourceManager(); 146 llvm::cerr << SM.getFileEntryForID(FileID)->getName() 147 << ':' << SM.getLogicalLineNumber(Tok.getLocation()) 148 << ':' << SM.getLogicalColumnNumber(Tok.getLocation()) 149 << '\n'; 150#endif 151 152 if (TKind == tok::eof) { 153 // Save the end-of-file token. 154 EofToken = Tok; 155 156 Preprocessor *PPCache = PP; 157 158 assert(!ParsingPreprocessorDirective); 159 assert(!LexingRawMode); 160 161 // FIXME: Issue diagnostics similar to Lexer. 162 if (PP->HandleEndOfFile(Tok, false)) 163 return; 164 165 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 166 return PPCache->Lex(Tok); 167 } 168 169 if (TKind == tok::hash && Tok.isAtStartOfLine()) { 170 LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE; 171 assert(!LexingRawMode); 172 PP->HandleDirective(Tok); 173 174 if (PP->isCurrentLexer(this)) 175 goto LexNextToken; 176 177 return PP->Lex(Tok); 178 } 179 180 if (TKind == tok::eom) { 181 assert(ParsingPreprocessorDirective); 182 ParsingPreprocessorDirective = false; 183 return; 184 } 185 186 MIOpt.ReadToken(); 187} 188 189// FIXME: We can just grab the last token instead of storing a copy 190// into EofToken. 191void PTHLexer::getEOF(Token& Tok) { 192 assert(EofToken.is(tok::eof)); 193 Tok = EofToken; 194} 195 196void PTHLexer::DiscardToEndOfLine() { 197 assert(ParsingPreprocessorDirective && ParsingFilename == false && 198 "Must be in a preprocessing directive!"); 199 200 // We assume that if the preprocessor wishes to discard to the end of 201 // the line that it also means to end the current preprocessor directive. 202 ParsingPreprocessorDirective = false; 203 204 // Skip tokens by only peeking at their token kind and the flags. 205 // We don't need to actually reconstruct full tokens from the token buffer. 206 // This saves some copies and it also reduces IdentifierInfo* lookup. 207 const unsigned char* p = CurPtr; 208 while (1) { 209 // Read the token kind. Are we at the end of the file? 210 tok::TokenKind x = (tok::TokenKind) (uint8_t) *p; 211 if (x == tok::eof) break; 212 213 // Read the token flags. Are we at the start of the next line? 214 Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1]; 215 if (y & Token::StartOfLine) break; 216 217 // Skip to the next token. 218 p += DISK_TOKEN_SIZE; 219 } 220 221 CurPtr = p; 222} 223 224/// SkipBlock - Used by Preprocessor to skip the current conditional block. 225bool PTHLexer::SkipBlock() { 226 assert(CurPPCondPtr && "No cached PP conditional information."); 227 assert(LastHashTokPtr && "No known '#' token."); 228 229 const unsigned char* HashEntryI = 0; 230 uint32_t Offset; 231 uint32_t TableIdx; 232 233 do { 234 // Read the token offset from the side-table. 235 Offset = ReadLE32(CurPPCondPtr); 236 237 // Read the target table index from the side-table. 238 TableIdx = ReadLE32(CurPPCondPtr); 239 240 // Compute the actual memory address of the '#' token data for this entry. 241 HashEntryI = TokBuf + Offset; 242 243 // Optmization: "Sibling jumping". #if...#else...#endif blocks can 244 // contain nested blocks. In the side-table we can jump over these 245 // nested blocks instead of doing a linear search if the next "sibling" 246 // entry is not at a location greater than LastHashTokPtr. 247 if (HashEntryI < LastHashTokPtr && TableIdx) { 248 // In the side-table we are still at an entry for a '#' token that 249 // is earlier than the last one we saw. Check if the location we would 250 // stride gets us closer. 251 const unsigned char* NextPPCondPtr = 252 PPCond + TableIdx*(sizeof(uint32_t)*2); 253 assert(NextPPCondPtr >= CurPPCondPtr); 254 // Read where we should jump to. 255 uint32_t TmpOffset = ReadLE32(NextPPCondPtr); 256 const unsigned char* HashEntryJ = TokBuf + TmpOffset; 257 258 if (HashEntryJ <= LastHashTokPtr) { 259 // Jump directly to the next entry in the side table. 260 HashEntryI = HashEntryJ; 261 Offset = TmpOffset; 262 TableIdx = ReadLE32(NextPPCondPtr); 263 CurPPCondPtr = NextPPCondPtr; 264 } 265 } 266 } 267 while (HashEntryI < LastHashTokPtr); 268 assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'"); 269 assert(TableIdx && "No jumping from #endifs."); 270 271 // Update our side-table iterator. 272 const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2); 273 assert(NextPPCondPtr >= CurPPCondPtr); 274 CurPPCondPtr = NextPPCondPtr; 275 276 // Read where we should jump to. 277 HashEntryI = TokBuf + ReadLE32(NextPPCondPtr); 278 uint32_t NextIdx = ReadLE32(NextPPCondPtr); 279 280 // By construction NextIdx will be zero if this is a #endif. This is useful 281 // to know to obviate lexing another token. 282 bool isEndif = NextIdx == 0; 283 284 // This case can occur when we see something like this: 285 // 286 // #if ... 287 // /* a comment or nothing */ 288 // #elif 289 // 290 // If we are skipping the first #if block it will be the case that CurPtr 291 // already points 'elif'. Just return. 292 293 if (CurPtr > HashEntryI) { 294 assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE); 295 // Did we reach a #endif? If so, go ahead and consume that token as well. 296 if (isEndif) 297 CurPtr += DISK_TOKEN_SIZE*2; 298 else 299 LastHashTokPtr = HashEntryI; 300 301 return isEndif; 302 } 303 304 // Otherwise, we need to advance. Update CurPtr to point to the '#' token. 305 CurPtr = HashEntryI; 306 307 // Update the location of the last observed '#'. This is useful if we 308 // are skipping multiple blocks. 309 LastHashTokPtr = CurPtr; 310 311 // Skip the '#' token. 312 assert(((tok::TokenKind)*CurPtr) == tok::hash); 313 CurPtr += DISK_TOKEN_SIZE; 314 315 // Did we reach a #endif? If so, go ahead and consume that token as well. 316 if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; } 317 318 return isEndif; 319} 320 321SourceLocation PTHLexer::getSourceLocation() { 322 // getSourceLocation is not on the hot path. It is used to get the location 323 // of the next token when transitioning back to this lexer when done 324 // handling a #included file. Just read the necessary data from the token 325 // data buffer to construct the SourceLocation object. 326 // NOTE: This is a virtual function; hence it is defined out-of-line. 327 const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4); 328 uint32_t Offset = ReadLE32(OffsetPtr); 329 return FileStartLoc.getFileLocWithOffset(Offset); 330} 331 332//===----------------------------------------------------------------------===// 333// OnDiskChainedHashTable 334//===----------------------------------------------------------------------===// 335 336template<typename Info> 337class OnDiskChainedHashTable { 338 const unsigned NumBuckets; 339 const unsigned NumEntries; 340 const unsigned char* const Buckets; 341 const unsigned char* const Base; 342public: 343 typedef typename Info::internal_key_type internal_key_type; 344 typedef typename Info::external_key_type external_key_type; 345 typedef typename Info::data_type data_type; 346 347 OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries, 348 const unsigned char* buckets, 349 const unsigned char* base) 350 : NumBuckets(numBuckets), NumEntries(numEntries), 351 Buckets(buckets), Base(base) { 352 assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 && 353 "'buckets' must have a 4-byte alignment"); 354 } 355 356 357 bool isEmpty() const { return NumEntries == 0; } 358 359 class iterator { 360 const unsigned char* const data; 361 const unsigned len; 362 public: 363 iterator() : data(0), len(0) {} 364 iterator(const unsigned char* d, unsigned l) : data(d), len(l) {} 365 366 data_type operator*() const { return Info::ReadData(data, len); } 367 bool operator==(const iterator& X) const { return X.data == data; } 368 bool operator!=(const iterator& X) const { return X.data != data; } 369 }; 370 371 iterator find(const external_key_type& eKey) { 372 const internal_key_type& iKey = Info::GetInternalKey(eKey); 373 unsigned key_hash = Info::ComputeHash(iKey); 374 375 // Each bucket is just a 32-bit offset into the PTH file. 376 unsigned idx = key_hash & (NumBuckets - 1); 377 const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx; 378 379 unsigned offset = ReadLE32(Bucket); 380 if (offset == 0) return iterator(); // Empty bucket. 381 const unsigned char* Items = Base + offset; 382 383 // 'Items' starts with a 16-bit unsigned integer representing the 384 // number of items in this bucket. 385 unsigned len = ReadUnalignedLE16(Items); 386 387 for (unsigned i = 0; i < len; ++i) { 388 // Read the hash. 389 uint32_t item_hash = ReadUnalignedLE32(Items); 390 391 // Determine the length of the key and the data. 392 const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items); 393 unsigned item_len = L.first + L.second; 394 395 // Compare the hashes. If they are not the same, skip the entry entirely. 396 if (item_hash != key_hash) { 397 Items += item_len; 398 continue; 399 } 400 401 // Read the key. 402 const internal_key_type& X = 403 Info::ReadKey((const unsigned char* const) Items, L.first); 404 405 // If the key doesn't match just skip reading the value. 406 if (!Info::EqualKey(X, iKey)) { 407 Items += item_len; 408 continue; 409 } 410 411 // The key matches! 412 return iterator(Items + L.first, L.second); 413 } 414 415 return iterator(); 416 } 417 418 iterator end() const { return iterator(); } 419 420 421 static OnDiskChainedHashTable* Create(const unsigned char* buckets, 422 const unsigned char* const base) { 423 424 assert(buckets > base); 425 assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 && 426 "buckets should be 4-byte aligned."); 427 428 unsigned numBuckets = ReadLE32(buckets); 429 unsigned numEntries = ReadLE32(buckets); 430 return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets, 431 base); 432 } 433}; 434 435//===----------------------------------------------------------------------===// 436// PTH file lookup: map from strings to file data. 437//===----------------------------------------------------------------------===// 438 439/// PTHFileLookup - This internal data structure is used by the PTHManager 440/// to map from FileEntry objects managed by FileManager to offsets within 441/// the PTH file. 442namespace { 443class VISIBILITY_HIDDEN PTHFileData { 444 const uint32_t TokenOff; 445 const uint32_t PPCondOff; 446public: 447 PTHFileData(uint32_t tokenOff, uint32_t ppCondOff) 448 : TokenOff(tokenOff), PPCondOff(ppCondOff) {} 449 450 uint32_t getTokenOffset() const { return TokenOff; } 451 uint32_t getPPCondOffset() const { return PPCondOff; } 452}; 453 454class VISIBILITY_HIDDEN PTHFileLookupTrait { 455public: 456 typedef PTHFileData data_type; 457 typedef const FileEntry* external_key_type; 458 typedef const char* internal_key_type; 459 460 static bool EqualKey(const char* a, const char* b) { 461 return strcmp(a, b) == 0; 462 } 463 464 static unsigned ComputeHash(const char* x) { 465 return BernsteinHash(x); 466 } 467 468 static const char* GetInternalKey(const FileEntry* FE) { 469 return FE->getName(); 470 } 471 472 static std::pair<unsigned, unsigned> 473 ReadKeyDataLength(const unsigned char*& d) { 474 return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U); 475 } 476 477 static const char* ReadKey(const unsigned char* d, unsigned) { 478 return (const char*) d; 479 } 480 481 static PTHFileData ReadData(const unsigned char* d, unsigned) { 482 uint32_t x = ::ReadUnalignedLE32(d); 483 uint32_t y = ::ReadUnalignedLE32(d); 484 return PTHFileData(x, y); 485 } 486}; 487 488class VISIBILITY_HIDDEN PTHStringLookupTrait { 489public: 490 typedef uint32_t 491 data_type; 492 493 typedef const std::pair<const char*, unsigned> 494 external_key_type; 495 496 typedef external_key_type internal_key_type; 497 498 static bool EqualKey(const internal_key_type& a, 499 const internal_key_type& b) { 500 return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0 501 : false; 502 } 503 504 static unsigned ComputeHash(const internal_key_type& a) { 505 return BernsteinHash(a.first, a.second); 506 } 507 508 // This hopefully will just get inlined and removed by the optimizer. 509 static const internal_key_type& 510 GetInternalKey(const external_key_type& x) { return x; } 511 512 static std::pair<unsigned, unsigned> 513 ReadKeyDataLength(const unsigned char*& d) { 514 return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t)); 515 } 516 517 static std::pair<const char*, unsigned> 518 ReadKey(const unsigned char* d, unsigned n) { 519 assert(n >= 2 && d[n-1] == '\0'); 520 return std::make_pair((const char*) d, n-1); 521 } 522 523 static uint32_t ReadData(const unsigned char* d, unsigned) { 524 return ::ReadUnalignedLE32(d); 525 } 526}; 527 528} // end anonymous namespace 529 530typedef OnDiskChainedHashTable<PTHFileLookupTrait> PTHFileLookup; 531typedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup; 532 533//===----------------------------------------------------------------------===// 534// PTHManager methods. 535//===----------------------------------------------------------------------===// 536 537PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 538 const unsigned char* idDataTable, 539 IdentifierInfo** perIDCache, 540 void* stringIdLookup, unsigned numIds, 541 const unsigned char* spellingBase) 542: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 543 IdDataTable(idDataTable), StringIdLookup(stringIdLookup), 544 NumIds(numIds), PP(0), SpellingBase(spellingBase) {} 545 546PTHManager::~PTHManager() { 547 delete Buf; 548 delete (PTHFileLookup*) FileLookup; 549 delete (PTHStringIdLookup*) StringIdLookup; 550 free(PerIDCache); 551} 552 553static void InvalidPTH(Diagnostic *Diags, const char* Msg = 0) { 554 if (!Diags) return; 555 if (!Msg) Msg = "Invalid or corrupted PTH file"; 556 unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note, Msg); 557 Diags->Report(FullSourceLoc(), DiagID); 558} 559 560PTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) { 561 // Memory map the PTH file. 562 llvm::OwningPtr<llvm::MemoryBuffer> 563 File(llvm::MemoryBuffer::getFile(file.c_str())); 564 565 if (!File) { 566 if (Diags) { 567 unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note, 568 "PTH file %0 could not be read"); 569 Diags->Report(FullSourceLoc(), DiagID) << file; 570 } 571 572 return 0; 573 } 574 575 // Get the buffer ranges and check if there are at least three 32-bit 576 // words at the end of the file. 577 const unsigned char* BufBeg = (unsigned char*)File->getBufferStart(); 578 const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd(); 579 580 // Check the prologue of the file. 581 if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) || 582 memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) { 583 InvalidPTH(Diags); 584 return 0; 585 } 586 587 // Read the PTH version. 588 const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1); 589 unsigned Version = ReadLE32(p); 590 591 if (Version != PTHManager::Version) { 592 InvalidPTH(Diags, 593 Version < PTHManager::Version 594 ? "PTH file uses an older PTH format that is no longer supported" 595 : "PTH file uses a newer PTH format that cannot be read"); 596 return 0; 597 } 598 599 // Compute the address of the index table at the end of the PTH file. 600 const unsigned char *PrologueOffset = p; 601 602 if (PrologueOffset >= BufEnd) { 603 InvalidPTH(Diags); 604 return 0; 605 } 606 607 // Construct the file lookup table. This will be used for mapping from 608 // FileEntry*'s to cached tokens. 609 const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2; 610 const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset); 611 612 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 613 InvalidPTH(Diags); 614 return 0; // FIXME: Proper error diagnostic? 615 } 616 617 llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg)); 618 if (FL->isEmpty()) { 619 InvalidPTH(Diags, "PTH file contains no cached source data"); 620 return 0; 621 } 622 623 // Get the location of the table mapping from persistent ids to the 624 // data needed to reconstruct identifiers. 625 const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0; 626 const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset); 627 628 if (!(IData >= BufBeg && IData < BufEnd)) { 629 InvalidPTH(Diags); 630 return 0; 631 } 632 633 // Get the location of the hashtable mapping between strings and 634 // persistent IDs. 635 const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1; 636 const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset); 637 if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) { 638 InvalidPTH(Diags); 639 return 0; 640 } 641 642 llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable, 643 BufBeg)); 644 if (SL->isEmpty()) { 645 InvalidPTH(Diags, "PTH file contains no identifiers."); 646 return 0; 647 } 648 649 // Get the location of the spelling cache. 650 const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3; 651 const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset); 652 if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) { 653 InvalidPTH(Diags); 654 return 0; 655 } 656 657 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 658 uint32_t NumIds = ReadLE32(IData); 659 660 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 661 // so that we in the best case only zero out memory once when the OS returns 662 // us new pages. 663 IdentifierInfo** PerIDCache = 0; 664 665 if (NumIds) { 666 PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache)); 667 if (!PerIDCache) { 668 InvalidPTH(Diags, "Could not allocate memory for processing PTH file"); 669 return 0; 670 } 671 } 672 673 // Create the new PTHManager. 674 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, 675 SL.take(), NumIds, spellingBase); 676} 677IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) { 678 // Look in the PTH file for the string data for the IdentifierInfo object. 679 const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID; 680 const unsigned char* IDData = 681 (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry); 682 assert(IDData < (const unsigned char*)Buf->getBufferEnd()); 683 684 // Allocate the object. 685 std::pair<IdentifierInfo,const unsigned char*> *Mem = 686 Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >(); 687 688 Mem->second = IDData; 689 assert(IDData[0] != '\0'); 690 IdentifierInfo *II = new ((void*) Mem) IdentifierInfo(); 691 692 // Store the new IdentifierInfo in the cache. 693 PerIDCache[PersistentID] = II; 694 assert(II->getName() && II->getName()[0] != '\0'); 695 return II; 696} 697 698IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) { 699 PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup); 700 // Double check our assumption that the last character isn't '\0'. 701 assert(NameStart[NameEnd-NameStart-1] != '\0'); 702 PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart, 703 NameEnd - NameStart)); 704 if (I == SL.end()) // No identifier found? 705 return 0; 706 707 // Match found. Return the identifier! 708 assert(*I > 0); 709 return GetIdentifierInfo(*I-1); 710} 711 712PTHLexer *PTHManager::CreateLexer(FileID FID) { 713 const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID); 714 if (!FE) 715 return 0; 716 717 // Lookup the FileEntry object in our file lookup data structure. It will 718 // return a variant that indicates whether or not there is an offset within 719 // the PTH file that contains cached tokens. 720 PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup); 721 PTHFileLookup::iterator I = PFL.find(FE); 722 723 if (I == PFL.end()) // No tokens available? 724 return 0; 725 726 const PTHFileData& FileData = *I; 727 728 const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart(); 729 // Compute the offset of the token data within the buffer. 730 const unsigned char* data = BufStart + FileData.getTokenOffset(); 731 732 // Get the location of pp-conditional table. 733 const unsigned char* ppcond = BufStart + FileData.getPPCondOffset(); 734 uint32_t Len = ReadLE32(ppcond); 735 if (Len == 0) ppcond = 0; 736 737 assert(PP && "No preprocessor set yet!"); 738 return new PTHLexer(*PP, FID, data, ppcond, *this); 739} 740