CacheTokens.cpp revision aa269c2e9c242a2fdf1f47ea400f58823ae9e395
1f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman//===--- CacheTokens.cpp - Caching of lexer tokens for PTH support --------===// 2f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// 3f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// The LLVM Compiler Infrastructure 4f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// 584e66db653835cee524fc51185ed614f1d6ac628Chris Lattner// This file is distributed under the University of Illinois Open Source 684e66db653835cee524fc51185ed614f1d6ac628Chris Lattner// License. See LICENSE.TXT for details. 7f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// 8f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman//===----------------------------------------------------------------------===// 9f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// 10f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// This provides a possible implementation of PTH support for Clang that is 11f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// based on caching lexed tokens and identifiers. 
12f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman// 13f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman//===----------------------------------------------------------------------===// 14f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman 15f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang-cc.h" 16f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang/Basic/FileManager.h" 17f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang/Basic/SourceManager.h" 18f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang/Basic/IdentifierTable.h" 19f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang/Basic/Diagnostic.h" 20f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang/Lex/Lexer.h" 21f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "clang/Lex/Preprocessor.h" 22f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "llvm/ADT/StringMap.h" 23f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "llvm/Support/MemoryBuffer.h" 24f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "llvm/Support/raw_ostream.h" 25f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman#include "llvm/System/Path.h" 26ca954d25b78e887743474643e2fc35f090621098David Greene#include "llvm/Support/Compiler.h" 27ca954d25b78e887743474643e2fc35f090621098David Greene#include "llvm/Support/Streams.h" 28fdf9ee278b684165014055069f407362bf9044f3Dan Gohman 29fdf9ee278b684165014055069f407362bf9044f3Dan Gohman// FIXME: put this somewhere else? 
30fdf9ee278b684165014055069f407362bf9044f3Dan Gohman#ifndef S_ISDIR 31fdf9ee278b684165014055069f407362bf9044f3Dan Gohman#define S_ISDIR(x) (((x)&_S_IFDIR)!=0) 32fdf9ee278b684165014055069f407362bf9044f3Dan Gohman#endif 33fdf9ee278b684165014055069f407362bf9044f3Dan Gohman 347569322765651f19eea0609fb082e6b267d5d2b5Owen Andersonusing namespace clang; 35f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman 36f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohmantypedef uint32_t Offset; 37f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman 38f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohmanstatic void Emit8(llvm::raw_ostream& Out, uint32_t V) { 39f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman Out << (unsigned char)(V); 40f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman} 41fdf9ee278b684165014055069f407362bf9044f3Dan Gohman 42fdf9ee278b684165014055069f407362bf9044f3Dan Gohmanstatic void Emit16(llvm::raw_ostream& Out, uint32_t V) { 43877d0a7b6530bb7991487e1e701790aa5b52d7d1Dan Gohman Out << (unsigned char)(V); 44877d0a7b6530bb7991487e1e701790aa5b52d7d1Dan Gohman Out << (unsigned char)(V >> 8); 45877d0a7b6530bb7991487e1e701790aa5b52d7d1Dan Gohman assert((V >> 16) == 0); 46877d0a7b6530bb7991487e1e701790aa5b52d7d1Dan Gohman} 47fdf9ee278b684165014055069f407362bf9044f3Dan Gohman 48fdf9ee278b684165014055069f407362bf9044f3Dan Gohmanstatic void Emit32(llvm::raw_ostream& Out, uint32_t V) { 49fdf9ee278b684165014055069f407362bf9044f3Dan Gohman Out << (unsigned char)(V); 50eeb01df9c1f16636746d1fc6eef35ea0487eeecdDan Gohman Out << (unsigned char)(V >> 8); 51eeb01df9c1f16636746d1fc6eef35ea0487eeecdDan Gohman Out << (unsigned char)(V >> 16); 52eeb01df9c1f16636746d1fc6eef35ea0487eeecdDan Gohman Out << (unsigned char)(V >> 24); 53eeb01df9c1f16636746d1fc6eef35ea0487eeecdDan Gohman} 54eeb01df9c1f16636746d1fc6eef35ea0487eeecdDan Gohman 55f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohmanstatic void Emit64(llvm::raw_ostream& Out, uint64_t V) { 56f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman Out << 
(unsigned char)(V); 57f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman Out << (unsigned char)(V >> 8); 58f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman Out << (unsigned char)(V >> 16); 59f17a25c88b892d30c2b41ba7ecdfbdfb2b4be9cDan Gohman Out << (unsigned char)(V >> 24); 60 Out << (unsigned char)(V >> 32); 61 Out << (unsigned char)(V >> 40); 62 Out << (unsigned char)(V >> 48); 63 Out << (unsigned char)(V >> 56); 64} 65 66static void Pad(llvm::raw_fd_ostream& Out, unsigned A) { 67 Offset off = (Offset) Out.tell(); 68 uint32_t n = ((uintptr_t)(off+A-1) & ~(uintptr_t)(A-1)) - off; 69 for (; n ; --n) 70 Emit8(Out, 0); 71} 72 73// Bernstein hash function: 74// This is basically copy-and-paste from StringMap. This likely won't 75// stay here, which is why I didn't both to expose this function from 76// String Map. 77static unsigned BernsteinHash(const char* x) { 78 unsigned int R = 0; 79 for ( ; *x != '\0' ; ++x) R = R * 33 + *x; 80 return R + (R >> 5); 81} 82 83//===----------------------------------------------------------------------===// 84// On Disk Hashtable Logic. This will eventually get refactored and put 85// elsewhere. 
86//===----------------------------------------------------------------------===// 87 88template<typename Info> 89class OnDiskChainedHashTableGenerator { 90 unsigned NumBuckets; 91 unsigned NumEntries; 92 llvm::BumpPtrAllocator BA; 93 94 class Item { 95 public: 96 typename Info::key_type key; 97 typename Info::data_type data; 98 Item *next; 99 const uint32_t hash; 100 101 Item(typename Info::key_type_ref k, typename Info::data_type_ref d) 102 : key(k), data(d), next(0), hash(Info::ComputeHash(k)) {} 103 }; 104 105 class Bucket { 106 public: 107 Offset off; 108 Item* head; 109 unsigned length; 110 111 Bucket() {} 112 }; 113 114 Bucket* Buckets; 115 116private: 117 void insert(Bucket* b, size_t size, Item* E) { 118 unsigned idx = E->hash & (size - 1); 119 Bucket& B = b[idx]; 120 E->next = B.head; 121 ++B.length; 122 B.head = E; 123 } 124 125 void resize(size_t newsize) { 126 Bucket* newBuckets = (Bucket*) calloc(newsize, sizeof(Bucket)); 127 // Populate newBuckets with the old entries. 128 for (unsigned i = 0; i < NumBuckets; ++i) 129 for (Item* E = Buckets[i].head; E ; ) { 130 Item* N = E->next; 131 E->next = 0; 132 insert(newBuckets, newsize, E); 133 E = N; 134 } 135 136 free(Buckets); 137 NumBuckets = newsize; 138 Buckets = newBuckets; 139 } 140 141public: 142 143 void insert(typename Info::key_type_ref key, 144 typename Info::data_type_ref data) { 145 146 ++NumEntries; 147 if (4*NumEntries >= 3*NumBuckets) resize(NumBuckets*2); 148 insert(Buckets, NumBuckets, new (BA.Allocate<Item>()) Item(key, data)); 149 } 150 151 Offset Emit(llvm::raw_fd_ostream& out) { 152 // Emit the payload of the table. 153 for (unsigned i = 0; i < NumBuckets; ++i) { 154 Bucket& B = Buckets[i]; 155 if (!B.head) continue; 156 157 // Store the offset for the data of this bucket. 158 B.off = out.tell(); 159 160 // Write out the number of items in the bucket. 161 Emit16(out, B.length); 162 163 // Write out the entries in the bucket. 
164 for (Item *I = B.head; I ; I = I->next) { 165 Emit32(out, I->hash); 166 const std::pair<unsigned, unsigned>& Len = 167 Info::EmitKeyDataLength(out, I->key, I->data); 168 Info::EmitKey(out, I->key, Len.first); 169 Info::EmitData(out, I->key, I->data, Len.second); 170 } 171 } 172 173 // Emit the hashtable itself. 174 Pad(out, 4); 175 Offset TableOff = out.tell(); 176 Emit32(out, NumBuckets); 177 Emit32(out, NumEntries); 178 for (unsigned i = 0; i < NumBuckets; ++i) Emit32(out, Buckets[i].off); 179 180 return TableOff; 181 } 182 183 OnDiskChainedHashTableGenerator() { 184 NumEntries = 0; 185 NumBuckets = 64; 186 // Note that we do not need to run the constructors of the individual 187 // Bucket objects since 'calloc' returns bytes that are all 0. 188 Buckets = (Bucket*) calloc(NumBuckets, sizeof(Bucket)); 189 } 190 191 ~OnDiskChainedHashTableGenerator() { 192 free(Buckets); 193 } 194}; 195 196//===----------------------------------------------------------------------===// 197// PTH-specific stuff. 
//===----------------------------------------------------------------------===//

namespace {
/// PTHEntry - The per-file payload stored in the PTH file table: the offset
/// of the file's token data and the offset of its preprocessor-conditional
/// table, both as returned by PTHWriter::LexTokens.
class VISIBILITY_HIDDEN PTHEntry {
  Offset TokenData, PPCondData;

public:
  /// Leaves both offsets uninitialized.  Used for keys that are not files;
  /// FileEntryPTHEntryInfo::EmitData only reads the offsets for file keys.
  PTHEntry() {}

  PTHEntry(Offset td, Offset ppcd)
    : TokenData(td), PPCondData(ppcd) {}

  Offset getTokenOffset() const { return TokenData; }
  Offset getPPCondTableOffset() const { return PPCondData; }
};


/// PTHEntryKeyVariant - Key of the PTH file table.  A key is one of:
///   - IsFE:      a file, identified by its FileEntry;
///   - IsDE:      a directory, identified by path, with a heap copy of its
///                stat buffer;
///   - IsNoExist: a path with no associated stat data.
///
/// For the two path-based kinds only the 'path' pointer is stored, not a copy
/// of the string, so the string must outlive the key.  No copy operations are
/// defined, so copies of an IsDE key share the same StatBuf pointer.
class VISIBILITY_HIDDEN PTHEntryKeyVariant {
  union { const FileEntry* FE; const char* Path; };
  enum { IsFE = 0x1, IsDE = 0x2, IsNoExist = 0x0 } Kind;
  struct stat *StatBuf;
public:
  PTHEntryKeyVariant(const FileEntry *fe)
    : FE(fe), Kind(IsFE), StatBuf(0) {}

  /// Copies *statbuf onto the heap; the copy is released by EmitData().
  PTHEntryKeyVariant(struct stat* statbuf, const char* path)
    : Path(path), Kind(IsDE), StatBuf(new struct stat(*statbuf)) {}

  PTHEntryKeyVariant(const char* path)
    : Path(path), Kind(IsNoExist), StatBuf(0) {}

  bool isFile() const { return Kind == IsFE; }

  /// The string used as the hash-table key: the FileEntry's name for files,
  /// the stored path otherwise.
  const char* getCString() const {
    return Kind == IsFE ? FE->getName() : Path;
  }

  unsigned getKind() const { return (unsigned) Kind; }

  /// Write the stat information for this key: inode (32 bits), device (32),
  /// mode (16), modification time (64) and size (64).  Nothing is written for
  /// IsNoExist keys.
  ///
  /// For IsDE keys this also deletes StatBuf without clearing the pointer, so
  /// it must run at most once per heap-allocated stat copy (across all copies
  /// of the key).
  void EmitData(llvm::raw_ostream& Out) {
    switch (Kind) {
      case IsFE:
        // Emit stat information.
        ::Emit32(Out, FE->getInode());
        ::Emit32(Out, FE->getDevice());
        ::Emit16(Out, FE->getFileMode());
        ::Emit64(Out, FE->getModificationTime());
        ::Emit64(Out, FE->getSize());
        break;
      case IsDE:
        // Emit stat information.
        ::Emit32(Out, (uint32_t) StatBuf->st_ino);
        ::Emit32(Out, (uint32_t) StatBuf->st_dev);
        ::Emit16(Out, (uint16_t) StatBuf->st_mode);
        ::Emit64(Out, (uint64_t) StatBuf->st_mtime);
        ::Emit64(Out, (uint64_t) StatBuf->st_size);
        delete StatBuf;
        break;
      default:
        break;
    }
  }

  /// Number of bytes EmitData() writes: 0 for IsNoExist, otherwise the sum of
  /// the five stat fields (4 + 4 + 2 + 8 + 8).
  unsigned getRepresentationLength() const {
    return Kind == IsNoExist ? 0 : 4 + 4 + 2 + 8 + 8;
  }
};

/// FileEntryPTHEntryInfo - Trait class that tells
/// OnDiskChainedHashTableGenerator how to hash and serialize the
/// PTHEntryKeyVariant -> PTHEntry file table.  Keys are passed by value.
class VISIBILITY_HIDDEN FileEntryPTHEntryInfo {
public:
  typedef PTHEntryKeyVariant key_type;
  typedef key_type key_type_ref;

  typedef PTHEntry data_type;
  typedef const PTHEntry& data_type_ref;

  /// Hash the key's path string.
  static unsigned ComputeHash(PTHEntryKeyVariant V) {
    return BernsteinHash(V.getCString());
  }

  /// Write the key length (16 bits) and data length (8 bits) and return them
  /// as (key length, data length).
  static std::pair<unsigned,unsigned>
  EmitKeyDataLength(llvm::raw_ostream& Out, PTHEntryKeyVariant V,
                    const PTHEntry& E) {

    // Key bytes: one kind byte, the path characters and the terminating nul.
    unsigned n = strlen(V.getCString()) + 1 + 1;
    ::Emit16(Out, n);

    // Data bytes: the stat representation, plus two 32-bit offsets for files.
    unsigned m = V.getRepresentationLength() + (V.isFile() ? 4 + 4 : 0);
    ::Emit8(Out, m);

    return std::make_pair(n, m);
  }

  /// Write the key: the kind byte followed by the nul-terminated path.
  /// \p n is the key length returned by EmitKeyDataLength.
  static void EmitKey(llvm::raw_ostream& Out, PTHEntryKeyVariant V, unsigned n){
    // Emit the entry kind.
    ::Emit8(Out, (unsigned) V.getKind());
    // Emit the string.
    Out.write(V.getCString(), n - 1);
  }

  /// Write the data: for files the two PTHEntry offsets, then the key's stat
  /// information (if any).
  static void EmitData(llvm::raw_ostream& Out, PTHEntryKeyVariant V,
                       const PTHEntry& E, unsigned) {


    // For file entries emit the offsets into the PTH file for token data
    // and the preprocessor blocks table.
    if (V.isFile()) {
      ::Emit32(Out, E.getTokenOffset());
      ::Emit32(Out, E.getPPCondTableOffset());
    }

    // Emit any other data associated with the key (i.e., stat information).
    V.EmitData(Out);
  }
};

/// OffsetOpt - An Offset that may not have been assigned yet.
class OffsetOpt {
  bool valid;
  Offset off;
public:
  OffsetOpt() : valid(false) {}
  bool hasOffset() const { return valid; }
  Offset getOffset() const { assert(valid); return off; }
  void setOffset(Offset o) { off = o; valid = true; }
};
} // end anonymous namespace

typedef OnDiskChainedHashTableGenerator<FileEntryPTHEntryInfo> PTHMap;
typedef llvm::DenseMap<const IdentifierInfo*,uint32_t> IDMap;
typedef llvm::StringMap<OffsetOpt, llvm::BumpPtrAllocator> CachedStrsTy;

namespace {
/// PTHWriter - Writes a PTH file to a seekable output stream: a prologue,
/// the cached tokens of every file known to the SourceManager, the identifier
/// tables, the cached literal spellings and the file table.
class VISIBILITY_HIDDEN PTHWriter {
  /// Maps each identifier seen so far to its persistent ID (IDs start at 1).
  IDMap IM;
  llvm::raw_fd_ostream& Out;
  Preprocessor& PP;
  /// Number of persistent IDs handed out so far.
  uint32_t idcount;
  /// The file table (files, directories and failed stats).
  PTHMap PM;
  /// Literal spellings seen so far, each mapped to its offset within the
  /// spellings table.
  CachedStrsTy CachedStrs;
  /// Offset, relative to the start of the spellings table, that the next new
  /// spelling will receive.
  Offset CurStrOffset;
  /// The entries of CachedStrs in the order their offsets were assigned.
  std::vector<llvm::StringMapEntry<OffsetOpt>*> StrEntries;

  /// Get the persistent id for the given IdentifierInfo*.
  uint32_t ResolveID(const IdentifierInfo* II);

  /// Emit a token to the PTH file.
  void EmitToken(const Token& T);

  // Little-endian integer emitters that write to Out.

  void Emit8(uint32_t V) {
    Out << (unsigned char)(V);
  }

  void Emit16(uint32_t V) { ::Emit16(Out, V); }

  void Emit24(uint32_t V) {
    Out << (unsigned char)(V);
    Out << (unsigned char)(V >>  8);
    Out << (unsigned char)(V >> 16);
    assert((V >> 24) == 0);
  }

  void Emit32(uint32_t V) { ::Emit32(Out, V); }

  /// Write NumBytes raw bytes starting at Ptr.
  void EmitBuf(const char *Ptr, unsigned NumBytes) {
    Out.write(Ptr, NumBytes);
  }

  /// EmitIdentifierTable - Emits two tables to the PTH file.  The first is
  ///  a hashtable mapping from identifier strings to persistent IDs.
  ///  The second is a straight table mapping from persistent IDs to string data
  ///  (the keys of the first table).
  std::pair<Offset, Offset> EmitIdentifierTable();

  /// EmitFileTable - Emit a table mapping from file name strings to PTH
  /// token data.
  Offset EmitFileTable() { return PM.Emit(Out); }

  /// Lex one file with a raw lexer, emit its tokens and its
  /// preprocessor-conditional table, and return their offsets.
  PTHEntry LexTokens(Lexer& L);

  /// Write all cached literal spellings and return the table's offset.
  Offset EmitCachedSpellings();

public:
  PTHWriter(llvm::raw_fd_ostream& out, Preprocessor& pp)
    : Out(out), PP(pp), idcount(0), CurStrOffset(0) {}

  /// Access to the file table so that other code (e.g. a stat listener) can
  /// add entries to it.
  PTHMap &getPM() { return PM; }

  /// Write the complete PTH file.  \p MainFile, when non-null and non-empty,
  /// is recorded in the file as the name of the main source file.
  void GeneratePTH(const std::string *MainFile = 0);
};
} // end anonymous namespace

/// Return the persistent ID for \p II, assigning the next free ID the first
/// time an identifier is seen.  A null \p II maps to ID 0.
uint32_t PTHWriter::ResolveID(const IdentifierInfo* II) {
  // Null IdentifierInfo's map to the persistent ID 0.
  if (!II)
    return 0;

  IDMap::iterator I = IM.find(II);
  if (I != IM.end())
    return I->second; // We've already added 1.

  IM[II] = ++idcount; // Pre-increment since '0' is reserved for NULL.
  return idcount;
}

/// Emit one token as three 32-bit words:
///   1. kind (bits 0-7), flags (bits 8-15) and length (bits 16 and up);
///   2. for literals, the offset of the spelling within the spellings table,
///      otherwise the persistent ID of the token's identifier (0 if none);
///   3. the token's offset within its source file.
void PTHWriter::EmitToken(const Token& T) {
  // Emit the token kind, flags, and length.
  Emit32(((uint32_t) T.getKind()) | ((((uint32_t) T.getFlags())) << 8)|
         (((uint32_t) T.getLength()) << 16));

  if (!T.isLiteral()) {
    Emit32(ResolveID(T.getIdentifierInfo()));
  } else {
    // We cache *un-cleaned* spellings. This gives us 100% fidelity with the
    // source code.
    const char* s = T.getLiteralData();
    unsigned len = T.getLength();

    // Get the string entry.
    llvm::StringMapEntry<OffsetOpt> *E = &CachedStrs.GetOrCreateValue(s, s+len);

    // If this is a new string entry, bump the PTH offset.
    if (!E->getValue().hasOffset()) {
      E->getValue().setOffset(CurStrOffset);
      StrEntries.push_back(E);
      // The extra byte accounts for the nul that EmitCachedSpellings writes
      // after each spelling.
      CurStrOffset += len + 1;
    }

    // Emit the relative offset into the PTH file for the spelling string.
    Emit32(E->getValue().getOffset());
  }

  // Emit the offset into the original source file of this token so that we
  // can reconstruct its SourceLocation.
  Emit32(PP.getSourceManager().getFileOffset(T.getLocation()));
}

/// Raw-lex the file behind \p L and write its tokens, 4-byte aligned, followed
/// by its preprocessor-conditional (PPCond) table.
///
/// The PPCond table has one entry per #if/#ifdef/#ifndef/#elif/#else/#endif:
/// the offset of the directive's '#' token relative to the start of this
/// file's token data, and the index of the entry for the directive that
/// closes the block it opens (written as 0 for #endif).
///
/// \returns the offsets of the token data and of the PPCond table.
PTHEntry PTHWriter::LexTokens(Lexer& L) {
  // Pad 0's so that we emit tokens to a 4-byte alignment.
  // This speeds up reading them back in.
  Pad(Out, 4);
  Offset off = (Offset) Out.tell();

  // Keep track of matching '#if' ... '#endif'.
  typedef std::vector<std::pair<Offset, unsigned> > PPCondTable;
  PPCondTable PPCond;
  // Stack of PPCond indices for blocks that are open and not yet backpatched.
  std::vector<unsigned> PPStartCond;
  bool ParsingPreprocessorDirective = false;
  Token Tok;

  do {
    L.LexFromRawLexer(Tok);
  // Jumped to when 'Tok' already holds the next token to process.
  NextToken:

    if ((Tok.isAtStartOfLine() || Tok.is(tok::eof)) &&
        ParsingPreprocessorDirective) {
      // Insert an eom token into the token cache.  It has the same
      // position as the next token that is not on the same line as the
      // preprocessor directive.  Observe that we continue processing
      // 'Tok' when we exit this branch.
      Token Tmp = Tok;
      Tmp.setKind(tok::eom);
      Tmp.clearFlag(Token::StartOfLine);
      Tmp.setIdentifierInfo(0);
      EmitToken(Tmp);
      ParsingPreprocessorDirective = false;
    }

    if (Tok.is(tok::identifier)) {
      Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
      EmitToken(Tok);
      continue;
    }

    if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
      // Special processing for #include.  Store the '#' token and lex
      // the next token.
      assert(!ParsingPreprocessorDirective);
      // Remember where the '#' token is written; PPCond entries refer to it.
      Offset HashOff = (Offset) Out.tell();
      EmitToken(Tok);

      // Get the next token.
      L.LexFromRawLexer(Tok);

      // If we see the start of line, then we had a null directive "#".
      if (Tok.isAtStartOfLine())
        goto NextToken;

      // Did we see 'include'/'import'/'include_next'?
      if (Tok.isNot(tok::identifier)) {
        EmitToken(Tok);
        continue;
      }

      IdentifierInfo* II = PP.LookUpIdentifierInfo(Tok);
      Tok.setIdentifierInfo(II);
      tok::PPKeywordKind K = II->getPPKeywordID();

      ParsingPreprocessorDirective = true;

      switch (K) {
      case tok::pp_not_keyword:
        // Invalid directives "#foo" can occur in #if 0 blocks etc, just pass
        // them through.
      default:
        break;

      case tok::pp_include:
      case tok::pp_import:
      case tok::pp_include_next: {
        // Save the 'include' token.
        EmitToken(Tok);
        // Lex the next token as an include string.
        L.setParsingPreprocessorDirective(true);
        L.LexIncludeFilename(Tok);
        L.setParsingPreprocessorDirective(false);
        assert(!Tok.isAtStartOfLine());
        if (Tok.is(tok::identifier))
          Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));

        break;
      }
      case tok::pp_if:
      case tok::pp_ifdef:
      case tok::pp_ifndef: {
        // Add an entry for '#if' and friends.  We initially set the target
        // index to 0.  This will get backpatched when we hit #endif.
        PPStartCond.push_back(PPCond.size());
        PPCond.push_back(std::make_pair(HashOff, 0U));
        break;
      }
      case tok::pp_endif: {
        // Add an entry for '#endif'.  We set the target table index to itself.
        // This will later be set to zero when emitting to the PTH file.  We
        // use 0 for uninitialized indices because that is easier to debug.
        unsigned index = PPCond.size();
        // Backpatch the opening '#if' entry.
        // NOTE(review): an unmatched '#endif' is only caught by these asserts.
        assert(!PPStartCond.empty());
        assert(PPCond.size() > PPStartCond.back());
        assert(PPCond[PPStartCond.back()].second == 0);
        PPCond[PPStartCond.back()].second = index;
        PPStartCond.pop_back();
        // Add the new entry to PPCond.
        PPCond.push_back(std::make_pair(HashOff, index));
        EmitToken(Tok);

        // Some files have gibberish on the same line as '#endif'.
        // Discard these tokens.
        do
          L.LexFromRawLexer(Tok);
        while (Tok.isNot(tok::eof) && !Tok.isAtStartOfLine());
        // We have the next token in hand.
        // Don't immediately lex the next one.
        goto NextToken;
      }
      case tok::pp_elif:
      case tok::pp_else: {
        // Add an entry for #elif or #else.
        // This serves as both a closing and opening of a conditional block.
        // This means that its entry will get backpatched later.
        unsigned index = PPCond.size();
        // Backpatch the previous '#if' entry.
        // NOTE(review): an unmatched '#elif'/'#else' is only caught by these
        // asserts.
        assert(!PPStartCond.empty());
        assert(PPCond.size() > PPStartCond.back());
        assert(PPCond[PPStartCond.back()].second == 0);
        PPCond[PPStartCond.back()].second = index;
        PPStartCond.pop_back();
        // Now add '#elif' as a new block opening.
        PPCond.push_back(std::make_pair(HashOff, 0U));
        PPStartCond.push_back(index);
        break;
      }
      }
    }

    EmitToken(Tok);
  }
  while (Tok.isNot(tok::eof));

  assert(PPStartCond.empty() && "Error: imblanced preprocessor conditionals.");

  // Next write out PPCond.
  Offset PPCondOff = (Offset) Out.tell();

  // Write out the size of PPCond so that clients can identify empty tables.
  Emit32(PPCond.size());

  for (unsigned i = 0, e = PPCond.size(); i!=e; ++i) {
    // Offsets of '#' tokens are stored relative to the start of this file's
    // token data.
    Emit32(PPCond[i].first - off);
    uint32_t x = PPCond[i].second;
    assert(x != 0 && "PPCond entry not backpatched.");
    // Emit zero for #endifs.  This allows us to do checking when
    // we read the PTH file back in.
    Emit32(x == i ? 0 : x);
  }

  return PTHEntry(off, PPCondOff);
}

/// Write every cached literal spelling, each followed by a nul byte, in the
/// order the spellings were first seen (the order in which EmitToken assigned
/// their offsets).  \returns the offset at which the spellings start.
Offset PTHWriter::EmitCachedSpellings() {
  // Write each cached strings to the PTH file.
  Offset SpellingsOff = Out.tell();

  for (std::vector<llvm::StringMapEntry<OffsetOpt>*>::iterator
       I = StrEntries.begin(), E = StrEntries.end(); I!=E; ++I)
    EmitBuf((*I)->getKeyData(), (*I)->getKeyLength()+1 /*nul included*/);

  return SpellingsOff;
}

/// Write the whole PTH file, in this order:
///   1. the magic string "cfe-pth" and the 32-bit PTHManager::Version;
///   2. four 32-bit placeholder words, filled in at the very end;
///   3. the main file name: 16-bit length, the characters, and a zero byte;
///   4. the token data and PPCond table of every file in the SourceManager
///      that has an absolute path and an available buffer;
///   5. the identifier tables, the cached spellings and the file table.
/// Finally the stream is rewound to the placeholders, which receive the two
/// offsets returned by EmitIdentifierTable, the file table offset and the
/// spellings offset.
void PTHWriter::GeneratePTH(const std::string *MainFile) {
  // Generate the prologue.
  Out << "cfe-pth";
  Emit32(PTHManager::Version);

  // Leave 4 words for the prologue.
  Offset PrologueOffset = Out.tell();
  for (unsigned i = 0; i < 4; ++i)
    Emit32(0);

  // Write the name of the MainFile.
  if (MainFile && !MainFile->empty()) {
    Emit16(MainFile->length());
    EmitBuf(MainFile->data(), MainFile->length());
  } else {
    // String with 0 bytes.
    Emit16(0);
  }
  Emit8(0);

  // Iterate over all the files in SourceManager.  Create a lexer
  // for each file and cache the tokens.
  SourceManager &SM = PP.getSourceManager();
  const LangOptions &LOpts = PP.getLangOptions();

  for (SourceManager::fileinfo_iterator I = SM.fileinfo_begin(),
       E = SM.fileinfo_end(); I != E; ++I) {
    const SrcMgr::ContentCache &C = *I->second;
    const FileEntry *FE = C.Entry;

    // FIXME: Handle files with non-absolute paths.
    llvm::sys::Path P(FE->getName());
    if (!P.isAbsolute())
      continue;

    const llvm::MemoryBuffer *B = C.getBuffer();
    if (!B) continue;

    // Lex the file through a newly created FileID and record where its token
    // data and PPCond table were written.
    FileID FID = SM.createFileID(FE, SourceLocation(), SrcMgr::C_User);
    Lexer L(FID, SM, LOpts);
    PM.insert(FE, LexTokens(L));
  }

  // Write out the identifier table.
  const std::pair<Offset,Offset> &IdTableOff = EmitIdentifierTable();

  // Write out the cached strings table.
  Offset SpellingOff = EmitCachedSpellings();

  // Write out the file table.
  Offset FileTableOff = EmitFileTable();

  // Finally, write the prologue.
  Out.seek(PrologueOffset);
  Emit32(IdTableOff.first);
  Emit32(IdTableOff.second);
  Emit32(FileTableOff);
  Emit32(SpellingOff);
}

namespace {
/// StatListener - A simple "interpose" object used to monitor stat calls
/// invoked by FileManager while processing the original sources used
/// as input to PTH generation.  StatListener populates the PTHWriter's
/// file map with stat information for directories as well as negative stats.
/// Stat information for files are populated elsewhere.
671class StatListener : public StatSysCallCache { 672 PTHMap &PM; 673public: 674 StatListener(PTHMap &pm) : PM(pm) {} 675 ~StatListener() {} 676 677 int stat(const char *path, struct stat *buf) { 678 int result = ::stat(path, buf); 679 680 if (result != 0) // Failed 'stat'. 681 PM.insert(path, PTHEntry()); 682 else if (S_ISDIR(buf->st_mode)) { 683 // Only cache directories with absolute paths. 684 if (!llvm::sys::Path(path).isAbsolute()) 685 return result; 686 687 PM.insert(PTHEntryKeyVariant(buf, path), PTHEntry()); 688 } 689 690 return result; 691 } 692}; 693} // end anonymous namespace 694 695 696void clang::CacheTokens(Preprocessor &PP, const std::string &OutFile) { 697 // Open up the PTH file. 698 std::string ErrMsg; 699 llvm::raw_fd_ostream Out(OutFile.c_str(), true, ErrMsg); 700 701 if (!ErrMsg.empty()) { 702 llvm::errs() << "PTH error: " << ErrMsg << "\n"; 703 return; 704 } 705 706 // Get the name of the main file. 707 const SourceManager &SrcMgr = PP.getSourceManager(); 708 const FileEntry *MainFile = SrcMgr.getFileEntryForID(SrcMgr.getMainFileID()); 709 llvm::sys::Path MainFilePath(MainFile->getName()); 710 std::string MainFileName; 711 712 if (!MainFilePath.isAbsolute()) { 713 llvm::sys::Path P = llvm::sys::Path::GetCurrentDirectory(); 714 P.appendComponent(MainFilePath.toString()); 715 MainFileName = P.toString(); 716 } else { 717 MainFileName = MainFilePath.toString(); 718 } 719 720 // Create the PTHWriter. 721 PTHWriter PW(Out, PP); 722 723 // Install the 'stat' system call listener in the FileManager. 724 PP.getFileManager().setStatCache(new StatListener(PW.getPM())); 725 726 // Lex through the entire file. This will populate SourceManager with 727 // all of the header information. 728 Token Tok; 729 PP.EnterMainSourceFile(); 730 do { PP.Lex(Tok); } while (Tok.isNot(tok::eof)); 731 732 // Generate the PTH file. 
733 PP.getFileManager().setStatCache(0); 734 PW.GeneratePTH(&MainFileName); 735} 736 737//===----------------------------------------------------------------------===// 738 739class PTHIdKey { 740public: 741 const IdentifierInfo* II; 742 uint32_t FileOffset; 743}; 744 745namespace { 746class VISIBILITY_HIDDEN PTHIdentifierTableTrait { 747public: 748 typedef PTHIdKey* key_type; 749 typedef key_type key_type_ref; 750 751 typedef uint32_t data_type; 752 typedef data_type data_type_ref; 753 754 static unsigned ComputeHash(PTHIdKey* key) { 755 return BernsteinHash(key->II->getName()); 756 } 757 758 static std::pair<unsigned,unsigned> 759 EmitKeyDataLength(llvm::raw_ostream& Out, const PTHIdKey* key, uint32_t) { 760 unsigned n = strlen(key->II->getName()) + 1; 761 ::Emit16(Out, n); 762 return std::make_pair(n, sizeof(uint32_t)); 763 } 764 765 static void EmitKey(llvm::raw_fd_ostream& Out, PTHIdKey* key, unsigned n) { 766 // Record the location of the key data. This is used when generating 767 // the mapping from persistent IDs to strings. 768 key->FileOffset = Out.tell(); 769 Out.write(key->II->getName(), n); 770 } 771 772 static void EmitData(llvm::raw_ostream& Out, PTHIdKey*, uint32_t pID, 773 unsigned) { 774 ::Emit32(Out, pID); 775 } 776}; 777} // end anonymous namespace 778 779/// EmitIdentifierTable - Emits two tables to the PTH file. The first is 780/// a hashtable mapping from identifier strings to persistent IDs. The second 781/// is a straight table mapping from persistent IDs to string data (the 782/// keys of the first table). 783/// 784std::pair<Offset,Offset> PTHWriter::EmitIdentifierTable() { 785 // Build two maps: 786 // (1) an inverse map from persistent IDs -> (IdentifierInfo*,Offset) 787 // (2) a map from (IdentifierInfo*, Offset)* -> persistent IDs 788 789 // Note that we use 'calloc', so all the bytes are 0. 790 PTHIdKey *IIDMap = (PTHIdKey*)calloc(idcount, sizeof(PTHIdKey)); 791 792 // Create the hashtable. 
793 OnDiskChainedHashTableGenerator<PTHIdentifierTableTrait> IIOffMap; 794 795 // Generate mapping from persistent IDs -> IdentifierInfo*. 796 for (IDMap::iterator I = IM.begin(), E = IM.end(); I != E; ++I) { 797 // Decrement by 1 because we are using a vector for the lookup and 798 // 0 is reserved for NULL. 799 assert(I->second > 0); 800 assert(I->second-1 < idcount); 801 unsigned idx = I->second-1; 802 803 // Store the mapping from persistent ID to IdentifierInfo* 804 IIDMap[idx].II = I->first; 805 806 // Store the reverse mapping in a hashtable. 807 IIOffMap.insert(&IIDMap[idx], I->second); 808 } 809 810 // Write out the inverse map first. This causes the PCIDKey entries to 811 // record PTH file offsets for the string data. This is used to write 812 // the second table. 813 Offset StringTableOffset = IIOffMap.Emit(Out); 814 815 // Now emit the table mapping from persistent IDs to PTH file offsets. 816 Offset IDOff = Out.tell(); 817 Emit32(idcount); // Emit the number of identifiers. 818 for (unsigned i = 0 ; i < idcount; ++i) 819 Emit32(IIDMap[i].FileOffset); 820 821 // Finally, release the inverse map. 822 free(IIDMap); 823 824 return std::make_pair(IDOff, StringTableOffset); 825} 826