// CacheTokens.cpp revision 0d0bf8cf58b35302312cc155287fde3e81eb25a7
//===--- CacheTokens.cpp - Caching of lexer tokens for PCH support --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This provides a possible implementation of PCH support for Clang that is
// based on caching lexed tokens and identifiers.
1246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner// 1346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner//===----------------------------------------------------------------------===// 1446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 1546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang.h" 1646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang/Basic/FileManager.h" 1746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang/Basic/SourceManager.h" 1846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang/Basic/IdentifierTable.h" 1946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang/Basic/Diagnostic.h" 2046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang/Lex/Lexer.h" 2146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "clang/Lex/Preprocessor.h" 2246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "llvm/ADT/StringMap.h" 2346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "llvm/Support/MemoryBuffer.h" 2446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "llvm/Support/raw_ostream.h" 2546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "llvm/System/Path.h" 2646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "llvm/Support/Compiler.h" 2746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner#include "llvm/Support/Streams.h" 2846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 2946be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerusing namespace clang; 3046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 3146be48730333120a7b939116cef075e61c12c703David 'Digit' Turnertypedef uint32_t Offset; 3246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 3346be48730333120a7b939116cef075e61c12c703David 'Digit' Turnernamespace { 
3446be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerclass VISIBILITY_HIDDEN PCHEntry { 3546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset TokenData, PPCondData; 3646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 3746be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerpublic: 3846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner PCHEntry() {} 3946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 4046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner PCHEntry(Offset td, Offset ppcd) 4146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner : TokenData(td), PPCondData(ppcd) {} 4246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 4346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset getTokenOffset() const { return TokenData; } 4446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset getPPCondTableOffset() const { return PPCondData; } 4546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}; 4646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 4746be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerclass OffsetOpt { 4846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner bool valid; 4946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset off; 5046be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerpublic: 5146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner OffsetOpt() : valid(false) {} 5246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner bool hasOffset() const { return valid; } 5346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset getOffset() const { assert(valid); return off; } 5446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void setOffset(Offset o) { off = o; valid = true; } 5546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}; 5646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} // end anonymous namespace 
5746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 5846be48730333120a7b939116cef075e61c12c703David 'Digit' Turnertypedef llvm::DenseMap<const FileEntry*, PCHEntry> PCHMap; 5946be48730333120a7b939116cef075e61c12c703David 'Digit' Turnertypedef llvm::DenseMap<const IdentifierInfo*,uint32_t> IDMap; 6046be48730333120a7b939116cef075e61c12c703David 'Digit' Turnertypedef llvm::StringMap<OffsetOpt, llvm::BumpPtrAllocator> CachedStrsTy; 6146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 6246be48730333120a7b939116cef075e61c12c703David 'Digit' Turnernamespace { 6346be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerclass VISIBILITY_HIDDEN PTHWriter { 6446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IDMap IM; 6546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner llvm::raw_fd_ostream& Out; 6646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Preprocessor& PP; 6746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner uint32_t idcount; 6846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner PCHMap PM; 6946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner CachedStrsTy CachedStrs; 7046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset CurStrOffset; 7146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner std::vector<llvm::StringMapEntry<OffsetOpt>*> StrEntries; 7246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 7346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner //// Get the persistent id for the given IdentifierInfo*. 7446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner uint32_t ResolveID(const IdentifierInfo* II); 7546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 7646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner /// Emit a token to the PTH file. 
7746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void EmitToken(const Token& T); 7846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 7946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void Emit8(uint32_t V) { 8046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V); 8146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 8246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 8346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void Emit16(uint32_t V) { 8446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V); 8546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V >> 8); 8646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner assert((V >> 16) == 0); 8746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 8846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 8946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void Emit24(uint32_t V) { 9046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V); 9146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V >> 8); 9246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V >> 16); 9346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner assert((V >> 24) == 0); 9446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 9546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 9646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void Emit32(uint32_t V) { 9746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V); 9846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V >> 8); 9946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V >> 16); 10046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Out << (unsigned char)(V >> 
24); 10146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 10246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 10346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void EmitBuf(const char* I, const char* E) { 10446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for ( ; I != E ; ++I) Out << *I; 10546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 10646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 10746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner std::pair<Offset,std::pair<Offset, Offset> > EmitIdentifierTable(); 10846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset EmitFileTable(); 10946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner PCHEntry LexTokens(Lexer& L); 11046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset EmitCachedSpellings(); 11146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 11246be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerpublic: 11346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner PTHWriter(llvm::raw_fd_ostream& out, Preprocessor& pp) 11446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner : Out(out), PP(pp), idcount(0), CurStrOffset(0) {} 11546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 11646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner void GeneratePTH(); 11746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}; 11846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} // end anonymous namespace 11946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 12046be48730333120a7b939116cef075e61c12c703David 'Digit' Turneruint32_t PTHWriter::ResolveID(const IdentifierInfo* II) { 12146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Null IdentifierInfo's map to the persistent ID 0. 
12246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (!II) 12346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return 0; 12446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 12546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IDMap::iterator I = IM.find(II); 12646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 12746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (I == IM.end()) { 12846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IM[II] = ++idcount; // Pre-increment since '0' is reserved for NULL. 12946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return idcount; 13046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 13146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 13246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return I->second; // We've already added 1. 13346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 13446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 13546be48730333120a7b939116cef075e61c12c703David 'Digit' Turnervoid PTHWriter::EmitToken(const Token& T) { 13646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(((uint32_t) T.getKind()) | 13746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (((uint32_t) T.getFlags()) << 8) | 13846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner (((uint32_t) T.getLength()) << 16)); 13946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 14046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Literals (strings, numbers, characters) get cached spellings. 14146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (T.isLiteral()) { 14246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // FIXME: This uses the slow getSpelling(). Perhaps we do better 14346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // in the future? This only slows down PTH generation. 
14446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const std::string &spelling = PP.getSpelling(T); 14546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const char* s = spelling.c_str(); 14646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 14746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Get the string entry. 14846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner llvm::StringMapEntry<OffsetOpt> *E = 14946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner &CachedStrs.GetOrCreateValue(s, s+spelling.size()); 15046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 15146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (!E->getValue().hasOffset()) { 15246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner E->getValue().setOffset(CurStrOffset); 15346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner StrEntries.push_back(E); 15446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner CurStrOffset += spelling.size() + 1; 15546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 15646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 15746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(E->getValue().getOffset()); 15846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 15946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner else 16046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(ResolveID(T.getIdentifierInfo())); 16146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 16246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(PP.getSourceManager().getFileOffset(T.getLocation())); 16346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 16446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 16546be48730333120a7b939116cef075e61c12c703David 'Digit' Turnernamespace { 16646be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerstruct VISIBILITY_HIDDEN IDData { 
16746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const IdentifierInfo* II; 16846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner uint32_t FileOffset; 16946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}; 17046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 17146be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerclass VISIBILITY_HIDDEN CompareIDDataIndex { 17246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IDData* Table; 17346be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerpublic: 17446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner CompareIDDataIndex(IDData* table) : Table(table) {} 17546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 17646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner bool operator()(unsigned i, unsigned j) const { 17746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const IdentifierInfo* II_i = Table[i].II; 17846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const IdentifierInfo* II_j = Table[j].II; 17946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 18046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner unsigned i_len = II_i->getLength(); 18146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner unsigned j_len = II_j->getLength(); 18246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 18346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (i_len > j_len) 18446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return false; 18546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 18646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner if (i_len < j_len) 18746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return true; 18846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 18946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Otherwise, compare the strings themselves! 
19046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return strncmp(II_i->getName(), II_j->getName(), i_len) < 0; 19146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 19246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner}; 19346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 19446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 19546be48730333120a7b939116cef075e61c12c703David 'Digit' Turnerstd::pair<Offset,std::pair<Offset,Offset> > 19646be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerPTHWriter::EmitIdentifierTable() { 19746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner llvm::BumpPtrAllocator Alloc; 19846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 19946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Build an inverse map from persistent IDs -> IdentifierInfo*. 20046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IDData* IIDMap = Alloc.Allocate<IDData>(idcount); 20146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 20246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Generate mapping from persistent IDs -> IdentifierInfo*. 20346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for (IDMap::iterator I=IM.begin(), E=IM.end(); I!=E; ++I) { 20446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Decrement by 1 because we are using a vector for the lookup and 20546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // 0 is reserved for NULL. 
20646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner assert(I->second > 0); 20746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner assert(I->second-1 < idcount); 20846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner unsigned idx = I->second-1; 20946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IIDMap[idx].II = I->first; 21046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 21146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 21246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // We want to write out the strings in lexical order to support binary 21346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // search of strings to identifiers. Create such a table. 21446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner unsigned *LexicalOrder = Alloc.Allocate<unsigned>(idcount); 21546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for (unsigned i = 0; i < idcount ; ++i ) LexicalOrder[i] = i; 21646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner std::sort(LexicalOrder, LexicalOrder+idcount, CompareIDDataIndex(IIDMap)); 21746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 21846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Write out the lexically-sorted table of persistent ids. 21946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset LexicalOff = Out.tell(); 22046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for (unsigned i = 0; i < idcount ; ++i) Emit32(LexicalOrder[i]); 22146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 22246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Write out the string data itself. 
22346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset DataOff = Out.tell(); 22446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 22546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for (unsigned i = 0; i < idcount; ++i) { 22646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner IDData& d = IIDMap[i]; 22746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner d.FileOffset = Out.tell(); // Record the location for this data. 22846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner unsigned len = d.II->getLength(); // Write out the string length. 22946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(len); 23046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const char* buf = d.II->getName(); // Write out the string data. 23146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner EmitBuf(buf, buf+len); 23246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Emit a null character for those clients expecting that IdentifierInfo 23346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // strings are null terminated. 23446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit8('\0'); 23546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 23646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 23746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Now emit the table mapping from persistent IDs to PTH file offsets. 23846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset IDOff = Out.tell(); 23946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(idcount); // Emit the number of identifiers. 
24046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for (unsigned i = 0 ; i < idcount; ++i) Emit32(IIDMap[i].FileOffset); 24146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 24246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return std::make_pair(DataOff, std::make_pair(IDOff, LexicalOff)); 24346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 24446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 24546be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerOffset PTHWriter::EmitFileTable() { 24646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Determine the offset where this table appears in the PTH file. 24746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Offset off = (Offset) Out.tell(); 24846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 24946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner // Output the size of the table. 25046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(PM.size()); 25146be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 25246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner for (PCHMap::iterator I=PM.begin(), E=PM.end(); I!=E; ++I) { 25346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const FileEntry* FE = I->first; 25446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner const char* Name = FE->getName(); 25546be48730333120a7b939116cef075e61c12c703David 'Digit' Turner unsigned size = strlen(Name); 25646be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(size); 25746be48730333120a7b939116cef075e61c12c703David 'Digit' Turner EmitBuf(Name, Name+size); 25846be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(I->second.getTokenOffset()); 25946be48730333120a7b939116cef075e61c12c703David 'Digit' Turner Emit32(I->second.getPPCondTableOffset()); 26046be48730333120a7b939116cef075e61c12c703David 'Digit' Turner } 26146be48730333120a7b939116cef075e61c12c703David 
'Digit' Turner 26246be48730333120a7b939116cef075e61c12c703David 'Digit' Turner return off; 26346be48730333120a7b939116cef075e61c12c703David 'Digit' Turner} 26446be48730333120a7b939116cef075e61c12c703David 'Digit' Turner 26546be48730333120a7b939116cef075e61c12c703David 'Digit' TurnerPCHEntry PTHWriter::LexTokens(Lexer& L) { 266 // Pad 0's so that we emit tokens to a 4-byte alignment. 267 // This speed up reading them back in. 268 Offset off = (Offset) Out.tell(); 269 for (unsigned Pad = off % 4 ; Pad != 0 ; --Pad, ++off) Emit8(0); 270 271 // Keep track of matching '#if' ... '#endif'. 272 typedef std::vector<std::pair<Offset, unsigned> > PPCondTable; 273 PPCondTable PPCond; 274 std::vector<unsigned> PPStartCond; 275 bool ParsingPreprocessorDirective = false; 276 Token Tok; 277 278 do { 279 L.LexFromRawLexer(Tok); 280 281 if ((Tok.isAtStartOfLine() || Tok.is(tok::eof)) && 282 ParsingPreprocessorDirective) { 283 // Insert an eom token into the token cache. It has the same 284 // position as the next token that is not on the same line as the 285 // preprocessor directive. Observe that we continue processing 286 // 'Tok' when we exit this branch. 287 Token Tmp = Tok; 288 Tmp.setKind(tok::eom); 289 Tmp.clearFlag(Token::StartOfLine); 290 Tmp.setIdentifierInfo(0); 291 EmitToken(Tmp); 292 ParsingPreprocessorDirective = false; 293 } 294 295 if (Tok.is(tok::identifier)) { 296 Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); 297 continue; 298 } 299 300 if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) { 301 // Special processing for #include. Store the '#' token and lex 302 // the next token. 303 assert(!ParsingPreprocessorDirective); 304 Offset HashOff = (Offset) Out.tell(); 305 EmitToken(Tok); 306 307 // Get the next token. 308 L.LexFromRawLexer(Tok); 309 310 assert(!Tok.isAtStartOfLine()); 311 312 // Did we see 'include'/'import'/'include_next'? 
313 if (!Tok.is(tok::identifier)) 314 continue; 315 316 IdentifierInfo* II = PP.LookUpIdentifierInfo(Tok); 317 Tok.setIdentifierInfo(II); 318 tok::PPKeywordKind K = II->getPPKeywordID(); 319 320 assert(K != tok::pp_not_keyword); 321 ParsingPreprocessorDirective = true; 322 323 switch (K) { 324 default: 325 break; 326 case tok::pp_include: 327 case tok::pp_import: 328 case tok::pp_include_next: { 329 // Save the 'include' token. 330 EmitToken(Tok); 331 // Lex the next token as an include string. 332 L.setParsingPreprocessorDirective(true); 333 L.LexIncludeFilename(Tok); 334 L.setParsingPreprocessorDirective(false); 335 assert(!Tok.isAtStartOfLine()); 336 if (Tok.is(tok::identifier)) 337 Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); 338 339 break; 340 } 341 case tok::pp_if: 342 case tok::pp_ifdef: 343 case tok::pp_ifndef: { 344 // Ad an entry for '#if' and friends. We initially set the target index 345 // to 0. This will get backpatched when we hit #endif. 346 PPStartCond.push_back(PPCond.size()); 347 PPCond.push_back(std::make_pair(HashOff, 0U)); 348 break; 349 } 350 case tok::pp_endif: { 351 // Add an entry for '#endif'. We set the target table index to itself. 352 // This will later be set to zero when emitting to the PTH file. We 353 // use 0 for uninitialized indices because that is easier to debug. 354 unsigned index = PPCond.size(); 355 // Backpatch the opening '#if' entry. 356 assert(!PPStartCond.empty()); 357 assert(PPCond.size() > PPStartCond.back()); 358 assert(PPCond[PPStartCond.back()].second == 0); 359 PPCond[PPStartCond.back()].second = index; 360 PPStartCond.pop_back(); 361 // Add the new entry to PPCond. 362 PPCond.push_back(std::make_pair(HashOff, index)); 363 break; 364 } 365 case tok::pp_elif: 366 case tok::pp_else: { 367 // Add an entry for #elif or #else. 368 // This serves as both a closing and opening of a conditional block. 369 // This means that its entry will get backpatched later. 
370 unsigned index = PPCond.size(); 371 // Backpatch the previous '#if' entry. 372 assert(!PPStartCond.empty()); 373 assert(PPCond.size() > PPStartCond.back()); 374 assert(PPCond[PPStartCond.back()].second == 0); 375 PPCond[PPStartCond.back()].second = index; 376 PPStartCond.pop_back(); 377 // Now add '#elif' as a new block opening. 378 PPCond.push_back(std::make_pair(HashOff, 0U)); 379 PPStartCond.push_back(index); 380 break; 381 } 382 } 383 } 384 } 385 while (EmitToken(Tok), Tok.isNot(tok::eof)); 386 387 assert(PPStartCond.empty() && "Error: imblanced preprocessor conditionals."); 388 389 // Next write out PPCond. 390 Offset PPCondOff = (Offset) Out.tell(); 391 392 // Write out the size of PPCond so that clients can identifer empty tables. 393 Emit32(PPCond.size()); 394 395 for (unsigned i = 0, e = PPCond.size(); i!=e; ++i) { 396 Emit32(PPCond[i].first - off); 397 uint32_t x = PPCond[i].second; 398 assert(x != 0 && "PPCond entry not backpatched."); 399 // Emit zero for #endifs. This allows us to do checking when 400 // we read the PTH file back in. 401 Emit32(x == i ? 0 : x); 402 } 403 404 return PCHEntry(off, PPCondOff); 405} 406 407Offset PTHWriter::EmitCachedSpellings() { 408 // Write each cached strings to the PTH file. 409 Offset SpellingsOff = Out.tell(); 410 411 for (std::vector<llvm::StringMapEntry<OffsetOpt>*>::iterator 412 I = StrEntries.begin(), E = StrEntries.end(); I!=E; ++I) { 413 414 const char* data = (*I)->getKeyData(); 415 EmitBuf(data, data + (*I)->getKeyLength()); 416 Emit8('\0'); 417 } 418 419 return SpellingsOff; 420} 421 422void PTHWriter::GeneratePTH() { 423 // Generate the prologue. 424 Out << "cfe-pth"; 425 Emit32(PTHManager::Version); 426 Offset JumpOffset = Out.tell(); 427 Emit32(0); 428 429 // Iterate over all the files in SourceManager. Create a lexer 430 // for each file and cache the tokens. 
431 SourceManager &SM = PP.getSourceManager(); 432 const LangOptions &LOpts = PP.getLangOptions(); 433 434 for (SourceManager::fileinfo_iterator I = SM.fileinfo_begin(), 435 E = SM.fileinfo_end(); I != E; ++I) { 436 const SrcMgr::ContentCache &C = *I->second; 437 const FileEntry *FE = C.Entry; 438 439 // FIXME: Handle files with non-absolute paths. 440 llvm::sys::Path P(FE->getName()); 441 if (!P.isAbsolute()) 442 continue; 443 444 assert(!PM.count(FE) && "fileinfo's are not uniqued on FileEntry?"); 445 446 const llvm::MemoryBuffer *B = C.getBuffer(); 447 if (!B) continue; 448 449 FileID FID = SM.createFileID(FE, SourceLocation(), SrcMgr::C_User); 450 Lexer L(FID, SM, LOpts); 451 PM[FE] = LexTokens(L); 452 } 453 454 // Write out the identifier table. 455 const std::pair<Offset, std::pair<Offset,Offset> >& IdTableOff 456 = EmitIdentifierTable(); 457 458 // Write out the cached strings table. 459 Offset SpellingOff = EmitCachedSpellings(); 460 461 // Write out the file table. 462 Offset FileTableOff = EmitFileTable(); 463 464 // Finally, write out the offset table at the end. 465 Offset JumpTargetOffset = Out.tell(); 466 Emit32(IdTableOff.first); 467 Emit32(IdTableOff.second.first); 468 Emit32(IdTableOff.second.second); 469 Emit32(FileTableOff); 470 Emit32(SpellingOff); 471 472 // Now write the offset in the prologue. 473 Out.seek(JumpOffset); 474 Emit32(JumpTargetOffset); 475} 476 477void clang::CacheTokens(Preprocessor& PP, const std::string& OutFile) { 478 // Lex through the entire file. This will populate SourceManager with 479 // all of the header information. 480 Token Tok; 481 PP.EnterMainSourceFile(); 482 do { PP.Lex(Tok); } while (Tok.isNot(tok::eof)); 483 484 // Open up the PTH file. 485 std::string ErrMsg; 486 llvm::raw_fd_ostream Out(OutFile.c_str(), true, ErrMsg); 487 488 if (!ErrMsg.empty()) { 489 llvm::errs() << "PTH error: " << ErrMsg << "\n"; 490 return; 491 } 492 493 // Create the PTHWriter and generate the PTH file. 
494 PTHWriter PW(Out, PP); 495 PW.GeneratePTH(); 496} 497