PTHLexer.cpp revision cd223444d1680290efe11da657faafc9a1ac14ba
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26 27using namespace clang; 28 29PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 30 PTHManager& PM) 31 : PreprocessorLexer(&pp, fileloc), TokBuf(D), CurPtr(D), LastHashTokPtr(0), 32 PTHMgr(PM), 33 NeedsFetching(true) { 34 // Make sure the EofToken is completely clean. 35 EofToken.startToken(); 36 } 37 38Token PTHLexer::GetToken() { 39 // Read the next token, or if we haven't advanced yet, get the last 40 // token read. 41 if (NeedsFetching) { 42 NeedsFetching = false; 43 ReadToken(LastFetched); 44 } 45 46 Token Tok = LastFetched; 47 48 // If we are in raw mode, zero out identifier pointers. This is 49 // needed for 'pragma poison'. Note that this requires that the Preprocessor 50 // can go back to the original source when it calls getSpelling(). 51 if (LexingRawMode && Tok.is(tok::identifier)) 52 Tok.setIdentifierInfo(0); 53 54 return Tok; 55} 56 57void PTHLexer::Lex(Token& Tok) { 58LexNextToken: 59 Tok = GetToken(); 60 61 if (AtLastToken()) { 62 Preprocessor *PPCache = PP; 63 64 if (LexEndOfFile(Tok)) 65 return; 66 67 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 68 return PPCache->Lex(Tok); 69 } 70 71 // Don't advance to the next token yet. Check if we are at the 72 // start of a new line and we're processing a directive. If so, we 73 // consume this token twice, once as an tok::eom. 74 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) { 75 ParsingPreprocessorDirective = false; 76 Tok.setKind(tok::eom); 77 MIOpt.ReadToken(); 78 return; 79 } 80 81 // Advance to the next token. 82 AdvanceToken(); 83 84 if (Tok.is(tok::hash)) { 85 if (Tok.isAtStartOfLine() && !LexingRawMode) { 86 LastHashTokPtr = CurPtr; 87 88 PP->HandleDirective(Tok); 89 90 if (PP->isCurrentLexer(this)) 91 goto LexNextToken; 92 93 return PP->Lex(Tok); 94 } 95 } 96 97 MIOpt.ReadToken(); 98 99 if (Tok.is(tok::identifier)) { 100 if (LexingRawMode) return; 101 return PP->HandleIdentifier(Tok); 102 } 103} 104 105bool PTHLexer::LexEndOfFile(Token &Tok) { 106 107 if (ParsingPreprocessorDirective) { 108 ParsingPreprocessorDirective = false; 109 Tok.setKind(tok::eom); 110 MIOpt.ReadToken(); 111 return true; // Have a token. 112 } 113 114 if (LexingRawMode) { 115 MIOpt.ReadToken(); 116 return true; // Have an eof token. 117 } 118 119 // FIXME: Issue diagnostics similar to Lexer. 120 return PP->HandleEndOfFile(Tok, false); 121} 122 123void PTHLexer::setEOF(Token& Tok) { 124 assert(!EofToken.is(tok::eof)); 125 Tok = EofToken; 126} 127 128void PTHLexer::DiscardToEndOfLine() { 129 assert(ParsingPreprocessorDirective && ParsingFilename == false && 130 "Must be in a preprocessing directive!"); 131 132 // Already at end-of-file? 133 if (AtLastToken()) 134 return; 135 136 // Find the first token that is not the start of the *current* line. 137 Token T; 138 for (Lex(T); !AtLastToken(); Lex(T)) 139 if (GetToken().isAtStartOfLine()) 140 return; 141} 142 143//===----------------------------------------------------------------------===// 144// Utility methods for reading from the mmap'ed PTH file. 145//===----------------------------------------------------------------------===// 146 147static inline uint8_t Read8(const char*& data) { 148 return (uint8_t) *(data++); 149} 150 151static inline uint32_t Read32(const char*& data) { 152 uint32_t V = (uint32_t) Read8(data); 153 V |= (((uint32_t) Read8(data)) << 8); 154 V |= (((uint32_t) Read8(data)) << 16); 155 V |= (((uint32_t) Read8(data)) << 24); 156 return V; 157} 158 159//===----------------------------------------------------------------------===// 160// Token reconstruction from the PTH file. 161//===----------------------------------------------------------------------===// 162 163void PTHLexer::ReadToken(Token& T) { 164 // Clear the token. 165 // FIXME: Setting the flags directly should obviate this step. 166 T.startToken(); 167 168 // Read the type of the token. 169 T.setKind((tok::TokenKind) Read8(CurPtr)); 170 171 // Set flags. This is gross, since we are really setting multiple flags. 172 T.setFlag((Token::TokenFlags) Read8(CurPtr)); 173 174 // Set the IdentifierInfo* (if any). 175 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(CurPtr)); 176 177 // Set the SourceLocation. Since all tokens are constructed using a 178 // raw lexer, they will all be offseted from the same FileID. 179 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(CurPtr))); 180 181 // Finally, read and set the length of the token. 182 T.setLength(Read32(CurPtr)); 183} 184 185//===----------------------------------------------------------------------===// 186// Internal Data Structures for PTH file lookup and resolving identifiers. 187//===----------------------------------------------------------------------===// 188 189 190/// PTHFileLookup - This internal data structure is used by the PTHManager 191/// to map from FileEntry objects managed by FileManager to offsets within 192/// the PTH file. 193namespace { 194class VISIBILITY_HIDDEN PTHFileLookup { 195public: 196 class Val { 197 uint32_t v; 198 199 public: 200 Val() : v(~0) {} 201 Val(uint32_t x) : v(x) {} 202 203 operator uint32_t() const { 204 assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 205 return v; 206 } 207 208 Val& operator=(uint32_t x) { v = x; return *this; } 209 bool isValid() const { return v != ~((uint32_t)0); } 210 }; 211 212private: 213 llvm::StringMap<Val> FileMap; 214 215public: 216 PTHFileLookup() {}; 217 218 Val Lookup(const FileEntry* FE) { 219 const char* s = FE->getName(); 220 unsigned size = strlen(s); 221 return FileMap.GetOrCreateValue(s, s+size).getValue(); 222 } 223 224 void ReadTable(const char* D) { 225 uint32_t N = Read32(D); // Read the length of the table. 226 227 for ( ; N > 0; --N) { // The rest of the data is the table itself. 228 uint32_t len = Read32(D); 229 const char* s = D; 230 D += len; 231 FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D); 232 } 233 } 234}; 235} // end anonymous namespace 236 237//===----------------------------------------------------------------------===// 238// PTHManager methods. 239//===----------------------------------------------------------------------===// 240 241PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 242 const char* idDataTable, IdentifierInfo** perIDCache, 243 Preprocessor& pp) 244: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 245 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 246 247PTHManager::~PTHManager() { 248 delete Buf; 249 delete (PTHFileLookup*) FileLookup; 250 free(PerIDCache); 251} 252 253PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 254 255 // Memory map the PTH file. 256 llvm::OwningPtr<llvm::MemoryBuffer> 257 File(llvm::MemoryBuffer::getFile(file.c_str())); 258 259 if (!File) 260 return 0; 261 262 // Get the buffer ranges and check if there are at least three 32-bit 263 // words at the end of the file. 264 const char* BufBeg = File->getBufferStart(); 265 const char* BufEnd = File->getBufferEnd(); 266 267 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 268 assert(false && "Invalid PTH file."); 269 return 0; // FIXME: Proper error diagnostic? 270 } 271 272 // Compute the address of the index table at the end of the PTH file. 273 // This table contains the offset of the file lookup table, the 274 // persistent ID -> identifer data table. 275 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 276 277 // Construct the file lookup table. This will be used for mapping from 278 // FileEntry*'s to cached tokens. 279 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 280 const char* FileTable = BufBeg + Read32(FileTableOffset); 281 282 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 283 assert(false && "Invalid PTH file."); 284 return 0; // FIXME: Proper error diagnostic? 285 } 286 287 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 288 FL->ReadTable(FileTable); 289 290 // Get the location of the table mapping from persistent ids to the 291 // data needed to reconstruct identifiers. 292 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 293 const char* IData = BufBeg + Read32(IDTableOffset); 294 if (!(IData > BufBeg && IData < BufEnd)) { 295 assert(false && "Invalid PTH file."); 296 return 0; // FIXME: Proper error diagnostic? 297 } 298 299 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 300 uint32_t NumIds = Read32(IData); 301 302 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 303 // so that we in the best case only zero out memory once when the OS returns 304 // us new pages. 305 IdentifierInfo** PerIDCache = 306 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache)); 307 308 if (!PerIDCache) { 309 assert(false && "Could not allocate Persistent ID cache."); 310 return 0; 311 } 312 313 // Create the new lexer. 314 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); 315} 316 317IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) { 318 // Read the persistent ID from the PTH file. 319 uint32_t persistentID = Read32(D); 320 321 // A persistent ID of '0' always maps to NULL. 322 if (!persistentID) 323 return 0; 324 325 // Adjust the persistent ID by subtracting '1' so that it can be used 326 // as an index within a table in the PTH file. 327 --persistentID; 328 329 // Check if the IdentifierInfo has already been resolved. 330 IdentifierInfo*& II = PerIDCache[persistentID]; 331 if (II) return II; 332 333 // Look in the PTH file for the string data for the IdentifierInfo object. 334 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 335 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 336 assert(IDData < Buf->getBufferEnd()); 337 338 // Read the length of the string. 339 uint32_t len = Read32(IDData); 340 341 // Get the IdentifierInfo* with the specified string. 342 II = &ITable.get(IDData, IDData+len); 343 return II; 344} 345 346PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 347 348 if (!FE) 349 return 0; 350 351 // Lookup the FileEntry object in our file lookup data structure. It will 352 // return a variant that indicates whether or not there is an offset within 353 // the PTH file that contains cached tokens. 354 PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE); 355 356 if (!Off.isValid()) // No tokens available. 357 return 0; 358 359 // Compute the offset of the token data within the buffer. 360 const char* data = Buf->getBufferStart() + Off; 361 assert(data < Buf->getBufferEnd()); 362 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this); 363} 364