PTHLexer.cpp revision 7415326dcbba6f333f5f7708ac1bf4982f3adb5a
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26 27using namespace clang; 28 29PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 30 PTHManager& PM) 31 : PreprocessorLexer(&pp, fileloc), TokBuf(D), PTHMgr(PM), 32 NeedsFetching(true) { 33 // Make sure the EofToken is completely clean. 34 EofToken.startToken(); 35 } 36 37Token PTHLexer::GetToken() { 38 // Read the next token, or if we haven't advanced yet, get the last 39 // token read. 40 if (NeedsFetching) { 41 NeedsFetching = false; 42 ReadToken(LastFetched); 43 } 44 45 Token Tok = LastFetched; 46 47 // If we are in raw mode, zero out identifier pointers. This is 48 // needed for 'pragma poison'. Note that this requires that the Preprocessor 49 // can go back to the original source when it calls getSpelling(). 50 if (LexingRawMode && Tok.is(tok::identifier)) 51 Tok.setIdentifierInfo(0); 52 53 return Tok; 54} 55 56void PTHLexer::Lex(Token& Tok) { 57LexNextToken: 58 Tok = GetToken(); 59 60 if (AtLastToken()) { 61 Preprocessor *PPCache = PP; 62 63 if (LexEndOfFile(Tok)) 64 return; 65 66 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 67 return PPCache->Lex(Tok); 68 } 69 70 // Don't advance to the next token yet. Check if we are at the 71 // start of a new line and we're processing a directive. If so, we 72 // consume this token twice, once as an tok::eom. 73 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) { 74 ParsingPreprocessorDirective = false; 75 Tok.setKind(tok::eom); 76 MIOpt.ReadToken(); 77 return; 78 } 79 80 // Advance to the next token. 81 AdvanceToken(); 82 83 if (Tok.is(tok::hash)) { 84 if (Tok.isAtStartOfLine() && !LexingRawMode) { 85 PP->HandleDirective(Tok); 86 87 if (PP->isCurrentLexer(this)) 88 goto LexNextToken; 89 90 return PP->Lex(Tok); 91 } 92 } 93 94 MIOpt.ReadToken(); 95 96 if (Tok.is(tok::identifier)) { 97 if (LexingRawMode) return; 98 return PP->HandleIdentifier(Tok); 99 } 100} 101 102bool PTHLexer::LexEndOfFile(Token &Tok) { 103 104 if (ParsingPreprocessorDirective) { 105 ParsingPreprocessorDirective = false; 106 Tok.setKind(tok::eom); 107 MIOpt.ReadToken(); 108 return true; // Have a token. 109 } 110 111 if (LexingRawMode) { 112 MIOpt.ReadToken(); 113 return true; // Have an eof token. 114 } 115 116 // FIXME: Issue diagnostics similar to Lexer. 117 return PP->HandleEndOfFile(Tok, false); 118} 119 120void PTHLexer::setEOF(Token& Tok) { 121 assert(!EofToken.is(tok::eof)); 122 Tok = EofToken; 123} 124 125void PTHLexer::DiscardToEndOfLine() { 126 assert(ParsingPreprocessorDirective && ParsingFilename == false && 127 "Must be in a preprocessing directive!"); 128 129 // Already at end-of-file? 130 if (AtLastToken()) 131 return; 132 133 // Find the first token that is not the start of the *current* line. 134 Token T; 135 for (Lex(T); !AtLastToken(); Lex(T)) 136 if (GetToken().isAtStartOfLine()) 137 return; 138} 139 140//===----------------------------------------------------------------------===// 141// Utility methods for reading from the mmap'ed PTH file. 142//===----------------------------------------------------------------------===// 143 144static inline uint8_t Read8(const char*& data) { 145 return (uint8_t) *(data++); 146} 147 148static inline uint32_t Read32(const char*& data) { 149 uint32_t V = (uint32_t) Read8(data); 150 V |= (((uint32_t) Read8(data)) << 8); 151 V |= (((uint32_t) Read8(data)) << 16); 152 V |= (((uint32_t) Read8(data)) << 24); 153 return V; 154} 155 156//===----------------------------------------------------------------------===// 157// Token reconstruction from the PTH file. 158//===----------------------------------------------------------------------===// 159 160void PTHLexer::ReadToken(Token& T) { 161 // Clear the token. 162 // FIXME: Setting the flags directly should obviate this step. 163 T.startToken(); 164 165 // Read the type of the token. 166 T.setKind((tok::TokenKind) Read8(TokBuf)); 167 168 // Set flags. This is gross, since we are really setting multiple flags. 169 T.setFlag((Token::TokenFlags) Read8(TokBuf)); 170 171 // Set the IdentifierInfo* (if any). 172 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(TokBuf)); 173 174 // Set the SourceLocation. Since all tokens are constructed using a 175 // raw lexer, they will all be offseted from the same FileID. 176 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(TokBuf))); 177 178 // Finally, read and set the length of the token. 179 T.setLength(Read32(TokBuf)); 180} 181 182//===----------------------------------------------------------------------===// 183// Internal Data Structures for PTH file lookup and resolving identifiers. 184//===----------------------------------------------------------------------===// 185 186 187/// PTHFileLookup - This internal data structure is used by the PTHManager 188/// to map from FileEntry objects managed by FileManager to offsets within 189/// the PTH file. 190namespace { 191class VISIBILITY_HIDDEN PTHFileLookup { 192public: 193 class Val { 194 uint32_t v; 195 196 public: 197 Val() : v(~0) {} 198 Val(uint32_t x) : v(x) {} 199 200 operator uint32_t() const { 201 assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 202 return v; 203 } 204 205 Val& operator=(uint32_t x) { v = x; return *this; } 206 bool isValid() const { return v != ~((uint32_t)0); } 207 }; 208 209private: 210 llvm::StringMap<Val> FileMap; 211 212public: 213 PTHFileLookup() {}; 214 215 Val Lookup(const FileEntry* FE) { 216 const char* s = FE->getName(); 217 unsigned size = strlen(s); 218 return FileMap.GetOrCreateValue(s, s+size).getValue(); 219 } 220 221 void ReadTable(const char* D) { 222 uint32_t N = Read32(D); // Read the length of the table. 223 224 for ( ; N > 0; --N) { // The rest of the data is the table itself. 225 uint32_t len = Read32(D); 226 const char* s = D; 227 D += len; 228 FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D); 229 } 230 } 231}; 232} // end anonymous namespace 233 234//===----------------------------------------------------------------------===// 235// PTHManager methods. 236//===----------------------------------------------------------------------===// 237 238PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 239 const char* idDataTable, IdentifierInfo** perIDCache, 240 Preprocessor& pp) 241: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup), 242 IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 243 244PTHManager::~PTHManager() { 245 delete Buf; 246 delete (PTHFileLookup*) FileLookup; 247 free(PerIDCache); 248} 249 250PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 251 252 // Memory map the PTH file. 253 llvm::OwningPtr<llvm::MemoryBuffer> 254 File(llvm::MemoryBuffer::getFile(file.c_str())); 255 256 if (!File) 257 return 0; 258 259 // Get the buffer ranges and check if there are at least three 32-bit 260 // words at the end of the file. 261 const char* BufBeg = File->getBufferStart(); 262 const char* BufEnd = File->getBufferEnd(); 263 264 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 265 assert(false && "Invalid PTH file."); 266 return 0; // FIXME: Proper error diagnostic? 267 } 268 269 // Compute the address of the index table at the end of the PTH file. 270 // This table contains the offset of the file lookup table, the 271 // persistent ID -> identifer data table. 272 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 273 274 // Construct the file lookup table. This will be used for mapping from 275 // FileEntry*'s to cached tokens. 276 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 277 const char* FileTable = BufBeg + Read32(FileTableOffset); 278 279 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 280 assert(false && "Invalid PTH file."); 281 return 0; // FIXME: Proper error diagnostic? 282 } 283 284 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 285 FL->ReadTable(FileTable); 286 287 // Get the location of the table mapping from persistent ids to the 288 // data needed to reconstruct identifiers. 289 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 290 const char* IData = BufBeg + Read32(IDTableOffset); 291 if (!(IData > BufBeg && IData < BufEnd)) { 292 assert(false && "Invalid PTH file."); 293 return 0; // FIXME: Proper error diagnostic? 294 } 295 296 // Get the number of IdentifierInfos and pre-allocate the identifier cache. 297 uint32_t NumIds = Read32(IData); 298 299 // Pre-allocate the peristent ID -> IdentifierInfo* cache. We use calloc() 300 // so that we in the best case only zero out memory once when the OS returns 301 // us new pages. 302 IdentifierInfo** PerIDCache = 303 (IdentifierInfo**) calloc(NumIds, sizeof(*PerIDCache)); 304 305 if (!PerIDCache) { 306 assert(false && "Could not allocate Persistent ID cache."); 307 return 0; 308 } 309 310 // Create the new lexer. 311 return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP); 312} 313 314IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) { 315 // Read the persistent ID from the PTH file. 316 uint32_t persistentID = Read32(D); 317 318 // A persistent ID of '0' always maps to NULL. 319 if (!persistentID) 320 return 0; 321 322 // Adjust the persistent ID by subtracting '1' so that it can be used 323 // as an index within a table in the PTH file. 324 --persistentID; 325 326 // Check if the IdentifierInfo has already been resolved. 327 IdentifierInfo*& II = PerIDCache[persistentID]; 328 if (II) return II; 329 330 // Look in the PTH file for the string data for the IdentifierInfo object. 331 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 332 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 333 assert(IDData < Buf->getBufferEnd()); 334 335 // Read the length of the string. 336 uint32_t len = Read32(IDData); 337 338 // Get the IdentifierInfo* with the specified string. 339 II = &ITable.get(IDData, IDData+len); 340 return II; 341} 342 343PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 344 345 if (!FE) 346 return 0; 347 348 // Lookup the FileEntry object in our file lookup data structure. It will 349 // return a variant that indicates whether or not there is an offset within 350 // the PTH file that contains cached tokens. 351 PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE); 352 353 if (!Off.isValid()) // No tokens available. 354 return 0; 355 356 // Compute the offset of the token data within the buffer. 357 const char* data = Buf->getBufferStart() + Off; 358 assert(data < Buf->getBufferEnd()); 359 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this); 360} 361