PTHLexer.cpp revision 0c6a77bc1f52f282a969538f139ebde429076ed3
1//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PTHLexer interface. 11// 12//===----------------------------------------------------------------------===// 13 14#include "clang/Basic/TokenKinds.h" 15#include "clang/Basic/FileManager.h" 16#include "clang/Basic/IdentifierTable.h" 17#include "clang/Lex/PTHLexer.h" 18#include "clang/Lex/Preprocessor.h" 19#include "clang/Lex/PTHManager.h" 20#include "clang/Lex/Token.h" 21#include "clang/Lex/Preprocessor.h" 22#include "llvm/Support/Compiler.h" 23#include "llvm/Support/MemoryBuffer.h" 24#include "llvm/ADT/StringMap.h" 25#include "llvm/ADT/OwningPtr.h" 26#include "llvm/ADT/DenseMap.h" 27 28using namespace clang; 29 30PTHLexer::PTHLexer(Preprocessor& pp, SourceLocation fileloc, const char* D, 31 PTHManager& PM) 32 : TokBuf(D), PreprocessorLexer(&pp, fileloc), CurTokenIdx(0), PTHMgr(PM), 33 NeedsFetching(true) { 34 // Make sure the EofToken is completely clean. 35 EofToken.startToken(); 36 } 37 38Token PTHLexer::GetToken() { 39 // Read the next token, or if we haven't advanced yet, get the last 40 // token read. 41 if (NeedsFetching) { 42 NeedsFetching = false; 43 ReadToken(LastFetched); 44 } 45 46 Token Tok = LastFetched; 47 48 // If we are in raw mode, zero out identifier pointers. This is 49 // needed for 'pragma poison'. Note that this requires that the Preprocessor 50 // can go back to the original source when it calls getSpelling(). 51 if (LexingRawMode && Tok.is(tok::identifier)) 52 Tok.setIdentifierInfo(0); 53 54 return Tok; 55} 56 57void PTHLexer::Lex(Token& Tok) { 58LexNextToken: 59 Tok = GetToken(); 60 61 if (AtLastToken()) { 62 Preprocessor *PPCache = PP; 63 64 if (LexEndOfFile(Tok)) 65 return; 66 67 assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); 68 return PPCache->Lex(Tok); 69 } 70 71 // Don't advance to the next token yet. Check if we are at the 72 // start of a new line and we're processing a directive. If so, we 73 // consume this token twice, once as an tok::eom. 74 if (Tok.isAtStartOfLine() && ParsingPreprocessorDirective) { 75 ParsingPreprocessorDirective = false; 76 Tok.setKind(tok::eom); 77 MIOpt.ReadToken(); 78 return; 79 } 80 81 // Advance to the next token. 82 AdvanceToken(); 83 84 if (Tok.is(tok::hash)) { 85 if (Tok.isAtStartOfLine() && !LexingRawMode) { 86 PP->HandleDirective(Tok); 87 88 if (PP->isCurrentLexer(this)) 89 goto LexNextToken; 90 91 return PP->Lex(Tok); 92 } 93 } 94 95 MIOpt.ReadToken(); 96 97 if (Tok.is(tok::identifier)) { 98 if (LexingRawMode) return; 99 return PP->HandleIdentifier(Tok); 100 } 101} 102 103bool PTHLexer::LexEndOfFile(Token &Tok) { 104 105 if (ParsingPreprocessorDirective) { 106 ParsingPreprocessorDirective = false; 107 Tok.setKind(tok::eom); 108 MIOpt.ReadToken(); 109 return true; // Have a token. 110 } 111 112 if (LexingRawMode) { 113 MIOpt.ReadToken(); 114 return true; // Have an eof token. 115 } 116 117 // FIXME: Issue diagnostics similar to Lexer. 118 return PP->HandleEndOfFile(Tok, false); 119} 120 121void PTHLexer::setEOF(Token& Tok) { 122 assert(!EofToken.is(tok::eof)); 123 Tok = EofToken; 124} 125 126void PTHLexer::DiscardToEndOfLine() { 127 assert(ParsingPreprocessorDirective && ParsingFilename == false && 128 "Must be in a preprocessing directive!"); 129 130 // Already at end-of-file? 131 if (AtLastToken()) 132 return; 133 134 // Find the first token that is not the start of the *current* line. 135 Token T; 136 for (Lex(T); !AtLastToken(); Lex(T)) 137 if (GetToken().isAtStartOfLine()) 138 return; 139} 140 141//===----------------------------------------------------------------------===// 142// Utility methods for reading from the mmap'ed PTH file. 143//===----------------------------------------------------------------------===// 144 145static inline uint8_t Read8(const char*& data) { 146 return (uint8_t) *(data++); 147} 148 149static inline uint32_t Read32(const char*& data) { 150 uint32_t V = (uint32_t) Read8(data); 151 V |= (((uint32_t) Read8(data)) << 8); 152 V |= (((uint32_t) Read8(data)) << 16); 153 V |= (((uint32_t) Read8(data)) << 24); 154 return V; 155} 156 157//===----------------------------------------------------------------------===// 158// Token reconstruction from the PTH file. 159//===----------------------------------------------------------------------===// 160 161void PTHLexer::ReadToken(Token& T) { 162 // Clear the token. 163 // FIXME: Setting the flags directly should obviate this step. 164 T.startToken(); 165 166 // Read the type of the token. 167 T.setKind((tok::TokenKind) Read8(TokBuf)); 168 169 // Set flags. This is gross, since we are really setting multiple flags. 170 T.setFlag((Token::TokenFlags) Read8(TokBuf)); 171 172 // Set the IdentifierInfo* (if any). 173 T.setIdentifierInfo(PTHMgr.ReadIdentifierInfo(TokBuf)); 174 175 // Set the SourceLocation. Since all tokens are constructed using a 176 // raw lexer, they will all be offseted from the same FileID. 177 T.setLocation(SourceLocation::getFileLoc(FileID, Read32(TokBuf))); 178 179 // Finally, read and set the length of the token. 180 T.setLength(Read32(TokBuf)); 181} 182 183//===----------------------------------------------------------------------===// 184// Internal Data Structures for PTH file lookup and resolving identifiers. 185//===----------------------------------------------------------------------===// 186 187typedef llvm::DenseMap<uint32_t, IdentifierInfo*> IDCache; 188 189/// PTHFileLookup - This internal data structure is used by the PTHManager 190/// to map from FileEntry objects managed by FileManager to offsets within 191/// the PTH file. 192namespace { 193class VISIBILITY_HIDDEN PTHFileLookup { 194public: 195 class Val { 196 uint32_t v; 197 198 public: 199 Val() : v(~0) {} 200 Val(uint32_t x) : v(x) {} 201 202 operator uint32_t() const { 203 assert(v != ~((uint32_t)0) && "PTHFileLookup entry initialized."); 204 return v; 205 } 206 207 Val& operator=(uint32_t x) { v = x; return *this; } 208 bool isValid() const { return v != ~((uint32_t)0); } 209 }; 210 211private: 212 llvm::StringMap<Val> FileMap; 213 214public: 215 PTHFileLookup() {}; 216 217 Val Lookup(const FileEntry* FE) { 218 const char* s = FE->getName(); 219 unsigned size = strlen(s); 220 return FileMap.GetOrCreateValue(s, s+size).getValue(); 221 } 222 223 void ReadTable(const char* D) { 224 uint32_t N = Read32(D); // Read the length of the table. 225 226 for ( ; N > 0; --N) { // The rest of the data is the table itself. 227 uint32_t len = Read32(D); 228 const char* s = D; 229 D += len; 230 FileMap.GetOrCreateValue(s, s+len).getValue() = Read32(D); 231 } 232 } 233}; 234} // end anonymous namespace 235 236//===----------------------------------------------------------------------===// 237// PTHManager methods. 238//===----------------------------------------------------------------------===// 239 240PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup, 241 const char* idDataTable, Preprocessor& pp) 242: Buf(buf), PersistentIDCache(0), FileLookup(fileLookup), 243IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {} 244 245PTHManager::~PTHManager() { 246 delete Buf; 247 delete (PTHFileLookup*) FileLookup; 248 delete (IDCache*) PersistentIDCache; 249} 250 251PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) { 252 253 // Memory map the PTH file. 254 llvm::OwningPtr<llvm::MemoryBuffer> 255 File(llvm::MemoryBuffer::getFile(file.c_str())); 256 257 if (!File) 258 return 0; 259 260 // Get the buffer ranges and check if there are at least three 32-bit 261 // words at the end of the file. 262 const char* BufBeg = File->getBufferStart(); 263 const char* BufEnd = File->getBufferEnd(); 264 265 if(!(BufEnd > BufBeg + sizeof(uint32_t)*3)) { 266 assert(false && "Invalid PTH file."); 267 return 0; // FIXME: Proper error diagnostic? 268 } 269 270 // Compute the address of the index table at the end of the PTH file. 271 // This table contains the offset of the file lookup table, the 272 // persistent ID -> identifer data table. 273 const char* EndTable = BufEnd - sizeof(uint32_t)*3; 274 275 // Construct the file lookup table. This will be used for mapping from 276 // FileEntry*'s to cached tokens. 277 const char* FileTableOffset = EndTable + sizeof(uint32_t)*2; 278 const char* FileTable = BufBeg + Read32(FileTableOffset); 279 280 if (!(FileTable > BufBeg && FileTable < BufEnd)) { 281 assert(false && "Invalid PTH file."); 282 return 0; // FIXME: Proper error diagnostic? 283 } 284 285 llvm::OwningPtr<PTHFileLookup> FL(new PTHFileLookup()); 286 FL->ReadTable(FileTable); 287 288 // Get the location of the table mapping from persistent ids to the 289 // data needed to reconstruct identifiers. 290 const char* IDTableOffset = EndTable + sizeof(uint32_t)*1; 291 const char* IData = BufBeg + Read32(IDTableOffset); 292 if (!(IData > BufBeg && IData < BufEnd)) { 293 assert(false && "Invalid PTH file."); 294 return 0; // FIXME: Proper error diagnostic? 295 } 296 297 return new PTHManager(File.take(), FL.take(), IData, PP); 298} 299 300IdentifierInfo* PTHManager::ReadIdentifierInfo(const char*& D) { 301 // Read the persistent ID from the PTH file. 302 uint32_t persistentID = Read32(D); 303 304 // A persistent ID of '0' always maps to NULL. 305 if (!persistentID) 306 return 0; 307 308 // Adjust the persistent ID by subtracting '1' so that it can be used 309 // as an index within a table in the PTH file. 310 --persistentID; 311 312 // Check if the IdentifierInfo has already been resolved. 313 if (!PersistentIDCache) 314 PersistentIDCache = new IDCache(); 315 316 // FIXME: We can make this an array, but what is the performance tradeoff? 317 IdentifierInfo*& II = (*((IDCache*) PersistentIDCache))[persistentID]; 318 if (II) return II; 319 320 // Look in the PTH file for the string data for the IdentifierInfo object. 321 const char* TableEntry = IdDataTable + sizeof(uint32_t) * persistentID; 322 const char* IDData = Buf->getBufferStart() + Read32(TableEntry); 323 assert(IDData < Buf->getBufferEnd()); 324 325 // Read the length of the string. 326 uint32_t len = Read32(IDData); 327 328 // Get the IdentifierInfo* with the specified string. 329 II = &ITable.get(IDData, IDData+len); 330 return II; 331} 332 333PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) { 334 335 if (!FE) 336 return 0; 337 338 // Lookup the FileEntry object in our file lookup data structure. It will 339 // return a variant that indicates whether or not there is an offset within 340 // the PTH file that contains cached tokens. 341 PTHFileLookup::Val Off = ((PTHFileLookup*) FileLookup)->Lookup(FE); 342 343 if (!Off.isValid()) // No tokens available. 344 return 0; 345 346 // Compute the offset of the token data within the buffer. 347 const char* data = Buf->getBufferStart() + Off; 348 assert(data < Buf->getBufferEnd()); 349 return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, *this); 350} 351