PTHLexer.cpp revision c141b51d78b40c7b0b04f27adfa70ec5bbddc49c
13ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
23ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//
33ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//                     The LLVM Compiler Infrastructure
43ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//
53ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// This file is distributed under the University of Illinois Open Source
63ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// License. See LICENSE.TXT for details.
73ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//
83ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
93ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//
103ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// This file implements the PTHLexer interface.
113ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//
123ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
133ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
143ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Basic/TokenKinds.h"
153ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Basic/FileManager.h"
16de984cdc3631106b1cbbb8d3972b76a0fc27e8e8cristy#include "clang/Basic/FileSystemStatCache.h"
173ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Basic/IdentifierTable.h"
183ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Basic/OnDiskHashTable.h"
193ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Lex/LexDiagnostic.h"
207ce65e7125a4e1df1a274ce373c537a9df9c16cdCristy#include "clang/Lex/PTHLexer.h"
213ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Lex/Preprocessor.h"
223ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Lex/PTHManager.h"
233ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Lex/Token.h"
243ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "clang/Lex/Preprocessor.h"
253ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "llvm/ADT/OwningPtr.h"
263ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "llvm/ADT/StringExtras.h"
273ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "llvm/ADT/StringMap.h"
283ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "llvm/Support/MemoryBuffer.h"
293ed852eea50f9d4cd633efb8c2b054b8e33c253cristy#include "llvm/Support/system_error.h"
303ed852eea50f9d4cd633efb8c2b054b8e33c253cristyusing namespace clang;
313ed852eea50f9d4cd633efb8c2b054b8e33c253cristyusing namespace clang::io;
323ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
// Size in bytes of one serialized token record, as decoded by PTHLexer::Lex:
//   1 (token kind) + 1 (token flags) + 2 (token length)
//   + 4 (identifier ID / spelling offset) + 4 (file offset).
#define DISK_TOKEN_SIZE (1 + 1 + 2 + 4 + 4)
343ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
353ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
363ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// PTHLexer methods.
373ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
383ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
393ed852eea50f9d4cd633efb8c2b054b8e33c253cristyPTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
403ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                   const unsigned char *ppcond, PTHManager &PM)
413ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
424c08aed51c5899665ade97263692328eea4af106cristy    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
434c08aed51c5899665ade97263692328eea4af106cristy
444c08aed51c5899665ade97263692328eea4af106cristy  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
454c08aed51c5899665ade97263692328eea4af106cristy}
464c08aed51c5899665ade97263692328eea4af106cristy
474c08aed51c5899665ade97263692328eea4af106cristyvoid PTHLexer::Lex(Token& Tok) {
484c08aed51c5899665ade97263692328eea4af106cristyLexNextToken:
494c08aed51c5899665ade97263692328eea4af106cristy
504c08aed51c5899665ade97263692328eea4af106cristy  //===--------------------------------------==//
514c08aed51c5899665ade97263692328eea4af106cristy  // Read the raw token data.
524c08aed51c5899665ade97263692328eea4af106cristy  //===--------------------------------------==//
534c08aed51c5899665ade97263692328eea4af106cristy
544c08aed51c5899665ade97263692328eea4af106cristy  // Shadow CurPtr into an automatic variable.
554c08aed51c5899665ade97263692328eea4af106cristy  const unsigned char *CurPtrShadow = CurPtr;
564c08aed51c5899665ade97263692328eea4af106cristy
574c08aed51c5899665ade97263692328eea4af106cristy  // Read in the data for the token.
584c08aed51c5899665ade97263692328eea4af106cristy  unsigned Word0 = ReadLE32(CurPtrShadow);
594c08aed51c5899665ade97263692328eea4af106cristy  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
604c08aed51c5899665ade97263692328eea4af106cristy  uint32_t FileOffset = ReadLE32(CurPtrShadow);
613ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
623ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
633ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
643ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  uint32_t Len = Word0 >> 16;
653ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
663ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  CurPtr = CurPtrShadow;
673ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
683ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  //===--------------------------------------==//
693ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Construct the token itself.
703ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  //===--------------------------------------==//
713ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
723ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Tok.startToken();
733ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Tok.setKind(TKind);
743ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Tok.setFlag(TFlags);
753ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(!LexingRawMode);
763ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
773ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Tok.setLength(Len);
783ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
793ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Handle identifiers.
803ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if (Tok.isLiteral()) {
813ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
823ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
833ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  else if (IdentifierID) {
843ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    MIOpt.ReadToken();
853ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
863ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
873ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Tok.setIdentifierInfo(II);
883ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
893ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Change the kind of this identifier to the appropriate token kind, e.g.
903ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // turning "for" into a keyword.
913ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Tok.setKind(II->getTokenID());
923ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
933ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    if (II->isHandleIdentifierCase())
943ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      PP->HandleIdentifier(Tok);
953ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return;
963ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
973ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
983ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  //===--------------------------------------==//
993ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Process the token.
1003ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  //===--------------------------------------==//
1013ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if (TKind == tok::eof) {
1023ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Save the end-of-file token.
1033ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    EofToken = Tok;
1043ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1053ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Save 'PP' to 'PPCache' as LexEndOfFile can delete 'this'.
1063ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Preprocessor *PPCache = PP;
1073ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1083ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    assert(!ParsingPreprocessorDirective);
1093ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    assert(!LexingRawMode);
1103ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1113ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    if (LexEndOfFile(Tok))
1123ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      return;
1133ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1143ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return PPCache->Lex(Tok);
1153ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
116952a6063b767d87c452b00435088197d38916dd8cristy
117952a6063b767d87c452b00435088197d38916dd8cristy  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
118952a6063b767d87c452b00435088197d38916dd8cristy    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
119952a6063b767d87c452b00435088197d38916dd8cristy    assert(!LexingRawMode);
120952a6063b767d87c452b00435088197d38916dd8cristy    PP->HandleDirective(Tok);
1213ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
122952a6063b767d87c452b00435088197d38916dd8cristy    if (PP->isCurrentLexer(this))
1233ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      goto LexNextToken;
1243ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1253ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return PP->Lex(Tok);
126952a6063b767d87c452b00435088197d38916dd8cristy  }
127952a6063b767d87c452b00435088197d38916dd8cristy
128952a6063b767d87c452b00435088197d38916dd8cristy  if (TKind == tok::eod) {
129952a6063b767d87c452b00435088197d38916dd8cristy    assert(ParsingPreprocessorDirective);
130952a6063b767d87c452b00435088197d38916dd8cristy    ParsingPreprocessorDirective = false;
131952a6063b767d87c452b00435088197d38916dd8cristy    return;
132952a6063b767d87c452b00435088197d38916dd8cristy  }
133952a6063b767d87c452b00435088197d38916dd8cristy
134952a6063b767d87c452b00435088197d38916dd8cristy  MIOpt.ReadToken();
1353ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
1363ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1373ed852eea50f9d4cd633efb8c2b054b8e33c253cristybool PTHLexer::LexEndOfFile(Token &Result) {
1383ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // If we hit the end of the file while parsing a preprocessor directive,
1393ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // end the preprocessor directive first.  The next token returned will
1403ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // then be the end of file.
1413ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if (ParsingPreprocessorDirective) {
1423ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    ParsingPreprocessorDirective = false; // Done parsing the "line".
1433ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return true;  // Have a token.
1443ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
1453ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1463ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(!LexingRawMode);
1473ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1483ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // If we are in a #if directive, emit an error.
1493ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  while (!ConditionalStack.empty()) {
1503ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    if (PP->getCodeCompletionFileLoc() != FileStartLoc)
151952a6063b767d87c452b00435088197d38916dd8cristy      PP->Diag(ConditionalStack.back().IfLoc,
152952a6063b767d87c452b00435088197d38916dd8cristy               diag::err_pp_unterminated_conditional);
1533ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    ConditionalStack.pop_back();
154952a6063b767d87c452b00435088197d38916dd8cristy  }
1553ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1563ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Finally, let the preprocessor handle this.
1573ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  return PP->HandleEndOfFile(Result);
1583ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
1593ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1603ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// FIXME: We can just grab the last token instead of storing a copy
1613ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// into EofToken.
1623ed852eea50f9d4cd633efb8c2b054b8e33c253cristyvoid PTHLexer::getEOF(Token& Tok) {
1633ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(EofToken.is(tok::eof));
1643ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Tok = EofToken;
1653ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
1663ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1673ed852eea50f9d4cd633efb8c2b054b8e33c253cristyvoid PTHLexer::DiscardToEndOfLine() {
1683ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
1693ed852eea50f9d4cd633efb8c2b054b8e33c253cristy         "Must be in a preprocessing directive!");
170952a6063b767d87c452b00435088197d38916dd8cristy
171952a6063b767d87c452b00435088197d38916dd8cristy  // We assume that if the preprocessor wishes to discard to the end of
1723ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // the line that it also means to end the current preprocessor directive.
173bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy  ParsingPreprocessorDirective = false;
1743ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1753ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Skip tokens by only peeking at their token kind and the flags.
1763ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // We don't need to actually reconstruct full tokens from the token buffer.
1774c08aed51c5899665ade97263692328eea4af106cristy  // This saves some copies and it also reduces IdentifierInfo* lookup.
1783ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  const unsigned char* p = CurPtr;
1793ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  while (1) {
1803ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Read the token kind.  Are we at the end of the file?
181c6da28e61bb609d2b2cfdcc7752106c973415edbcristy    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
182c6da28e61bb609d2b2cfdcc7752106c973415edbcristy    if (x == tok::eof) break;
1833ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1843ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Read the token flags.  Are we at the start of the next line?
1853ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
1863ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    if (y & Token::StartOfLine) break;
1873ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1883ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Skip to the next token.
1893ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    p += DISK_TOKEN_SIZE;
1903ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
1913ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
1923ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  CurPtr = p;
1933ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
194e1c94d9d25db6b0dd7a5028ffee31d1057855d73cristy
1953ed852eea50f9d4cd633efb8c2b054b8e33c253cristy/// SkipBlock - Used by Preprocessor to skip the current conditional block.
1963ed852eea50f9d4cd633efb8c2b054b8e33c253cristybool PTHLexer::SkipBlock() {
1973ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(CurPPCondPtr && "No cached PP conditional information.");
1983ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(LastHashTokPtr && "No known '#' token.");
199e1c94d9d25db6b0dd7a5028ffee31d1057855d73cristy
2009950d57e1124b73f684fb5946e206994cefda628cristy  const unsigned char* HashEntryI = 0;
2013ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  uint32_t Offset;
2023ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  uint32_t TableIdx;
2033ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2043ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  do {
2053ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Read the token offset from the side-table.
2063ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Offset = ReadLE32(CurPPCondPtr);
207952a6063b767d87c452b00435088197d38916dd8cristy
2083ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Read the target table index from the side-table.
2093ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    TableIdx = ReadLE32(CurPPCondPtr);
2103ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2113ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Compute the actual memory address of the '#' token data for this entry.
2123ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    HashEntryI = TokBuf + Offset;
2133ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2143ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
2153ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    //  contain nested blocks.  In the side-table we can jump over these
2163ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    //  nested blocks instead of doing a linear search if the next "sibling"
2173ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    //  entry is not at a location greater than LastHashTokPtr.
2183ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    if (HashEntryI < LastHashTokPtr && TableIdx) {
2193ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      // In the side-table we are still at an entry for a '#' token that
2203ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      // is earlier than the last one we saw.  Check if the location we would
2213ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      // stride gets us closer.
2223ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      const unsigned char* NextPPCondPtr =
2233ed852eea50f9d4cd633efb8c2b054b8e33c253cristy        PPCond + TableIdx*(sizeof(uint32_t)*2);
2243ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      assert(NextPPCondPtr >= CurPPCondPtr);
2253ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      // Read where we should jump to.
2263ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
2273ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
2283ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2293ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      if (HashEntryJ <= LastHashTokPtr) {
2301fe0b879964fa7797e3d68574d297922a47c4034Cristy        // Jump directly to the next entry in the side table.
2313ed852eea50f9d4cd633efb8c2b054b8e33c253cristy        HashEntryI = HashEntryJ;
2323ed852eea50f9d4cd633efb8c2b054b8e33c253cristy        Offset = TmpOffset;
233f50886b34832135cf6e2e1458f2a324bb1704191cristy        TableIdx = ReadLE32(NextPPCondPtr);
2343ed852eea50f9d4cd633efb8c2b054b8e33c253cristy        CurPPCondPtr = NextPPCondPtr;
2353ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      }
2363ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    }
2373ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
2383ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  while (HashEntryI < LastHashTokPtr);
2393ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
2403ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(TableIdx && "No jumping from #endifs.");
2413ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2423ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Update our side-table iterator.
2433ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
2443ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(NextPPCondPtr >= CurPPCondPtr);
2453ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  CurPPCondPtr = NextPPCondPtr;
2463ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2473ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Read where we should jump to.
2483ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
2493ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
2503ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2513ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // By construction NextIdx will be zero if this is a #endif.  This is useful
2523ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // to know to obviate lexing another token.
2531fe0b879964fa7797e3d68574d297922a47c4034Cristy  bool isEndif = NextIdx == 0;
2543ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2553ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // This case can occur when we see something like this:
2563ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  //
257b0a657e13c4aefba39c51292005427b47277869dcristy  //  #if ...
258b0a657e13c4aefba39c51292005427b47277869dcristy  //   /* a comment or nothing */
259952a6063b767d87c452b00435088197d38916dd8cristy  //  #elif
260952a6063b767d87c452b00435088197d38916dd8cristy  //
261952a6063b767d87c452b00435088197d38916dd8cristy  // If we are skipping the first #if block it will be the case that CurPtr
262952a6063b767d87c452b00435088197d38916dd8cristy  // already points 'elif'.  Just return.
2633ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2643ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if (CurPtr > HashEntryI) {
2653ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
2663ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // Did we reach a #endif?  If so, go ahead and consume that token as well.
2673ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    if (isEndif)
268acabb847a592ca5e430c1c0949d03acfc0b78bb9cristy      CurPtr += DISK_TOKEN_SIZE*2;
269acabb847a592ca5e430c1c0949d03acfc0b78bb9cristy    else
270acabb847a592ca5e430c1c0949d03acfc0b78bb9cristy      LastHashTokPtr = HashEntryI;
271952a6063b767d87c452b00435088197d38916dd8cristy
272952a6063b767d87c452b00435088197d38916dd8cristy    return isEndif;
273952a6063b767d87c452b00435088197d38916dd8cristy  }
2743ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2753ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
276d15e65928aec551b7388c2863de3e3e628e2e0ddcristy  CurPtr = HashEntryI;
2773ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2783ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Update the location of the last observed '#'.  This is useful if we
2793ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // are skipping multiple blocks.
280bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy  LastHashTokPtr = CurPtr;
2811fe0b879964fa7797e3d68574d297922a47c4034Cristy
2823ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Skip the '#' token.
2833ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  assert(((tok::TokenKind)*CurPtr) == tok::hash);
2843ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  CurPtr += DISK_TOKEN_SIZE;
2853ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
286bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy  // Did we reach a #endif?  If so, go ahead and consume that token as well.
2873ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
2883ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2893ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  return isEndif;
2903ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
2913ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
2923ed852eea50f9d4cd633efb8c2b054b8e33c253cristySourceLocation PTHLexer::getSourceLocation() {
2931fe0b879964fa7797e3d68574d297922a47c4034Cristy  // getSourceLocation is not on the hot path.  It is used to get the location
2943ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // of the next token when transitioning back to this lexer when done
2953ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // handling a #included file.  Just read the necessary data from the token
2963ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // data buffer to construct the SourceLocation object.
2973ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // NOTE: This is a virtual function; hence it is defined out-of-line.
2983ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
2993ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  uint32_t Offset = ReadLE32(OffsetPtr);
3003ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  return FileStartLoc.getLocWithOffset(Offset);
3013ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
3023ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3033ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
3043ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// PTH file lookup: map from strings to file data.
3053ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
3063ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
307bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy/// PTHFileLookup - This internal data structure is used by the PTHManager
308bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy///  to map from FileEntry objects managed by FileManager to offsets within
309acd2ed254c18c254a0ab5aafa06d1645e5d079d8cristy///  the PTH file.
3103ed852eea50f9d4cd633efb8c2b054b8e33c253cristynamespace {
/// Immutable pair of 32-bit offsets stored per file in the PTH file lookup
/// table (presumably the token data offset and the preprocessor-conditional
/// table offset, going by the member names).
class PTHFileData {
public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}

  /// First offset passed to the constructor.
  uint32_t getTokenOffset() const { return TokenOff; }

  /// Second offset passed to the constructor.
  uint32_t getPPCondOffset() const { return PPCondOff; }

private:
  const uint32_t TokenOff;
  const uint32_t PPCondOff;
};
3213ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3224c08aed51c5899665ade97263692328eea4af106cristy
3233ed852eea50f9d4cd633efb8c2b054b8e33c253cristyclass PTHFileLookupCommonTrait {
3243ed852eea50f9d4cd633efb8c2b054b8e33c253cristypublic:
3253ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  typedef std::pair<unsigned char, const char*> internal_key_type;
3263ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3274c08aed51c5899665ade97263692328eea4af106cristy  static unsigned ComputeHash(internal_key_type x) {
3283ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return llvm::HashString(x.second);
3293ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
3303ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3313ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static std::pair<unsigned, unsigned>
3323ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  ReadKeyDataLength(const unsigned char*& d) {
3334c08aed51c5899665ade97263692328eea4af106cristy    unsigned keyLen = (unsigned) ReadUnalignedLE16(d);
3343ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    unsigned dataLen = (unsigned) *(d++);
3353ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return std::make_pair(keyLen, dataLen);
3363ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
3373ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3383ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static internal_key_type ReadKey(const unsigned char* d, unsigned) {
3393ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    unsigned char k = *(d++); // Read the entry kind.
3403ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return std::make_pair(k, (const char*) d);
3413ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
3423ed852eea50f9d4cd633efb8c2b054b8e33c253cristy};
3433ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3443ed852eea50f9d4cd633efb8c2b054b8e33c253cristyclass PTHFileLookupTrait : public PTHFileLookupCommonTrait {
3453ed852eea50f9d4cd633efb8c2b054b8e33c253cristypublic:
3463ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  typedef const FileEntry* external_key_type;
3473ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  typedef PTHFileData      data_type;
3483ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
349bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy  static internal_key_type GetInternalKey(const FileEntry* FE) {
350bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy    return std::make_pair((unsigned char) 0x1, FE->getName());
351acd2ed254c18c254a0ab5aafa06d1645e5d079d8cristy  }
3523ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3533ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static bool EqualKey(internal_key_type a, internal_key_type b) {
3543ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return a.first == b.first && strcmp(a.second, b.second) == 0;
3553ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
3563ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3574c08aed51c5899665ade97263692328eea4af106cristy  static PTHFileData ReadData(const internal_key_type& k,
3583ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                              const unsigned char* d, unsigned) {
3593ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    assert(k.first == 0x1 && "Only file lookups can match!");
3603ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    uint32_t x = ::ReadUnalignedLE32(d);
3613ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    uint32_t y = ::ReadUnalignedLE32(d);
3624c08aed51c5899665ade97263692328eea4af106cristy    return PTHFileData(x, y);
3633ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
3643ed852eea50f9d4cd633efb8c2b054b8e33c253cristy};
3653ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3663ed852eea50f9d4cd633efb8c2b054b8e33c253cristyclass PTHStringLookupTrait {
3674c08aed51c5899665ade97263692328eea4af106cristypublic:
3683ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  typedef uint32_t
3693ed852eea50f9d4cd633efb8c2b054b8e33c253cristy          data_type;
3703ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3713ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  typedef const std::pair<const char*, unsigned>
3723ed852eea50f9d4cd633efb8c2b054b8e33c253cristy          external_key_type;
3734c08aed51c5899665ade97263692328eea4af106cristy
3743ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  typedef external_key_type internal_key_type;
3753ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3763ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static bool EqualKey(const internal_key_type& a,
3773ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                       const internal_key_type& b) {
3783ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
3793ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                                  : false;
3803ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
3813ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3823ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static unsigned ComputeHash(const internal_key_type& a) {
3833ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return llvm::HashString(StringRef(a.first, a.second));
3843ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
385cee9711bbc334b5677d5ec4ea1cc70340d35ee35cristy
386c6da28e61bb609d2b2cfdcc7752106c973415edbcristy  // This hopefully will just get inlined and removed by the optimizer.
3873ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static const internal_key_type&
3883ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  GetInternalKey(const external_key_type& x) { return x; }
3893ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3903ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static std::pair<unsigned, unsigned>
3913ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  ReadKeyDataLength(const unsigned char*& d) {
3923ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t));
393952a6063b767d87c452b00435088197d38916dd8cristy  }
3943ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
3953ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static std::pair<const char*, unsigned>
3963ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  ReadKey(const unsigned char* d, unsigned n) {
3973ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      assert(n >= 2 && d[n-1] == '\0');
3983ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      return std::make_pair((const char*) d, n-1);
3993ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    }
4003ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4013ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
4023ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                           unsigned) {
4033ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return ::ReadUnalignedLE32(d);
4043ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
4053ed852eea50f9d4cd633efb8c2b054b8e33c253cristy};
4063ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4073ed852eea50f9d4cd633efb8c2b054b8e33c253cristy} // end anonymous namespace
4083ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4093ed852eea50f9d4cd633efb8c2b054b8e33c253cristytypedef OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
4103ed852eea50f9d4cd633efb8c2b054b8e33c253cristytypedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
4113ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4123ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
4133ed852eea50f9d4cd633efb8c2b054b8e33c253cristy// PTHManager methods.
4143ed852eea50f9d4cd633efb8c2b054b8e33c253cristy//===----------------------------------------------------------------------===//
4153ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4163ed852eea50f9d4cd633efb8c2b054b8e33c253cristyPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
4173ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                       const unsigned char* idDataTable,
418bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy                       IdentifierInfo** perIDCache,
4193ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                       void* stringIdLookup, unsigned numIds,
4203ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                       const unsigned char* spellingBase,
421bb50337b2a8a16ca7e903cc04ab195ff0fd47ae6cristy                       const char* originalSourceFile)
4223ed852eea50f9d4cd633efb8c2b054b8e33c253cristy: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
4233ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
4243ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  NumIds(numIds), PP(0), SpellingBase(spellingBase),
4253ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  OriginalSourceFile(originalSourceFile) {}
42606b627a07ff44e1ff93ef1288c9f428066ded10ddirk
4273ed852eea50f9d4cd633efb8c2b054b8e33c253cristyPTHManager::~PTHManager() {
42808e9a113db499034abb5ad8d59b42f8eca3c641cdirk  delete Buf;
42908e9a113db499034abb5ad8d59b42f8eca3c641cdirk  delete (PTHFileLookup*) FileLookup;
4303ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  delete (PTHStringIdLookup*) StringIdLookup;
4313ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  free(PerIDCache);
4323ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
4333ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4343ed852eea50f9d4cd633efb8c2b054b8e33c253cristystatic void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
4353ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, Msg));
4363ed852eea50f9d4cd633efb8c2b054b8e33c253cristy}
4373ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4383ed852eea50f9d4cd633efb8c2b054b8e33c253cristyPTHManager *PTHManager::Create(const std::string &file,
4393ed852eea50f9d4cd633efb8c2b054b8e33c253cristy                               DiagnosticsEngine &Diags) {
4403ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Memory map the PTH file.
4413ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  OwningPtr<llvm::MemoryBuffer> File;
4423ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4433ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if (llvm::MemoryBuffer::getFile(file, File)) {
4443ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    // FIXME: Add ec.message() to this diag.
4453ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    Diags.Report(diag::err_invalid_pth_file) << file;
4463ed852eea50f9d4cd633efb8c2b054b8e33c253cristy    return 0;
4473ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  }
4483ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4493ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Get the buffer ranges and check if there are at least three 32-bit
4503ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // words at the end of the file.
4513ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  const unsigned char *BufBeg = (unsigned char*)File->getBufferStart();
4523ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  const unsigned char *BufEnd = (unsigned char*)File->getBufferEnd();
4533ed852eea50f9d4cd633efb8c2b054b8e33c253cristy
4543ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  // Check the prologue of the file.
4553ed852eea50f9d4cd633efb8c2b054b8e33c253cristy  if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
4563ed852eea50f9d4cd633efb8c2b054b8e33c253cristy      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
457    Diags.Report(diag::err_invalid_pth_file) << file;
458    return 0;
459  }
460
461  // Read the PTH version.
462  const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
463  unsigned Version = ReadLE32(p);
464
465  if (Version < PTHManager::Version) {
466    InvalidPTH(Diags,
467        Version < PTHManager::Version
468        ? "PTH file uses an older PTH format that is no longer supported"
469        : "PTH file uses a newer PTH format that cannot be read");
470    return 0;
471  }
472
473  // Compute the address of the index table at the end of the PTH file.
474  const unsigned char *PrologueOffset = p;
475
476  if (PrologueOffset >= BufEnd) {
477    Diags.Report(diag::err_invalid_pth_file) << file;
478    return 0;
479  }
480
481  // Construct the file lookup table.  This will be used for mapping from
482  // FileEntry*'s to cached tokens.
483  const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
484  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
485
486  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
487    Diags.Report(diag::err_invalid_pth_file) << file;
488    return 0; // FIXME: Proper error diagnostic?
489  }
490
491  OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
492
493  // Warn if the PTH file is empty.  We still want to create a PTHManager
494  // as the PTH could be used with -include-pth.
495  if (FL->isEmpty())
496    InvalidPTH(Diags, "PTH file contains no cached source data");
497
498  // Get the location of the table mapping from persistent ids to the
499  // data needed to reconstruct identifiers.
500  const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
501  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
502
503  if (!(IData >= BufBeg && IData < BufEnd)) {
504    Diags.Report(diag::err_invalid_pth_file) << file;
505    return 0;
506  }
507
508  // Get the location of the hashtable mapping between strings and
509  // persistent IDs.
510  const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
511  const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset);
512  if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
513    Diags.Report(diag::err_invalid_pth_file) << file;
514    return 0;
515  }
516
517  OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable,
518                                                                  BufBeg));
519
520  // Get the location of the spelling cache.
521  const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
522  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
523  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
524    Diags.Report(diag::err_invalid_pth_file) << file;
525    return 0;
526  }
527
528  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
529  uint32_t NumIds = ReadLE32(IData);
530
531  // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
532  // so that we in the best case only zero out memory once when the OS returns
533  // us new pages.
534  IdentifierInfo** PerIDCache = 0;
535
536  if (NumIds) {
537    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
538    if (!PerIDCache) {
539      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
540      return 0;
541    }
542  }
543
544  // Compute the address of the original source file.
545  const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
546  unsigned len = ReadUnalignedLE16(originalSourceBase);
547  if (!len) originalSourceBase = 0;
548
549  // Create the new PTHManager.
550  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
551                        SL.take(), NumIds, spellingBase,
552                        (const char*) originalSourceBase);
553}
554
555IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
556  // Look in the PTH file for the string data for the IdentifierInfo object.
557  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
558  const unsigned char* IDData =
559    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
560  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
561
562  // Allocate the object.
563  std::pair<IdentifierInfo,const unsigned char*> *Mem =
564    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
565
566  Mem->second = IDData;
567  assert(IDData[0] != '\0');
568  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
569
570  // Store the new IdentifierInfo in the cache.
571  PerIDCache[PersistentID] = II;
572  assert(II->getNameStart() && II->getNameStart()[0] != '\0');
573  return II;
574}
575
576IdentifierInfo* PTHManager::get(StringRef Name) {
577  PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
578  // Double check our assumption that the last character isn't '\0'.
579  assert(Name.empty() || Name.back() != '\0');
580  PTHStringIdLookup::iterator I = SL.find(std::make_pair(Name.data(),
581                                                         Name.size()));
582  if (I == SL.end()) // No identifier found?
583    return 0;
584
585  // Match found.  Return the identifier!
586  assert(*I > 0);
587  return GetIdentifierInfo(*I-1);
588}
589
590PTHLexer *PTHManager::CreateLexer(FileID FID) {
591  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
592  if (!FE)
593    return 0;
594
595  // Lookup the FileEntry object in our file lookup data structure.  It will
596  // return a variant that indicates whether or not there is an offset within
597  // the PTH file that contains cached tokens.
598  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
599  PTHFileLookup::iterator I = PFL.find(FE);
600
601  if (I == PFL.end()) // No tokens available?
602    return 0;
603
604  const PTHFileData& FileData = *I;
605
606  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
607  // Compute the offset of the token data within the buffer.
608  const unsigned char* data = BufStart + FileData.getTokenOffset();
609
610  // Get the location of pp-conditional table.
611  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
612  uint32_t Len = ReadLE32(ppcond);
613  if (Len == 0) ppcond = 0;
614
615  assert(PP && "No preprocessor set yet!");
616  return new PTHLexer(*PP, FID, data, ppcond, *this);
617}
618
619//===----------------------------------------------------------------------===//
620// 'stat' caching.
621//===----------------------------------------------------------------------===//
622
623namespace {
/// Immutable snapshot of the stat fields stored for one path, plus a flag
/// saying whether any stat information is present at all.
class PTHStatData {
public:
  const bool hasStat;   // false for the default-constructed "no stat" value
  const ino_t ino;
  const dev_t dev;
  const mode_t mode;
  const time_t mtime;
  const off_t size;

  /// Builds an entry that carries stat information.
  PTHStatData(ino_t inode, dev_t device, mode_t fileMode, time_t modTime,
              off_t fileSize)
    : hasStat(true),
      ino(inode),
      dev(device),
      mode(fileMode),
      mtime(modTime),
      size(fileSize) {}

  /// Builds an entry without stat information; every field is zero.
  PTHStatData()
    : hasStat(false),
      ino(0),
      dev(0),
      mode(0),
      mtime(0),
      size(0) {}
};
639
640class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
641public:
642  typedef const char* external_key_type;  // const char*
643  typedef PTHStatData data_type;
644
645  static internal_key_type GetInternalKey(const char *path) {
646    // The key 'kind' doesn't matter here because it is ignored in EqualKey.
647    return std::make_pair((unsigned char) 0x0, path);
648  }
649
650  static bool EqualKey(internal_key_type a, internal_key_type b) {
651    // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
652    // just the paths.
653    return strcmp(a.second, b.second) == 0;
654  }
655
656  static data_type ReadData(const internal_key_type& k, const unsigned char* d,
657                            unsigned) {
658
659    if (k.first /* File or Directory */) {
660      if (k.first == 0x1 /* File */) d += 4 * 2; // Skip the first 2 words.
661      ino_t ino = (ino_t) ReadUnalignedLE32(d);
662      dev_t dev = (dev_t) ReadUnalignedLE32(d);
663      mode_t mode = (mode_t) ReadUnalignedLE16(d);
664      time_t mtime = (time_t) ReadUnalignedLE64(d);
665      return data_type(ino, dev, mode, mtime, (off_t) ReadUnalignedLE64(d));
666    }
667
668    // Negative stat.  Don't read anything.
669    return data_type();
670  }
671};
672
673class PTHStatCache : public FileSystemStatCache {
674  typedef OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
675  CacheTy Cache;
676
677public:
678  PTHStatCache(PTHFileLookup &FL) :
679    Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
680          FL.getBase()) {}
681
682  ~PTHStatCache() {}
683
684  LookupResult getStat(const char *Path, struct stat &StatBuf,
685                       int *FileDescriptor) {
686    // Do the lookup for the file's data in the PTH file.
687    CacheTy::iterator I = Cache.find(Path);
688
689    // If we don't get a hit in the PTH file just forward to 'stat'.
690    if (I == Cache.end())
691      return statChained(Path, StatBuf, FileDescriptor);
692
693    const PTHStatData &Data = *I;
694
695    if (!Data.hasStat)
696      return CacheMissing;
697
698    StatBuf.st_ino = Data.ino;
699    StatBuf.st_dev = Data.dev;
700    StatBuf.st_mtime = Data.mtime;
701    StatBuf.st_mode = Data.mode;
702    StatBuf.st_size = Data.size;
703    return CacheExists;
704  }
705};
706} // end anonymous namespace
707
708FileSystemStatCache *PTHManager::createStatCache() {
709  return new PTHStatCache(*((PTHFileLookup*) FileLookup));
710}
711