PTHLexer.cpp revision 7e3a004c6ed1fe87912203b9c5a113f8da89d261
1274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
3274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//                     The LLVM Compiler Infrastructure
4274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
5274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek// This file is distributed under the University of Illinois Open Source
6274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek// License. See LICENSE.TXT for details.
7274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
8274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//===----------------------------------------------------------------------===//
9274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
10274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek// This file implements the PTHLexer interface.
11274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//
12274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek//===----------------------------------------------------------------------===//
13274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
140c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Basic/TokenKinds.h"
150c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Basic/FileManager.h"
160c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Basic/IdentifierTable.h"
17274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek#include "clang/Lex/PTHLexer.h"
18274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek#include "clang/Lex/Preprocessor.h"
190c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Lex/PTHManager.h"
200c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Lex/Token.h"
210c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "clang/Lex/Preprocessor.h"
220c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "llvm/ADT/StringMap.h"
230c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek#include "llvm/ADT/OwningPtr.h"
246f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner#include "llvm/Support/Compiler.h"
256f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner#include "llvm/Support/MathExtras.h"
266f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner#include "llvm/Support/MemoryBuffer.h"
276f78c3b8b9343e7e9fbf2d457cccf00df6da5d47Chris Lattner#include "llvm/System/Host.h"
28274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenekusing namespace clang;
29274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
// Size in bytes of one token record in the PTH token buffer, field by field:
//   1 (token kind) + 1 (token flags) + 2 (token length)
//   + 4 (identifier ID, or spelling offset for literals) + 4 (file offset).
#define DISK_TOKEN_SIZE (1 + 1 + 2 + 4 + 4)
31268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
32e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
33e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// Utility methods for reading from the mmap'ed PTH file.
34e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
35e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
/// ReadUnalignedLE16 - Decode a little-endian 16-bit value one byte at a time
/// (so no alignment of \p Data is required) and advance \p Data past it.
static inline uint16_t ReadUnalignedLE16(const unsigned char *&Data) {
  const uint16_t LowByte  = Data[0];
  const uint16_t HighByte = Data[1];
  Data += 2;
  return (uint16_t)(LowByte | (HighByte << 8));
}
42da9d61c96c412f6babc7f824152609562f302388Chris Lattner
/// ReadUnalignedLE32 - Decode a little-endian 32-bit value one byte at a time
/// (so no alignment of \p Data is required) and advance \p Data past it.
static inline uint32_t ReadUnalignedLE32(const unsigned char *&Data) {
  uint32_t Value = 0;
  // Byte i of the input supplies bits [8*i, 8*i+7] of the result.
  for (unsigned Byte = 0; Byte != 4; ++Byte)
    Value |= ((uint32_t)Data[Byte]) << (8 * Byte);
  Data += 4;
  return Value;
}
51d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
/// ReadLE32 - Decode a little-endian 32-bit value from \p Data and advance
/// \p Data past the four bytes consumed.
///
/// The value is assembled from individual bytes rather than by dereferencing
/// a casted uint32_t pointer.  That keeps the read well-defined for any
/// address (no alignment requirement, no strict-aliasing violation, no
/// casting away of const) and yields the same result on little- and
/// big-endian hosts without an explicit byte swap.  Optimizing compilers
/// commonly recognize this pattern and emit a single load where the target
/// allows it.
static inline uint32_t ReadLE32(const unsigned char *&Data) {
  const uint32_t V = ((uint32_t)Data[0])       |
                     ((uint32_t)Data[1] << 8)  |
                     ((uint32_t)Data[2] << 16) |
                     ((uint32_t)Data[3] << 24);
  Data += 4;
  return V;
}
61e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
// Bernstein hash function:
// This is basically copy-and-paste from StringMap.  It likely won't stay
// here, which is why I didn't bother to expose this function from StringMap.
//
// Hashes the NUL-terminated string 'x': every character is folded in as
// R = R * 33 + c, and the accumulated value is finally mixed with itself
// shifted right by five bits.
static unsigned BernsteinHash(const char* x) {
  unsigned int Hash = 0;
  while (*x != '\0') {
    Hash = Hash * 33 + *x;
    ++x;
  }
  return Hash + (Hash >> 5);
}
717e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
// Length-delimited variant of the Bernstein hash: folds in exactly 'n'
// characters starting at 'x' (embedded NULs included) as R = R * 33 + c, then
// mixes the accumulated value with itself shifted right by five bits.
static unsigned BernsteinHash(const char* x, unsigned n) {
  unsigned int Hash = 0;
  for (const char *End = x + n; x != End; ++x)
    Hash = Hash * 33 + *x;
  return Hash + (Hash >> 5);
}
77da9d61c96c412f6babc7f824152609562f302388Chris Lattner
78e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
79e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// PTHLexer methods.
80e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek//===----------------------------------------------------------------------===//
81e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
82da9d61c96c412f6babc7f824152609562f302388Chris LattnerPTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
83277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek                   const unsigned char *ppcond, PTHManager &PM)
842b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner  : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(0),
85277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
862b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner
872b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner  FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
885f074266cc59563036c40516c814d63825723e20Ted Kremenek}
89274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
90e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenekvoid PTHLexer::Lex(Token& Tok) {
91e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted KremenekLexNextToken:
92866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
93866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
94866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Read the raw token data.
95866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
96e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
97866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Shadow CurPtr into an automatic variable.
98aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner  const unsigned char *CurPtrShadow = CurPtr;
99866bdf74547efe32c320554837ffce00fcc084feTed Kremenek
1001b5285e1ba31975864da356b2ed927e87670e654Chris Lattner  // Read in the data for the token.
1015ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  unsigned Word0 = ReadLE32(CurPtrShadow);
1025ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  uint32_t IdentifierID = ReadLE32(CurPtrShadow);
1035ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  uint32_t FileOffset = ReadLE32(CurPtrShadow);
1047b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek
1057b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek  tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
1067b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek  Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
107aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner  uint32_t Len = Word0 >> 16;
1087b78b7c6d96deb1e63f8d0655ee6fa53de0b65efTed Kremenek
109aff6ef8e7bc3c3739f984c390e0af693e60be064Chris Lattner  CurPtr = CurPtrShadow;
110e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
111866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
112866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Construct the token itself.
113866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
114e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
115866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Tok.startToken();
116898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner  Tok.setKind(TKind);
117898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner  Tok.setFlag(TFlags);
11859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  assert(!LexingRawMode);
1192b2453a7d8fe732561795431f39ceb2b2a832d84Chris Lattner  Tok.setLocation(FileStartLoc.getFileLocWithOffset(FileOffset));
120866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  Tok.setLength(Len);
12189d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek
122d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner  // Handle identifiers.
123277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  if (Tok.isLiteral()) {
124277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
125277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  }
126277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  else if (IdentifierID) {
127d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    MIOpt.ReadToken();
128d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
129863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner
130d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    Tok.setIdentifierInfo(II);
131863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner
132863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    // Change the kind of this identifier to the appropriate token kind, e.g.
133863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    // turning "for" into a keyword.
134863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner    Tok.setKind(II->getTokenID());
135863c486fcb6162495a94fddf7ac8409de2638995Chris Lattner
136d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    if (II->isHandleIdentifierCase())
137d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner      PP->HandleIdentifier(Tok);
138d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner    return;
139d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner  }
140d0a69696acca62798dfc8b98f97c92bfa7fa0490Chris Lattner
141866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
142866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  // Process the token.
143866bdf74547efe32c320554837ffce00fcc084feTed Kremenek  //===--------------------------------------==//
1445f074266cc59563036c40516c814d63825723e20Ted Kremenek#if 0
1455f074266cc59563036c40516c814d63825723e20Ted Kremenek  SourceManager& SM = PP->getSourceManager();
1465f074266cc59563036c40516c814d63825723e20Ted Kremenek  llvm::cerr << SM.getFileEntryForID(FileID)->getName()
1475f074266cc59563036c40516c814d63825723e20Ted Kremenek    << ':' << SM.getLogicalLineNumber(Tok.getLocation())
1485f074266cc59563036c40516c814d63825723e20Ted Kremenek    << ':' << SM.getLogicalColumnNumber(Tok.getLocation())
1495f074266cc59563036c40516c814d63825723e20Ted Kremenek    << '\n';
1505f074266cc59563036c40516c814d63825723e20Ted Kremenek#endif
15189d7ee9619d2dbdfa8d956a695c612a104a92cadTed Kremenek
152898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner  if (TKind == tok::eof) {
153e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    // Save the end-of-file token.
154e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek    EofToken = Tok;
155e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
156cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    Preprocessor *PPCache = PP;
15759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
15859d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(!ParsingPreprocessorDirective);
15959d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(!LexingRawMode);
16059d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
16159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    // FIXME: Issue diagnostics similar to Lexer.
16259d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    if (PP->HandleEndOfFile(Tok, false))
163d6f53dc4951aace69014619761760addac9e59ecTed Kremenek      return;
16459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek
165cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
166cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek    return PPCache->Lex(Tok);
167cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek  }
168d6f53dc4951aace69014619761760addac9e59ecTed Kremenek
169898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner  if (TKind == tok::hash && Tok.isAtStartOfLine()) {
17059d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    LastHashTokPtr = CurPtr - DISK_TOKEN_SIZE;
17159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(!LexingRawMode);
17259d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    PP->HandleDirective(Tok);
173d6f53dc4951aace69014619761760addac9e59ecTed Kremenek
17459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    if (PP->isCurrentLexer(this))
17559d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek      goto LexNextToken;
176e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
17759d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    return PP->Lex(Tok);
178e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  }
179e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
180898a0bb1972efb6e03cb1151412ec7392cef07deChris Lattner  if (TKind == tok::eom) {
18159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    assert(ParsingPreprocessorDirective);
18259d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    ParsingPreprocessorDirective = false;
18359d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek    return;
18459d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  }
185274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek
18659d08cb672136322375e5400578ee1fbd0947de2Ted Kremenek  MIOpt.ReadToken();
187cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek}
188cd4e2aecde5bb7810715d5d5a88ac63ce7946f34Ted Kremenek
189e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// FIXME: We can just grab the last token instead of storing a copy
190e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek// into EofToken.
19159d08cb672136322375e5400578ee1fbd0947de2Ted Kremenekvoid PTHLexer::getEOF(Token& Tok) {
192defb7094c835998bb821e894253287625ce8c74dTed Kremenek  assert(EofToken.is(tok::eof));
1930c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  Tok = EofToken;
194274b20863a728cc6a31ee75c670e3733600c1531Ted Kremenek}
19517ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek
19617ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenekvoid PTHLexer::DiscardToEndOfLine() {
19717ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek  assert(ParsingPreprocessorDirective && ParsingFilename == false &&
19817ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek         "Must be in a preprocessing directive!");
1994d35da2e41941965bbee8ed7e8c30e7c21000d71Ted Kremenek
200e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  // We assume that if the preprocessor wishes to discard to the end of
201e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  // the line that it also means to end the current preprocessor directive.
202e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  ParsingPreprocessorDirective = false;
203e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
20474c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  // Skip tokens by only peeking at their token kind and the flags.
20574c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  // We don't need to actually reconstruct full tokens from the token buffer.
20674c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  // This saves some copies and it also reduces IdentifierInfo* lookup.
207da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* p = CurPtr;
20874c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  while (1) {
20974c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    // Read the token kind.  Are we at the end of the file?
21074c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
21174c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    if (x == tok::eof) break;
212e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
21374c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    // Read the token flags.  Are we at the start of the next line?
21474c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
21580d2f3059326f99ebf7c867db1c7f106ec9485f5Ted Kremenek    if (y & Token::StartOfLine) break;
21674c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek
21774c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    // Skip to the next token.
21874c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek    p += DISK_TOKEN_SIZE;
21974c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  }
22074c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek
22174c3e6e5e95af08096aab415d1ce15f15ffff02aTed Kremenek  CurPtr = p;
22217ff58a63197b398ae52697b088dc0fb8b255519Ted Kremenek}
2230c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
224268ee7016a2811803989487c0ad3799486092c63Ted Kremenek/// SkipBlock - Used by Preprocessor to skip the current conditional block.
225268ee7016a2811803989487c0ad3799486092c63Ted Kremenekbool PTHLexer::SkipBlock() {
226268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(CurPPCondPtr && "No cached PP conditional information.");
227268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(LastHashTokPtr && "No known '#' token.");
228268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
229da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* HashEntryI = 0;
230268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  uint32_t Offset;
231268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  uint32_t TableIdx;
232268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
233268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  do {
23441a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Read the token offset from the side-table.
2355ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner    Offset = ReadLE32(CurPPCondPtr);
23641a2660377d215d004fe413c03874bd066b5384cTed Kremenek
23741a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Read the target table index from the side-table.
2385ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner    TableIdx = ReadLE32(CurPPCondPtr);
23941a2660377d215d004fe413c03874bd066b5384cTed Kremenek
24041a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Compute the actual memory address of the '#' token data for this entry.
24141a2660377d215d004fe413c03874bd066b5384cTed Kremenek    HashEntryI = TokBuf + Offset;
24241a2660377d215d004fe413c03874bd066b5384cTed Kremenek
24341a2660377d215d004fe413c03874bd066b5384cTed Kremenek    // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
24441a2660377d215d004fe413c03874bd066b5384cTed Kremenek    //  contain nested blocks.  In the side-table we can jump over these
24541a2660377d215d004fe413c03874bd066b5384cTed Kremenek    //  nested blocks instead of doing a linear search if the next "sibling"
24641a2660377d215d004fe413c03874bd066b5384cTed Kremenek    //  entry is not at a location greater than LastHashTokPtr.
24741a2660377d215d004fe413c03874bd066b5384cTed Kremenek    if (HashEntryI < LastHashTokPtr && TableIdx) {
24841a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // In the side-table we are still at an entry for a '#' token that
24941a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // is earlier than the last one we saw.  Check if the location we would
25041a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // stride gets us closer.
251da9d61c96c412f6babc7f824152609562f302388Chris Lattner      const unsigned char* NextPPCondPtr =
252da9d61c96c412f6babc7f824152609562f302388Chris Lattner        PPCond + TableIdx*(sizeof(uint32_t)*2);
25341a2660377d215d004fe413c03874bd066b5384cTed Kremenek      assert(NextPPCondPtr >= CurPPCondPtr);
25441a2660377d215d004fe413c03874bd066b5384cTed Kremenek      // Read where we should jump to.
2555ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner      uint32_t TmpOffset = ReadLE32(NextPPCondPtr);
256da9d61c96c412f6babc7f824152609562f302388Chris Lattner      const unsigned char* HashEntryJ = TokBuf + TmpOffset;
25741a2660377d215d004fe413c03874bd066b5384cTed Kremenek
25841a2660377d215d004fe413c03874bd066b5384cTed Kremenek      if (HashEntryJ <= LastHashTokPtr) {
25941a2660377d215d004fe413c03874bd066b5384cTed Kremenek        // Jump directly to the next entry in the side table.
26041a2660377d215d004fe413c03874bd066b5384cTed Kremenek        HashEntryI = HashEntryJ;
26141a2660377d215d004fe413c03874bd066b5384cTed Kremenek        Offset = TmpOffset;
2625ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner        TableIdx = ReadLE32(NextPPCondPtr);
26341a2660377d215d004fe413c03874bd066b5384cTed Kremenek        CurPPCondPtr = NextPPCondPtr;
26441a2660377d215d004fe413c03874bd066b5384cTed Kremenek      }
26541a2660377d215d004fe413c03874bd066b5384cTed Kremenek    }
266268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  }
26741a2660377d215d004fe413c03874bd066b5384cTed Kremenek  while (HashEntryI < LastHashTokPtr);
26841a2660377d215d004fe413c03874bd066b5384cTed Kremenek  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
269268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(TableIdx && "No jumping from #endifs.");
270268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
271268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Update our side-table iterator.
272da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
273268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  assert(NextPPCondPtr >= CurPPCondPtr);
274268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  CurPPCondPtr = NextPPCondPtr;
275268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
276268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Read where we should jump to.
2775ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  HashEntryI = TokBuf + ReadLE32(NextPPCondPtr);
2785ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  uint32_t NextIdx = ReadLE32(NextPPCondPtr);
279268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
280268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // By construction NextIdx will be zero if this is a #endif.  This is useful
281268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // to know to obviate lexing another token.
282268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  bool isEndif = NextIdx == 0;
283268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
284268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // This case can occur when we see something like this:
285268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //
286268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //  #if ...
287268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //   /* a comment or nothing */
288268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //  #elif
289268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  //
290268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // If we are skipping the first #if block it will be the case that CurPtr
291268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // already points 'elif'.  Just return.
292268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
29341a2660377d215d004fe413c03874bd066b5384cTed Kremenek  if (CurPtr > HashEntryI) {
29441a2660377d215d004fe413c03874bd066b5384cTed Kremenek    assert(CurPtr == HashEntryI + DISK_TOKEN_SIZE);
295268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    // Did we reach a #endif?  If so, go ahead and consume that token as well.
296268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    if (isEndif)
297e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek      CurPtr += DISK_TOKEN_SIZE*2;
298268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    else
29941a2660377d215d004fe413c03874bd066b5384cTed Kremenek      LastHashTokPtr = HashEntryI;
300268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
301268ee7016a2811803989487c0ad3799486092c63Ted Kremenek    return isEndif;
302268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  }
303268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
304268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
30541a2660377d215d004fe413c03874bd066b5384cTed Kremenek  CurPtr = HashEntryI;
306268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
307268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Update the location of the last observed '#'.  This is useful if we
308268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // are skipping multiple blocks.
309268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  LastHashTokPtr = CurPtr;
310268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
311e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  // Skip the '#' token.
312da9d61c96c412f6babc7f824152609562f302388Chris Lattner  assert(((tok::TokenKind)*CurPtr) == tok::hash);
313e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  CurPtr += DISK_TOKEN_SIZE;
314e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek
315268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Did we reach a #endif?  If so, go ahead and consume that token as well.
316e5680f3cd678014cf0872d34726dc804b0cbbdd4Ted Kremenek  if (isEndif) { CurPtr += DISK_TOKEN_SIZE*2; }
317268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
318268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  return isEndif;
319268ee7016a2811803989487c0ad3799486092c63Ted Kremenek}
320268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
32130a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed KremenekSourceLocation PTHLexer::getSourceLocation() {
3221b5285e1ba31975864da356b2ed927e87670e654Chris Lattner  // getSourceLocation is not on the hot path.  It is used to get the location
3231b5285e1ba31975864da356b2ed927e87670e654Chris Lattner  // of the next token when transitioning back to this lexer when done
32430a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // handling a #included file.  Just read the necessary data from the token
32530a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // data buffer to construct the SourceLocation object.
32630a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek  // NOTE: This is a virtual function; hence it is defined out-of-line.
327b248d53f2599d8e7b53b144b713e163ca521ffcaTed Kremenek  const unsigned char *OffsetPtr = CurPtr + (DISK_TOKEN_SIZE - 4);
3285ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  uint32_t Offset = ReadLE32(OffsetPtr);
3291b5285e1ba31975864da356b2ed927e87670e654Chris Lattner  return FileStartLoc.getFileLocWithOffset(Offset);
33030a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek}
33130a12ec2a7f331d9e08acabe7cda853aaa7ba54bTed Kremenek
3325f074266cc59563036c40516c814d63825723e20Ted Kremenek//===----------------------------------------------------------------------===//
333d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek// OnDiskChainedHashTable
3340c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
3350c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
336d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenektemplate<typename Info>
337d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenekclass OnDiskChainedHashTable {
338d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const unsigned NumBuckets;
339d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const unsigned NumEntries;
340d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const unsigned char* const Buckets;
341d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const unsigned char* const Base;
342d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenekpublic:
343d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  typedef typename Info::internal_key_type internal_key_type;
344d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  typedef typename Info::external_key_type external_key_type;
345d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  typedef typename Info::data_type         data_type;
346d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
347d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
348d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek                         const unsigned char* buckets,
349d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek                         const unsigned char* base)
350d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    : NumBuckets(numBuckets), NumEntries(numEntries),
351d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      Buckets(buckets), Base(base) {
352d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek        assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
353d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek               "'buckets' must have a 4-byte alignment");
354d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      }
355d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
356d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
357d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  bool isEmpty() const { return NumEntries == 0; }
358d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
359d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  class iterator {
360d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    const unsigned char* const data;
361d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    const unsigned len;
362d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  public:
363d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    iterator() : data(0), len(0) {}
364d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    iterator(const unsigned char* d, unsigned l) : data(d), len(l) {}
365d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
366d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    data_type operator*() const { return Info::ReadData(data, len); }
367d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    bool operator==(const iterator& X) const { return X.data == data; }
368d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    bool operator!=(const iterator& X) const { return X.data != data; }
369d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  };
370d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
371d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  iterator find(const external_key_type& eKey) {
372d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    const internal_key_type& iKey = Info::GetInternalKey(eKey);
373d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    unsigned key_hash = Info::ComputeHash(iKey);
374d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
375d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    // Each bucket is just a 32-bit offset into the PTH file.
376d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    unsigned idx = key_hash & (NumBuckets - 1);
377d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
378d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
379d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    unsigned offset = ReadLE32(Bucket);
380d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    if (offset == 0) return iterator(); // Empty bucket.
381d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    const unsigned char* Items = Base + offset;
382d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
383d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    // 'Items' starts with a 16-bit unsigned integer representing the
384d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    // number of items in this bucket.
385d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    unsigned len = ReadUnalignedLE16(Items);
386d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
387d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    for (unsigned i = 0; i < len; ++i) {
388d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      // Read the hash.
389d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      uint32_t item_hash = ReadUnalignedLE32(Items);
390d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
391d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      // Determine the length of the key and the data.
392d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      const std::pair<unsigned, unsigned>& L = Info::ReadKeyDataLength(Items);
393d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      unsigned item_len = L.first + L.second;
394d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
395d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      // Compare the hashes.  If they are not the same, skip the entry entirely.
396d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      if (item_hash != key_hash) {
397d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek        Items += item_len;
398d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek        continue;
399d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      }
400d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
401d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      // Read the key.
402d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      const internal_key_type& X =
403d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek        Info::ReadKey((const unsigned char* const) Items, L.first);
404d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
405d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      // If the key doesn't match just skip reading the value.
406d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      if (!Info::EqualKey(X, iKey)) {
407d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek        Items += item_len;
408d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek        continue;
409d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      }
410d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
411d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      // The key matches!
412d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek      return iterator(Items + L.first, L.second);
413d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    }
414d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
415d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return iterator();
416d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  }
417d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
418d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  iterator end() const { return iterator(); }
419d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
420d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
421d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static OnDiskChainedHashTable* Create(const unsigned char* buckets,
422d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek                                        const unsigned char* const base) {
423d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
424d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    assert(buckets > base);
425d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
426d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek           "buckets should be 4-byte aligned.");
427d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
428d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    unsigned numBuckets = ReadLE32(buckets);
429d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    unsigned numEntries = ReadLE32(buckets);
430d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
431d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek                                            base);
432d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  }
433d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek};
434d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
435d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek//===----------------------------------------------------------------------===//
436d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek// PTH file lookup: map from strings to file data.
437d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek//===----------------------------------------------------------------------===//
4380c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
4390c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek/// PTHFileLookup - This internal data structure is used by the PTHManager
4400c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek///  to map from FileEntry objects managed by FileManager to offsets within
4410c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek///  the PTH file.
4420c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremeneknamespace {
443d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenekclass VISIBILITY_HIDDEN PTHFileData {
444d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const uint32_t TokenOff;
445d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const uint32_t PPCondOff;
4460c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenekpublic:
447d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
448d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    : TokenOff(tokenOff), PPCondOff(ppCondOff) {}
4490c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
450d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  uint32_t getTokenOffset() const { return TokenOff; }
451d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  uint32_t getPPCondOffset() const { return PPCondOff; }
452d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek};
4530c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
454d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenekclass VISIBILITY_HIDDEN PTHFileLookupTrait {
4550c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenekpublic:
456d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  typedef PTHFileData      data_type;
457d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  typedef const FileEntry* external_key_type;
458d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  typedef const char*      internal_key_type;
4590c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
460d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static bool EqualKey(const char* a, const char* b) {
461d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return strcmp(a, b) == 0;
462d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  }
463d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
464d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static unsigned ComputeHash(const char* x) {
4657e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return BernsteinHash(x);
466d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  }
467d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
468d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static const char* GetInternalKey(const FileEntry* FE) {
469d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return FE->getName();
470cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek  }
471cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek
472d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static std::pair<unsigned, unsigned>
473d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  ReadKeyDataLength(const unsigned char*& d) {
474d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return std::make_pair((unsigned) ReadUnalignedLE16(d), 8U);
4750c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
4760c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
477d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static const char* ReadKey(const unsigned char* d, unsigned) {
478d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return (const char*) d;
479d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  }
480d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
481d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  static PTHFileData ReadData(const unsigned char* d, unsigned) {
482d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    uint32_t x = ::ReadUnalignedLE32(d);
483d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    uint32_t y = ::ReadUnalignedLE32(d);
484d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek    return PTHFileData(x, y);
4850c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
4860c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek};
4877e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
4887e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenekclass VISIBILITY_HIDDEN PTHStringLookupTrait {
4897e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenekpublic:
4907e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  typedef uint32_t
4917e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek          data_type;
4927e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
4937e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  typedef const std::pair<const char*, unsigned>
4947e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek          external_key_type;
4957e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
4967e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  typedef external_key_type internal_key_type;
4977e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
4987e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  static bool EqualKey(const internal_key_type& a,
4997e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek                       const internal_key_type& b) {
5007e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
5017e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek                                  : false;
5027e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  }
5037e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
5047e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  static unsigned ComputeHash(const internal_key_type& a) {
5057e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return BernsteinHash(a.first, a.second);
5067e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  }
5077e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
5087e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  // This hopefully will just get inlined and removed by the optimizer.
5097e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  static const internal_key_type&
5107e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  GetInternalKey(const external_key_type& x) { return x; }
5117e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
5127e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  static std::pair<unsigned, unsigned>
5137e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  ReadKeyDataLength(const unsigned char*& d) {
5147e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return std::make_pair((unsigned) ReadUnalignedLE16(d), sizeof(uint32_t));
5157e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  }
5167e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
5177e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  static std::pair<const char*, unsigned>
5187e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  ReadKey(const unsigned char* d, unsigned n) {
5197e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek      assert(n >= 2 && d[n-1] == '\0');
5207e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek      return std::make_pair((const char*) d, n-1);
5217e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    }
5227e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
5237e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  static uint32_t ReadData(const unsigned char* d, unsigned) {
5247e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return ::ReadUnalignedLE32(d);
5257e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  }
5267e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek};
5277e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
528d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek} // end anonymous namespace
529d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
5307e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenektypedef OnDiskChainedHashTable<PTHFileLookupTrait>   PTHFileLookup;
5317e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenektypedef OnDiskChainedHashTable<PTHStringLookupTrait> PTHStringIdLookup;
5320c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5330c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
5340c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek// PTHManager methods.
5350c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek//===----------------------------------------------------------------------===//
5360c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5370c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
538da9d61c96c412f6babc7f824152609562f302388Chris Lattner                       const unsigned char* idDataTable,
539da9d61c96c412f6babc7f824152609562f302388Chris Lattner                       IdentifierInfo** perIDCache,
5407e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek                       void* stringIdLookup, unsigned numIds,
541277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek                       const unsigned char* spellingBase)
5426183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek: Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
5437e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  IdDataTable(idDataTable), StringIdLookup(stringIdLookup),
544277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  NumIds(numIds), PP(0), SpellingBase(spellingBase) {}
5450c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5460c6a77bc1f52f282a969538f139ebde429076ed3Ted KremenekPTHManager::~PTHManager() {
5470c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  delete Buf;
5480c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  delete (PTHFileLookup*) FileLookup;
5497e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  delete (PTHStringIdLookup*) StringIdLookup;
5500e50b6e7c104d00614baa3d80df62f1630a94d9cTed Kremenek  free(PerIDCache);
5510c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
5520c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
55326555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenekstatic void InvalidPTH(Diagnostic *Diags, const char* Msg = 0) {
55426555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  if (!Diags) return;
55526555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  if (!Msg) Msg = "Invalid or corrupted PTH file";
55626555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note, Msg);
55726555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  Diags->Report(FullSourceLoc(), DiagID);
55826555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek}
55926555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek
5608a6aec620dbec1f292fe4116c0373ac81ab90234Ted KremenekPTHManager* PTHManager::Create(const std::string& file, Diagnostic* Diags) {
5610c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Memory map the PTH file.
5620c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  llvm::OwningPtr<llvm::MemoryBuffer>
5630c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  File(llvm::MemoryBuffer::getFile(file.c_str()));
5640c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5658a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek  if (!File) {
5668a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek    if (Diags) {
5678a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek      unsigned DiagID = Diags->getCustomDiagID(Diagnostic::Note,
5688a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek                                               "PTH file %0 could not be read");
5698a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek      Diags->Report(FullSourceLoc(), DiagID) << file;
5708a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek    }
5717e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
5720c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0;
5738a6aec620dbec1f292fe4116c0373ac81ab90234Ted Kremenek  }
5740c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
5750c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Get the buffer ranges and check if there are at least three 32-bit
5760c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // words at the end of the file.
577da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* BufBeg = (unsigned char*)File->getBufferStart();
578da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* BufEnd = (unsigned char*)File->getBufferEnd();
579e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek
580e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek  // Check the prologue of the file.
5814adc71ae2cfc190f8d2cf58876e2a7893aa74ee0Ted Kremenek  if ((BufEnd - BufBeg) < (signed) (sizeof("cfe-pth") + 3 + 4) ||
58226555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek      memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth") - 1) != 0) {
58326555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags);
584e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek    return 0;
58526555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  }
5860c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
58767d15050bbea16ae256e204ecd464f2e454c3c99Ted Kremenek  // Read the PTH version.
588e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek  const unsigned char *p = BufBeg + (sizeof("cfe-pth") - 1);
58967d15050bbea16ae256e204ecd464f2e454c3c99Ted Kremenek  unsigned Version = ReadLE32(p);
59067d15050bbea16ae256e204ecd464f2e454c3c99Ted Kremenek
59126555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  if (Version != PTHManager::Version) {
59226555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags,
59326555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek        Version < PTHManager::Version
59426555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek        ? "PTH file uses an older PTH format that is no longer supported"
59526555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek        : "PTH file uses a newer PTH format that cannot be read");
59667d15050bbea16ae256e204ecd464f2e454c3c99Ted Kremenek    return 0;
59726555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  }
59867d15050bbea16ae256e204ecd464f2e454c3c99Ted Kremenek
59967d15050bbea16ae256e204ecd464f2e454c3c99Ted Kremenek  // Compute the address of the index table at the end of the PTH file.
600e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek  const unsigned char *EndTable = BufBeg + ReadLE32(p);
601e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek
60226555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  if (EndTable >= BufEnd) {
60326555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags);
604e1b6498c41b94c3bc5cede17b0702282543385efTed Kremenek    return 0;
60526555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  }
6060c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
6070c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Construct the file lookup table.  This will be used for mapping from
6080c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // FileEntry*'s to cached tokens.
609f1de4649e917f891d24e0718d02ee904b9edbe9dTed Kremenek  const unsigned char* FileTableOffset = EndTable + sizeof(uint32_t)*2;
6105ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  const unsigned char* FileTable = BufBeg + ReadLE32(FileTableOffset);
6110c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
6120c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (!(FileTable > BufBeg && FileTable < BufEnd)) {
61326555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags);
6140c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0; // FIXME: Proper error diagnostic?
6150c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
6160c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
617d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  llvm::OwningPtr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
61826555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  if (FL->isEmpty()) {
61926555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags, "PTH file contains no cached source data");
620cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek    return 0;
62126555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek  }
6220c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
6230c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Get the location of the table mapping from persistent ids to the
6240c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // data needed to reconstruct identifiers.
625f1de4649e917f891d24e0718d02ee904b9edbe9dTed Kremenek  const unsigned char* IDTableOffset = EndTable + sizeof(uint32_t)*0;
6265ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  const unsigned char* IData = BufBeg + ReadLE32(IDTableOffset);
627cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek
628cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek  if (!(IData >= BufBeg && IData < BufEnd)) {
62926555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags);
63026555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    return 0;
6310c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  }
6320c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
6337e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  // Get the location of the hashtable mapping between strings and
6347e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  // persistent IDs.
6357e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  const unsigned char* StringIdTableOffset = EndTable + sizeof(uint32_t)*1;
6367e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  const unsigned char* StringIdTable = BufBeg + ReadLE32(StringIdTableOffset);
6377e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
63826555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags);
63926555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    return 0;
64072b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  }
6417e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek
6427e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  llvm::OwningPtr<PTHStringIdLookup> SL(PTHStringIdLookup::Create(StringIdTable,
6437e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek                                                                  BufBeg));
6447e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  if (SL->isEmpty()) {
6457e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    InvalidPTH(Diags, "PTH file contains no identifiers.");
6467e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return 0;
6477e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  }
64872b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek
649277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  // Get the location of the spelling cache.
650f1de4649e917f891d24e0718d02ee904b9edbe9dTed Kremenek  const unsigned char* spellingBaseOffset = EndTable + sizeof(uint32_t)*3;
651277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  const unsigned char* spellingBase = BufBeg + ReadLE32(spellingBaseOffset);
652277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
65326555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek    InvalidPTH(Diags);
654277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek    return 0;
655277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  }
656277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek
6576183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // Get the number of IdentifierInfos and pre-allocate the identifier cache.
6585ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  uint32_t NumIds = ReadLE32(IData);
659cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek
6606183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // Pre-allocate the peristent ID -> IdentifierInfo* cache.  We use calloc()
6616183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // so that we in the best case only zero out memory once when the OS returns
6626183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  // us new pages.
663cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek  IdentifierInfo** PerIDCache = 0;
6646183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek
665cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek  if (NumIds) {
666cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek    PerIDCache = (IdentifierInfo**)calloc(NumIds, sizeof(*PerIDCache));
667cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek    if (!PerIDCache) {
66826555b18aa2c3b78744e77927acd3faa53ae7369Ted Kremenek      InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
669cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek      return 0;
670cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek    }
6716183e4815a4019e97ad01bd880f12355599b75fdTed Kremenek  }
672cdd8f2153e18796e9e2a126ebcbd4f3e1bd7135bTed Kremenek
67372b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  // Create the new PTHManager.
67472b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
6757e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek                        SL.take(), NumIds, spellingBase);
6760c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
67777ecb3a28f21496ecfdbb3d5f5b66b0d2abf48c9Chris LattnerIdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
6780c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Look in the PTH file for the string data for the IdentifierInfo object.
67977ecb3a28f21496ecfdbb3d5f5b66b0d2abf48c9Chris Lattner  const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
680da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* IDData =
6815ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner    (const unsigned char*)Buf->getBufferStart() + ReadLE32(TableEntry);
682da9d61c96c412f6babc7f824152609562f302388Chris Lattner  assert(IDData < (const unsigned char*)Buf->getBufferEnd());
6830c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
68472b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  // Allocate the object.
685da9d61c96c412f6babc7f824152609562f302388Chris Lattner  std::pair<IdentifierInfo,const unsigned char*> *Mem =
686da9d61c96c412f6babc7f824152609562f302388Chris Lattner    Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
68772b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek
68872b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  Mem->second = IDData;
6897e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  assert(IDData[0] != '\0');
690ea9c26b3dbd74a1497f5609ae6e19a85f42b6073Ted Kremenek  IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
6910c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
69272b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  // Store the new IdentifierInfo in the cache.
69377ecb3a28f21496ecfdbb3d5f5b66b0d2abf48c9Chris Lattner  PerIDCache[PersistentID] = II;
6947e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  assert(II->getName() && II->getName()[0] != '\0');
6950c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  return II;
6960c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
6970c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
69872b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted KremenekIdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
6997e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  PTHStringIdLookup& SL = *((PTHStringIdLookup*)StringIdLookup);
7007e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  // Double check our assumption that the last character isn't '\0'.
7017e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  assert(NameStart[NameEnd-NameStart-1] != '\0');
7027e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  PTHStringIdLookup::iterator I = SL.find(std::make_pair(NameStart,
7037e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek                                                         NameEnd - NameStart));
7047e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  if (I == SL.end()) // No identifier found?
7057e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek    return 0;
70672b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek
7077e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  // Match found.  Return the identifier!
7087e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  assert(*I > 0);
7097e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek  return GetIdentifierInfo(*I-1);
7107e3a004c6ed1fe87912203b9c5a113f8da89d261Ted Kremenek}
71172b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek
712f056d92e182cbe4f62c8d14102544dc38066dabcChris LattnerPTHLexer *PTHManager::CreateLexer(FileID FID) {
713f056d92e182cbe4f62c8d14102544dc38066dabcChris Lattner  const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
7140c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  if (!FE)
7150c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0;
7160c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
7170c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Lookup the FileEntry object in our file lookup data structure.  It will
7180c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // return a variant that indicates whether or not there is an offset within
7190c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // the PTH file that contains cached tokens.
720d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  PTHFileLookup& PFL = *((PTHFileLookup*)FileLookup);
721d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  PTHFileLookup::iterator I = PFL.find(FE);
7220c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
723d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  if (I == PFL.end()) // No tokens available?
7240c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek    return 0;
7250c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek
726d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek  const PTHFileData& FileData = *I;
727d8c02929fe70f03111be73e7b8c402c724238ee9Ted Kremenek
728da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
7290c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek  // Compute the offset of the token data within the buffer.
730da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* data = BufStart + FileData.getTokenOffset();
731268ee7016a2811803989487c0ad3799486092c63Ted Kremenek
732268ee7016a2811803989487c0ad3799486092c63Ted Kremenek  // Get the location of pp-conditional table.
733da9d61c96c412f6babc7f824152609562f302388Chris Lattner  const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
7345ff4317536dbd7f03332bb250c8b35ec04a6f5dbChris Lattner  uint32_t Len = ReadLE32(ppcond);
7351b5285e1ba31975864da356b2ed927e87670e654Chris Lattner  if (Len == 0) ppcond = 0;
73632a8ad526f9bc00539f000a2dd1ac3e167db61c1Ted Kremenek
73772b1b15ee88aac0a63e2c1dc53fe22f5ab297b20Ted Kremenek  assert(PP && "No preprocessor set yet!");
738277faca30c9f8f72b79f55695cbe3395ec246e7cTed Kremenek  return new PTHLexer(*PP, FID, data, ppcond, *this);
7390c6a77bc1f52f282a969538f139ebde429076ed3Ted Kremenek}
740